06347be7d97ffd24bfe7eea400d147f29c0fbfe8
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.32  2007/02/28 10:41:56  acolla
19 Run type field added in SHUTTLE framework. Run type is read from "run type" logbook and retrieved by
20 AliPreprocessor::GetRunType() function.
21 Added some ldap definition files.
22
23 Revision 1.30  2007/02/13 11:23:21  acolla
24 Moved getters and setters of Shuttle's main OCDB/Reference, local
25 OCDB/Reference, temp and log folders to AliShuttleInterface
26
27 Revision 1.27  2007/01/30 17:52:42  jgrosseo
28 adding monalisa monitoring
29
30 Revision 1.26  2007/01/23 19:20:03  acolla
31 Removed old ldif files, added TOF, MCH ldif files. Added some options in
32 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
33 SetShuttleLogDir
34
35 Revision 1.25  2007/01/15 19:13:52  acolla
36 Moved some AliInfo to AliDebug in SendMail function
37
38 Revision 1.21  2006/12/07 08:51:26  jgrosseo
39 update (alberto):
40 table, db names in ldap configuration
41 added GRP preprocessor
42 DCS data can also be retrieved by data point
43
44 Revision 1.20  2006/11/16 16:16:48  jgrosseo
45 introducing strict run ordering flag
46 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
47
48 Revision 1.19  2006/11/06 14:23:04  jgrosseo
49 major update (Alberto)
50 o) reading of run parameters from the logbook
51 o) online offline naming conversion
52 o) standalone DCSclient package
53
54 Revision 1.18  2006/10/20 15:22:59  jgrosseo
55 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
56 o) Merging Collect, CollectAll, CollectNew function
57 o) Removing implementation of empty copy constructors (declaration still there!)
58
59 Revision 1.17  2006/10/05 16:20:55  jgrosseo
60 adapting to new CDB classes
61
62 Revision 1.16  2006/10/05 15:46:26  jgrosseo
63 applying to the new interface
64
65 Revision 1.15  2006/10/02 16:38:39  jgrosseo
66 update (alberto):
67 fixed memory leaks
68 storing of objects that failed to be stored to the grid before
69 interfacing of shuttle status table in daq system
70
71 Revision 1.14  2006/08/29 09:16:05  jgrosseo
72 small update
73
74 Revision 1.13  2006/08/15 10:50:00  jgrosseo
75 effc++ corrections (alberto)
76
77 Revision 1.12  2006/08/08 14:19:29  jgrosseo
78 Update to shuttle classes (Alberto)
79
80 - Possibility to set the full object's path in the Preprocessor's and
81 Shuttle's  Store functions
82 - Possibility to extend the object's run validity in the same classes
83 ("startValidity" and "validityInfinite" parameters)
84 - Implementation of the StoreReferenceData function to store reference
85 data in a dedicated CDB storage.
86
87 Revision 1.11  2006/07/21 07:37:20  jgrosseo
88 last run is stored after each run
89
90 Revision 1.10  2006/07/20 09:54:40  jgrosseo
91 introducing status management: The processing per subdetector is divided into several steps,
92 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
93 can keep track of the number of failures and skips further processing after a certain threshold is
94 exceeded. These thresholds can be configured in LDAP.
95
96 Revision 1.9  2006/07/19 10:09:55  jgrosseo
97 new configuration, accesst to DAQ FES (Alberto)
98
99 Revision 1.8  2006/07/11 12:44:36  jgrosseo
100 adding parameters for extended validity range of data produced by preprocessor
101
102 Revision 1.7  2006/07/10 14:37:09  jgrosseo
103 small fix + todo comment
104
105 Revision 1.6  2006/07/10 13:01:41  jgrosseo
106 enhanced storing of last sucessfully processed run (alberto)
107
108 Revision 1.5  2006/07/04 14:59:57  jgrosseo
109 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
110
111 Revision 1.4  2006/06/12 09:11:16  jgrosseo
112 coding conventions (Alberto)
113
114 Revision 1.3  2006/06/06 14:26:40  jgrosseo
115 o) removed files that were moved to STEER
116 o) shuttle updated to follow the new interface (Alberto)
117
118 Revision 1.2  2006/03/07 07:52:34  hristov
119 New version (B.Yordanov)
120
121 Revision 1.6  2005/11/19 17:19:14  byordano
122 RetrieveDATEEntries and RetrieveConditionsData added
123
124 Revision 1.5  2005/11/19 11:09:27  byordano
125 AliShuttle declaration added
126
127 Revision 1.4  2005/11/17 17:47:34  byordano
128 TList changed to TObjArray
129
130 Revision 1.3  2005/11/17 14:43:23  byordano
131 import to local CVS
132
133 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
134 Initial import as subdirectory in AliRoot
135
136 Revision 1.2  2005/09/13 08:41:15  byordano
137 default startTime endTime added
138
139 Revision 1.4  2005/08/30 09:13:02  byordano
140 some docs added
141
142 Revision 1.3  2005/08/29 21:15:47  byordano
143 some docs added
144
145 */
146
147 //
148 // This class is the main manager for AliShuttle.
149 // It organizes the data retrieval from DCS and calls the
150 // interface methods of AliPreprocessor.
151 // For every detector in AliShuttleConfig (see AliShuttleConfig),
152 // data for its set of aliases is retrieved. If there is a registered
153 // AliPreprocessor for this detector then it will be used
154 // according to the schema (see AliPreprocessor).
155 // If there isn't a registered AliPreprocessor then the retrieved
156 // data is stored automatically to the underlying AliCDBStorage.
157 // The alias name is used for detSpec.
158 //
159
160 #include "AliShuttle.h"
161
162 #include "AliCDBManager.h"
163 #include "AliCDBStorage.h"
164 #include "AliCDBId.h"
165 #include "AliCDBRunRange.h"
166 #include "AliCDBPath.h"
167 #include "AliCDBEntry.h"
168 #include "AliShuttleConfig.h"
169 #include "DCSClient/AliDCSClient.h"
170 #include "AliLog.h"
171 #include "AliPreprocessor.h"
172 #include "AliShuttleStatus.h"
173 #include "AliShuttleLogbookEntry.h"
174
175 #include <TSystem.h>
176 #include <TObject.h>
177 #include <TString.h>
178 #include <TTimeStamp.h>
179 #include <TObjString.h>
180 #include <TSQLServer.h>
181 #include <TSQLResult.h>
182 #include <TSQLRow.h>
183 #include <TMutex.h>
184
185 #include <TMonaLisaWriter.h>
186
187 #include <fstream>
188
189 #include <sys/types.h>
190 #include <sys/wait.h>
191
192 ClassImp(AliShuttle)
193
194 Bool_t AliShuttle::fgkProcessDCS(kTRUE);
195
196 //______________________________________________________________________________________________
197 AliShuttle::AliShuttle(const AliShuttleConfig* config,
198                 UInt_t timeout, Int_t retries):
199 fConfig(config),
200 fTimeout(timeout), fRetries(retries),
201 fPreprocessorMap(),
202 fLogbookEntry(0),
203 fCurrentDetector(),
204 fStatusEntry(0),
205 fMonitoringMutex(0),
206 fLastActionTime(0),
207 fLastAction(),
208 fMonaLisa(0)
209 {
210         //
211         // config: AliShuttleConfig used
212         // timeout: timeout used for AliDCSClient connection
213         // retries: the number of retries in case of connection error.
214         //
215
216         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
217         for(int iSys=0;iSys<4;iSys++) {
218                 fServer[iSys]=0;
219                 if (iSys < 3)
220                         fFXSlist[iSys].SetOwner(kTRUE);
221         }
222         fPreprocessorMap.SetOwner(kTRUE);
223
224         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
225                 fFirstUnprocessed[iDet] = kFALSE;
226
227         fMonitoringMutex = new TMutex();
228 }
229
230 //______________________________________________________________________________________________
231 AliShuttle::~AliShuttle()
232 {
233 // destructor
234
235         fPreprocessorMap.DeleteAll();
236         for(int iSys=0;iSys<4;iSys++)
237                 if(fServer[iSys]) {
238                         fServer[iSys]->Close();
239                         delete fServer[iSys];
240                         fServer[iSys] = 0;
241                 }
242
243         if (fStatusEntry){
244                 delete fStatusEntry;
245                 fStatusEntry = 0;
246         }
247         
248         if (fMonitoringMutex) 
249         {
250                 delete fMonitoringMutex;
251                 fMonitoringMutex = 0;
252         }
253 }
254
255 //______________________________________________________________________________________________
256 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
257 {
258         //
259         // Registers new AliPreprocessor.
260         // It uses GetName() for indentificator of the pre processor.
261         // The pre processor is registered it there isn't any other
262         // with the same identificator (GetName()).
263         //
264
265         const char* detName = preprocessor->GetName();
266         if(GetDetPos(detName) < 0)
267                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
268
269         if (fPreprocessorMap.GetValue(detName)) {
270                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
271                 return;
272         }
273
274         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
275 }
276 //______________________________________________________________________________________________
277 Bool_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
278                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
279 {
280   // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
281   // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
282   // using this function. Use StoreReferenceData instead!
283   // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
284   // finishes the data are transferred to the main storage (Grid).
285
286         return StoreLocally(fgkLocalCDB, path, object, metaData, validityStart, validityInfinite);
287
288 }
289
290 //______________________________________________________________________________________________
291 Bool_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
292 {
293   // Stores a CDB object in the storage for reference data. This objects will not be available during
294   // offline reconstrunction. Use this function for reference data only!
295   // It calls StoreLocally function which temporarily stores the data locally; when the preprocessor
296   // finishes the data are transferred to the main storage (Grid).
297
298         return StoreLocally(fgkLocalRefStorage, path, object, metaData);
299
300 }
301
302 //______________________________________________________________________________________________
303 Bool_t AliShuttle::StoreLocally(const TString& localUri,
304                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
305                         Int_t validityStart, Bool_t validityInfinite)
306 {
307   // Store object temporarily in local storage. Parameters are passed by Store and StoreReferenceData functions.
308   // when the preprocessor finishes the data are transferred to the main storage (Grid).
309   // The parameters are:
310   //   1) Uri of the backup storage (Local)
311   //   2) the object's path.
312   //   3) the object to be stored
313   //   4) the metaData to be associated with the object
314   //   5) the validity start run number w.r.t. the current run,
315   //      if the data is valid only for this run leave the default 0
316   //   6) specifies if the calibration data is valid for infinity (this means until updated),
317   //      typical for calibration runs, the default is kFALSE
318   //
319   // returns 0 if fail, 1 otherwise
320
321         const char* cdbType = (localUri == fgkLocalCDB) ? "CDB" : "Reference";
322
323         Int_t firstRun = GetCurrentRun() - validityStart;
324         if(firstRun < 0) {
325                 AliError("First valid run happens to be less than 0! Setting it to 0.");
326                 firstRun=0;
327         }
328
329         Int_t lastRun = -1;
330         if(validityInfinite) {
331                 lastRun = AliCDBRunRange::Infinity();
332         } else {
333                 lastRun = GetCurrentRun();
334         }
335
336         // Version is set to current run, it will be used later to transfer data to Grid
337         AliCDBId id(path, firstRun, lastRun, GetCurrentRun(), -1);
338
339         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
340                 TObjString runUsed = Form("%d", GetCurrentRun());
341                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
342         }
343
344         Bool_t result = kFALSE;
345
346         if (!(AliCDBManager::Instance()->GetStorage(localUri))) {
347                 Log("SHUTTLE", Form("StoreLocally - Cannot activate local %s storage", cdbType));
348         } else {
349                 result = AliCDBManager::Instance()->GetStorage(localUri)
350                                         ->Put(object, id, metaData);
351         }
352
353         if(!result) {
354
355                 Log("SHUTTLE", Form("StoreLocally - Can't store %s data!", fCurrentDetector.Data()));
356         }
357
358         return result;
359 }
360
361 //______________________________________________________________________________________________
362 Bool_t AliShuttle::StoreOCDB()
363 {
364   // Called when preprocessor ends successfully or when previous storage attempt failed (kStoreError status)
365   // Calls underlying StoreOCDB(const char*) function twice, for OCDB and Reference storage.
366
367         AliInfo("Storing OCDB data ...");
368         Bool_t resultCDB = StoreOCDB(fgkMainCDB);
369
370         AliInfo("Storing reference data ...");
371         Bool_t resultRef = StoreOCDB(fgkMainRefStorage);
372
373         return resultCDB && resultRef;
374 }
375
376 //______________________________________________________________________________________________
377 Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
378 {
379         //
380         // Called by StoreOCDB(), performs actual storage to the main OCDB and reference storages (Grid)
381         //
382
383         TObjArray* gridIds=0;
384
385         Bool_t result = kTRUE;
386
387         const char* type = 0;
388         TString localURI;
389         if(gridURI == fgkMainCDB) {
390                 type = "OCDB";
391                 localURI = fgkLocalCDB;
392         } else if(gridURI == fgkMainRefStorage) {
393                 type = "reference";
394                 localURI = fgkLocalRefStorage;
395         } else {
396                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
397                 return kFALSE;
398         }
399
400         AliCDBManager* man = AliCDBManager::Instance();
401
402         AliCDBStorage *gridSto = man->GetStorage(gridURI);
403         if(!gridSto) {
404                 Log("SHUTTLE",
405                         Form("StoreOCDB - cannot activate main %s storage", type));
406                 return kFALSE;
407         }
408
409         gridIds = gridSto->GetQueryCDBList();
410
411         // get objects previously stored in local CDB
412         AliCDBStorage *localSto = man->GetStorage(localURI);
413         if(!localSto) {
414                 Log("SHUTTLE",
415                         Form("StoreOCDB - cannot activate local %s storage", type));
416                 return kFALSE;
417         }
418         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
419         // Local objects were stored with current run as Grid version!
420         TList* localEntries = localSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
421         localEntries->SetOwner(1);
422
423         // loop on local stored objects
424         TIter localIter(localEntries);
425         AliCDBEntry *aLocEntry = 0;
426         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
427                 aLocEntry->SetOwner(1);
428                 AliCDBId aLocId = aLocEntry->GetId();
429                 aLocEntry->SetVersion(-1);
430                 aLocEntry->SetSubVersion(-1);
431
432                 // If local object is valid up to infinity we store it only if it is
433                 // the first unprocessed run!
434                 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity() &&
435                         !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
436                 {
437                         Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
438                                                 "there are previous unprocessed runs!",
439                                                 fCurrentDetector.Data(), aLocId.GetPath().Data()));
440                         continue;
441                 }
442
443                 // loop on Grid valid Id's
444                 Bool_t store = kTRUE;
445                 TIter gridIter(gridIds);
446                 AliCDBId* aGridId = 0;
447                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
448                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
449                         // skip all objects valid up to infinity
450                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
451                         // if we get here, it means there's already some more recent object stored on Grid!
452                         store = kFALSE;
453                         break;
454                 }
455
456                 // If we get here, the file can be stored!
457                 Bool_t storeOk = gridSto->Put(aLocEntry);
458                 if(!store || storeOk){
459
460                         if (!store)
461                         {
462                                 Log(fCurrentDetector.Data(),
463                                         Form("StoreOCDB - A more recent object already exists in %s storage: <%s>",
464                                                 type, aGridId->ToString().Data()));
465                         } else {
466                                 Log("SHUTTLE",
467                                         Form("StoreOCDB - Object <%s> successfully put into %s storage",
468                                                 aLocId.ToString().Data(), type));
469                         }
470
471                         // removing local filename...
472                         TString filename;
473                         localSto->IdToFilename(aLocId, filename);
474                         AliInfo(Form("Removing local file %s", filename.Data()));
475                         RemoveFile(filename.Data());
476                         continue;
477                 } else  {
478                         Log("SHUTTLE",
479                                 Form("StoreOCDB - Grid %s storage of object <%s> failed",
480                                         type, aLocId.ToString().Data()));
481                         result = kFALSE;
482                 }
483         }
484         localEntries->Clear();
485
486         return result;
487 }
488
489 //______________________________________________________________________________________________
490 void AliShuttle::CleanLocalStorage(const TString& uri)
491 {
492 // Called in case the preprocessor is declared failed. Remove remaining objects from the local storages.
493
494         const char* type = 0;
495         if(uri == fgkLocalCDB) {
496                 type = "OCDB";
497         } else if(uri == fgkLocalRefStorage) {
498                 type = "reference";
499         } else {
500                 AliError(Form("Invalid storage URI: %s", uri.Data()));
501                 return;
502         }
503
504         AliCDBManager* man = AliCDBManager::Instance();
505
506         // open local storage
507         AliCDBStorage *localSto = man->GetStorage(uri);
508         if(!localSto) {
509                 Log("SHUTTLE",
510                         Form("CleanLocalStorage - cannot activate local %s storage", type));
511                 return;
512         }
513
514         TString filename(Form("%s/%s/*/Run*_v%d_s*.root",
515                 localSto->GetBaseFolder().Data(), fCurrentDetector.Data(), GetCurrentRun()));
516
517         AliInfo(Form("filename = %s", filename.Data()));
518
519         AliInfo(Form("Removing remaining local files from run %d and detector %s ...",
520                 GetCurrentRun(), fCurrentDetector.Data()));
521
522         RemoveFile(filename.Data());
523
524 }
525
526 //______________________________________________________________________________________________
527 void AliShuttle::RemoveFile(const char* filename)
528 {
529 // removes local file
530
531         TString command(Form("rm -f %s", filename));
532
533         Int_t result = gSystem->Exec(command.Data());
534         if(result != 0)
535         {
536                 Log("SHUTTLE", Form("RemoveFile - %s: Cannot remove file %s!",
537                         fCurrentDetector.Data(), filename));
538         }
539 }
540
541 //______________________________________________________________________________________________
542 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
543 {
544 // Reads the AliShuttleStatus from the CDB
545
546         if (fStatusEntry){
547                 delete fStatusEntry;
548                 fStatusEntry = 0;
549         }
550
551         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
552                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
553
554         if (!fStatusEntry) return 0;
555         fStatusEntry->SetOwner(1);
556
557         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
558         if (!status) {
559                 AliError("Invalid object stored to CDB!");
560                 return 0;
561         }
562
563         return status;
564 }
565
566 //______________________________________________________________________________________________
567 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
568 {
569 // writes the status for one subdetector
570
571         if (fStatusEntry){
572                 delete fStatusEntry;
573                 fStatusEntry = 0;
574         }
575
576         Int_t run = GetCurrentRun();
577
578         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
579
580         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
581         fStatusEntry->SetOwner(1);
582
583         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
584
585         if (!result) {
586                 Log("SHUTTLE", Form("WriteShuttleStatus - Failed for %s, run %d",
587                                                 fCurrentDetector.Data(), run));
588                 return kFALSE;
589         }
590         
591         SendMLInfo();
592
593         return kTRUE;
594 }
595
596 //______________________________________________________________________________________________
597 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
598 {
599   // changes the AliShuttleStatus for the given detector and run to the given status
600
601         if (!fStatusEntry){
602                 AliError("UNEXPECTED: fStatusEntry empty");
603                 return;
604         }
605
606         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
607
608         if (!status){
609                 Log("SHUTTLE", "UNEXPECTED: status could not be read from current CDB entry");
610                 return;
611         }
612
613         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
614                                 fCurrentDetector.Data(),
615                                 status->GetStatusName(),
616                                 status->GetStatusName(newStatus));
617         Log("SHUTTLE", actionStr);
618         SetLastAction(actionStr);
619
620         status->SetStatus(newStatus);
621         if (increaseCount) status->IncreaseCount();
622
623         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
624
625         SendMLInfo();
626 }
627
628 //______________________________________________________________________________________________
629 void AliShuttle::SendMLInfo()
630 {
631         //
632         // sends ML information about the current status of the current detector being processed
633         //
634         
635         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
636         
637         if (!status){
638                 Log("SHUTTLE", "SendMLInfo - UNEXPECTED: status could not be read from current CDB entry");
639                 return;
640         }
641         
642         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
643         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
644
645         TList mlList;
646         mlList.Add(&mlStatus);
647         mlList.Add(&mlRetryCount);
648
649         fMonaLisa->SendParameters(&mlList);
650 }
651
652 //______________________________________________________________________________________________
653 Bool_t AliShuttle::ContinueProcessing()
654 {
655 // this function reads the AliShuttleStatus information from CDB and
656 // checks if the processing should be continued
657 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
658
659         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
660
661         AliPreprocessor* aPreprocessor =
662                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
663         if (!aPreprocessor)
664         {
665                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
666                 return kFALSE;
667         }
668
669         AliShuttleLogbookEntry::Status entryStatus =
670                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
671
672         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
673                 AliInfo(Form("ContinueProcessing - %s is %s",
674                                 fCurrentDetector.Data(),
675                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
676                 return kFALSE;
677         }
678
679         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
680
681         // check if current run is first unprocessed run for current detector
682         if (fConfig->StrictRunOrder(fCurrentDetector) &&
683                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
684         {
685                 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
686                 return kFALSE;
687         }
688
689         AliShuttleStatus* status = ReadShuttleStatus();
690         if (!status) {
691                 // first time
692                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
693                                 fCurrentDetector.Data()));
694                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
695                 return WriteShuttleStatus(status);
696         }
697
698         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
699         // If it happens it may mean Logbook updating failed... let's do it now!
700         if (status->GetStatus() == AliShuttleStatus::kDone ||
701             status->GetStatus() == AliShuttleStatus::kFailed){
702                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
703                                         fCurrentDetector.Data(),
704                                         status->GetStatusName(status->GetStatus())));
705                 UpdateShuttleLogbook(fCurrentDetector.Data(),
706                                         status->GetStatusName(status->GetStatus()));
707                 return kFALSE;
708         }
709
710         if (status->GetStatus() == AliShuttleStatus::kStoreError) {
711                 Log("SHUTTLE",
712                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
713                                 fCurrentDetector.Data()));
714                 if(StoreOCDB()){
715                         Log("SHUTTLE", Form("ContinueProcessing - %s: all objects successfully stored into main storage",
716                                 fCurrentDetector.Data()));
717                         UpdateShuttleStatus(AliShuttleStatus::kDone);
718                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
719                 } else {
720                         Log("SHUTTLE",
721                                 Form("ContinueProcessing - %s: Grid storage failed again",
722                                         fCurrentDetector.Data()));
723                         // trigger ML information manually because we do not had a status change
724                         SendMLInfo();
725                 }
726                 return kFALSE;
727         }
728
729         // if we get here, there is a restart
730         Bool_t cont = kFALSE;
731
732         // abort conditions
733         if (status->GetCount() >= fConfig->GetMaxRetries()) {
734                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
735                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
736                                 status->GetCount(), status->GetStatusName()));
737                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
738                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
739
740                 // there may still be objects in local OCDB and reference storage
741                 // and FXS databases may be not updated: do it now!
742                 CleanLocalStorage(fgkLocalCDB);
743                 CleanLocalStorage(fgkLocalRefStorage);
744                 UpdateTableFailCase();
745
746         } else {
747                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
748                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
749                                 status->GetStatusName(), status->GetCount()));
750                 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
751                 cont = kTRUE;
752         }
753
754         // Send mail to detector expert!
755         AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
756         if (!SendMail())
757                 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
758                                 fCurrentDetector.Data()));
759
760         return cont;
761 }
762
763 //______________________________________________________________________________________________
764 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
765 {
766         //
767         // Makes data retrieval for all detectors in the configuration.
768         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
769         // (Unprocessed, Inactive, Failed or Done).
770         // Returns kFALSE in case of error occured and kTRUE otherwise
771         //
772
773         if(!entry) return kFALSE;
774
775         fLogbookEntry = entry;
776
777         if (fLogbookEntry->IsDone())
778         {
779                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
780                 UpdateShuttleLogbook("shuttle_done");
781                 fLogbookEntry = 0;
782                 return kTRUE;
783         }
784
785         // create ML instance that monitors this run
786         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
787         // disable monitoring of other parameters that come e.g. from TFile
788         gMonitoringWriter = 0;
789
790         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
791                                         GetCurrentRun()));
792
793         // Set run type from run type logbook into current fLogbookEntry
794         SetRunType();
795
796         // Send the information to ML
797         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
798
799         TList mlList;
800         mlList.Add(&mlStatus);
801
802         fMonaLisa->SendParameters(&mlList);
803
804         fLogbookEntry->Print("all");
805
806         // Initialization
807         Bool_t hasError = kFALSE;
808
809         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
810         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
811         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
812         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
813
814         // Loop on detectors in the configuration
815         TIter iter(fConfig->GetDetectors());
816         TObjString* aDetector = 0;
817
818         while ((aDetector = (TObjString*) iter.Next()))
819         {
820                 fCurrentDetector = aDetector->String();
821
822                 if (ContinueProcessing() == kFALSE) continue;
823
824                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
825                                                 GetCurrentRun(), aDetector->GetName()));
826
827                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
828
829                 Log(fCurrentDetector.Data(), "Starting processing");
830
831                 Int_t pid = fork();
832
833                 if (pid < 0)
834                 {
835                         Log("SHUTTLE", "ERROR: Forking failed");
836                 }
837                 else if (pid > 0)
838                 {
839                         // parent
840                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
841                                                         GetCurrentRun(), aDetector->GetName()));
842
843                         Long_t begin = time(0);
844
845                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
846                         while (waitpid(pid, &status, WNOHANG) == 0)
847                         {
848                                 Long_t expiredTime = time(0) - begin;
849
850                                 if (expiredTime > fConfig->GetPPTimeOut())
851                                 {
852                                         Log("SHUTTLE", Form("%s: Process time out. Run time: %d seconds. Killing...",
853                                                                 fCurrentDetector.Data(), expiredTime));
854
855                                         kill(pid, 9);
856
857                                         UpdateShuttleStatus(AliShuttleStatus::kPPTimeOut);
858                                         hasError = kTRUE;
859
860                                         gSystem->Sleep(1000);
861                                 }
862                                 else
863                                 {
864                                         if (expiredTime % 60 == 0)
865                                         Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
866                                                                 expiredTime));
867                                         gSystem->Sleep(1000);
868                                 }
869                         }
870
871                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
872                                                                 GetCurrentRun(), aDetector->GetName()));
873
874                         if (WIFEXITED(status))
875                         {
876                                 Int_t returnCode = WEXITSTATUS(status);
877
878                                 Log("SHUTTLE", Form("%s: the return code is %d", fCurrentDetector.Data(),
879                                                                                 returnCode));
880
881                                 if (returnCode != 0) hasError = kTRUE;
882                         }
883                 }
884                 else if (pid == 0)
885                 {
886                         // client
887                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
888
889                         UInt_t returnCode = ProcessCurrentDetector(); // 0 means success
890
891
892                         if (returnCode > 0) // TODO: returnCode>0 means Preprocessor ERROR! Preprocessors should follow this!
893                         {
894                                 if (returnCode == kDCSErrCode)
895                                 {
896                                         AliInfo(Form("\n \t\t\t****** run %d - %s: DCS ERROR ****** \n\n",
897                                                         GetCurrentRun(), aDetector->GetName()));
898                                 } else {
899
900                                         AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
901                                                         GetCurrentRun(), aDetector->GetName()));
902                                 }
903                         } else { // Preprocessor finished successfully!
904
905                                 // Update time_processed field in FXS DB
906                                 if (UpdateTable() == kFALSE)
907                                         Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!"));
908
909                                 // Transfer the data from local storage to main storage (Grid)
910                                 UpdateShuttleStatus(AliShuttleStatus::kStoreStarted);
911                                 if (StoreOCDB() == kFALSE)
912                                 {
913                                         AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
914                                                         GetCurrentRun(), aDetector->GetName()));
915                                         UpdateShuttleStatus(AliShuttleStatus::kStoreError);
916                                         returnCode = kStorErrCode;
917                                 } else {
918                                         AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
919                                                         GetCurrentRun(), aDetector->GetName()));
920                                         UpdateShuttleStatus(AliShuttleStatus::kDone);
921                                 }
922                         }
923
924                         for (UInt_t iSys=0; iSys<3; iSys++)
925                         {
926                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
927                         }
928
929                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
930                                                         GetCurrentRun(), aDetector->GetName(), returnCode));
931
932                         // the client exits here
933                         gSystem->Exit(returnCode);
934
935                         AliError("We should never get here!!!");
936                 }
937         }
938
939         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
940                                                         GetCurrentRun()));
941
942         //check if shuttle is done for this run, if so update logbook
943         TObjArray checkEntryArray;
944         checkEntryArray.SetOwner(1);
945         TString whereClause = Form("where run=%d", GetCurrentRun());
946         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
947                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
948                                                 GetCurrentRun()));
949                 return hasError == kFALSE;
950         }
951
952         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
953                                                 (checkEntryArray.At(0));
954
955         if (checkEntry)
956         {
957                 if (checkEntry->IsDone())
958                 {
959                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
960                         UpdateShuttleLogbook("shuttle_done");
961                 }
962                 else
963                 {
964                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
965                         {
966                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
967                                 {
968                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
969                                                         checkEntry->GetRun(), GetDetName(iDet)));
970                                         fFirstUnprocessed[iDet] = kFALSE;
971                                 }
972                         }
973                 }
974         }
975
976         // remove ML instance
977         delete fMonaLisa;
978         fMonaLisa = 0;
979
980         fLogbookEntry = 0;
981
982         return hasError == kFALSE;
983 }
984
985 //______________________________________________________________________________________________
986 UInt_t AliShuttle::ProcessCurrentDetector()
987 {
988         //
989         // Makes data retrieval just for a specific detector (fCurrentDetector).
990         // Threre should be a configuration for this detector.
991
992         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
993
994         TMap dcsMap;
995         dcsMap.SetOwner(1);
996
997         Bool_t aDCSError = kFALSE;
998
999         // call preprocessor
1000         AliPreprocessor* aPreprocessor =
1001                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
1002
1003         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
1004
1005         Bool_t processDCS = aPreprocessor->ProcessDCS();
1006
1007         // TODO Test only... I've added a flag that allows to
1008         // exclude DCS archive DB query
1009         if (!processDCS || !fgkProcessDCS)
1010         {
1011                 AliInfo("Skipping DCS processing!");
1012                 aDCSError = kFALSE;
1013         } else {
1014
1015                 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
1016
1017                 TString host(fConfig->GetDCSHost(fCurrentDetector));
1018                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
1019
1020                 // Retrieval of Aliases
1021                 TObjString* anAlias = 0;
1022                 Int_t iAlias = 1;
1023                 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
1024                 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
1025                 while ((anAlias = (TObjString*) iterAliases.Next()))
1026                 {
1027                         TObjArray *valueSet = new TObjArray();
1028                         valueSet->SetOwner(1);
1029
1030                         if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
1031                                 AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
1032                                                 anAlias->GetName(), iAlias++, nTotAliases));
1033                         aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
1034
1035                         if(!aDCSError)
1036                         {
1037                                 dcsMap.Add(anAlias->Clone(), valueSet);
1038                         } else {
1039                                 Log(fCurrentDetector,
1040                                         Form("ProcessCurrentDetector - Error while retrieving alias %s",
1041                                                 anAlias->GetName()));
1042                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1043                                 dcsMap.DeleteAll();
1044                                 return kDCSErrCode;
1045                         }
1046                 }
1047
1048                 // Retrieval of Data Points
1049                 TObjString* aDP = 0;
1050                 Int_t iDP = 0;
1051                 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
1052                 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
1053                 while ((aDP = (TObjString*) iterDP.Next()))
1054                 {
1055                         TObjArray *valueSet = new TObjArray();
1056                         valueSet->SetOwner(1);
1057                         if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
1058                                 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
1059                                                 aDP->GetName(), iDP++, nTotDPs));
1060                         aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
1061
1062                         if(!aDCSError)
1063                         {
1064                                 dcsMap.Add(aDP->Clone(), valueSet);
1065                         } else {
1066                                 Log(fCurrentDetector,
1067                                         Form("ProcessCurrentDetector - Error while retrieving data point %s",
1068                                                 aDP->GetName()));
1069                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
1070                                 dcsMap.DeleteAll();
1071                                 return kDCSErrCode;
1072                         }
1073                 }
1074         }
1075
1076         // DCS Archive DB processing successful. Call Preprocessor!
1077         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
1078
1079         UInt_t returnValue = aPreprocessor->Process(&dcsMap);
1080
1081         if (returnValue > 0) // Preprocessor error!
1082         {
1083                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
1084
1085         } else { // preprocessor ok!
1086                 UpdateShuttleStatus(AliShuttleStatus::kPPDone);
1087                 UpdateShuttleLogbook(fCurrentDetector, "DONE");
1088                 Log("SHUTTLE", Form("ProcessCurrentDetector - %s preprocessor returned success",
1089                                         fCurrentDetector.Data()));
1090         }
1091
1092         dcsMap.DeleteAll();
1093
1094         return returnValue;
1095 }
1096
1097 //______________________________________________________________________________________________
1098 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
1099                 TObjArray& entries)
1100 {
1101 // Query DAQ's Shuttle logbook and fills detector status object.
1102 // Call QueryRunParameters to query DAQ logbook for run parameters.
1103
1104         entries.SetOwner(1);
1105
1106         // check connection, in case connect
1107         if(!Connect(3)) return kFALSE;
1108
1109         TString sqlQuery;
1110         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
1111
1112         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1113         if (!aResult) {
1114                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1115                 return kFALSE;
1116         }
1117
1118         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
1119
1120         if(aResult->GetRowCount() == 0) {
1121 //              if(sqlQuery.EndsWith("where shuttle_done=0 order by run")){
1122 //                      Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
1123 //                      delete aResult;
1124 //                      return kTRUE;
1125 //              } else {
1126                         AliInfo("No entries in Shuttle Logbook match request");
1127                         delete aResult;
1128                         return kTRUE;
1129 //              }
1130         }
1131
1132         // TODO Check field count!
1133         const UInt_t nCols = 22;
1134         if (aResult->GetFieldCount() != (Int_t) nCols) {
1135                 AliError("Invalid SQL result field number!");
1136                 delete aResult;
1137                 return kFALSE;
1138         }
1139
1140         TSQLRow* aRow;
1141         while ((aRow = aResult->Next())) {
1142                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
1143                 Int_t run = runString.Atoi();
1144
1145                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
1146                 if (!entry)
1147                         continue;
1148
1149                 // loop on detectors
1150                 for(UInt_t ii = 0; ii < nCols; ii++)
1151                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
1152
1153                 entries.AddLast(entry);
1154                 delete aRow;
1155         }
1156
1157 //      if(sqlQuery.EndsWith("where shuttle_done=0 order by run"))
1158 //              Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
1159 //                                                      entries.GetEntriesFast()));
1160         delete aResult;
1161         return kTRUE;
1162 }
1163
1164 //______________________________________________________________________________________________
1165 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
1166 {
1167         //
1168         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
1169         //
1170
1171         // check connection, in case connect
1172         if (!Connect(3))
1173                 return 0;
1174
1175         TString sqlQuery;
1176         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
1177
1178         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
1179         if (!aResult) {
1180                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
1181                 return 0;
1182         }
1183
1184         if (aResult->GetRowCount() == 0) {
1185                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1186                 delete aResult;
1187                 return 0;
1188         }
1189
1190         if (aResult->GetRowCount() > 1) {
1191                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1192                 delete aResult;
1193                 return 0;
1194         }
1195
1196         TSQLRow* aRow = aResult->Next();
1197         if (!aRow)
1198         {
1199                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1200                 delete aResult;
1201                 return 0;
1202         }
1203
1204         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1205
1206         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1207                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1208
1209         UInt_t startTime = entry->GetStartTime();
1210         UInt_t endTime = entry->GetEndTime();
1211
1212         if (!startTime || !endTime || startTime > endTime) {
1213                 Log("SHUTTLE",
1214                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1215                                 run, startTime, endTime));
1216                 delete entry;
1217                 delete aRow;
1218                 delete aResult;
1219                 return 0;
1220         }
1221
1222         delete aRow;
1223         delete aResult;
1224
1225         return entry;
1226 }
1227
1228 //______________________________________________________________________________________________
1229 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1230                                 TObjArray* valueSet, DCSType type)
1231 {
1232 // Retrieve all "entry" data points from the DCS server
1233 // host, port: TSocket connection parameters
1234 // entry: name of the alias or data point
1235 // valueSet: array of retrieved AliDCSValue's
1236 // type: kAlias or kDP
1237
1238         AliDCSClient client(host, port, fTimeout, fRetries);
1239         if (!client.IsConnected())
1240         {
1241                 return kFALSE;
1242         }
1243
1244         Int_t result=0;
1245
1246         if (type == kAlias)
1247         {
1248                 result = client.GetAliasValues(entry,
1249                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1250         } else
1251         if (type == kDP)
1252         {
1253                 result = client.GetDPValues(entry,
1254                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1255         }
1256
1257         if (result < 0)
1258         {
1259                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1260                         entry, AliDCSClient::GetErrorString(result)));
1261
1262                 if (result == AliDCSClient::fgkServerError)
1263                 {
1264                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1265                                 client.GetServerError().Data()));
1266                 }
1267
1268                 return kFALSE;
1269         }
1270
1271         return kTRUE;
1272 }
1273
1274 //______________________________________________________________________________________________
1275 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1276                 const char* id, const char* source)
1277 {
1278 // Get calibration file from file exchange servers
1279 // First queris the FXS database for the file name, using the run, detector, id and source info
1280 // then calls RetrieveFile(filename) for actual copy to local disk
1281 // run: current run being processed (given by Logbook entry fLogbookEntry)
1282 // detector: the Preprocessor name
1283 // id: provided as a parameter by the Preprocessor
1284 // source: provided by the Preprocessor through GetFileSources function
1285
1286         // check connection, in case connect
1287         if (!Connect(system))
1288         {
1289                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1290                 return 0;
1291         }
1292
1293         // Query preparation
1294         TString sourceName(source);
1295         Int_t nFields = 3;
1296         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1297                                                                 fConfig->GetFXSdbTable(system));
1298         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1299                                                                 GetCurrentRun(), detector, id);
1300
1301         if (system == kDAQ)
1302         {
1303                 whereClause += Form(" and DAQsource=\"%s\"", source);
1304         }
1305         else if (system == kDCS)
1306         {
1307                 sourceName="none";
1308         }
1309         else if (system == kHLT)
1310         {
1311                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
1312                 nFields = 3;
1313         }
1314
1315         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1316
1317         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1318
1319         // Query execution
1320         TSQLResult* aResult = 0;
1321         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1322         if (!aResult) {
1323                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1324                                 GetSystemName(system), id, sourceName.Data()));
1325                 return 0;
1326         }
1327
1328         if(aResult->GetRowCount() == 0)
1329         {
1330                 Log(detector,
1331                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1332                                 GetSystemName(system), id, sourceName.Data()));
1333                 delete aResult;
1334                 return 0;
1335         }
1336
1337         if (aResult->GetRowCount() > 1) {
1338                 Log(detector,
1339                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1340                                 GetSystemName(system), id, sourceName.Data()));
1341                 delete aResult;
1342                 return 0;
1343         }
1344
1345         if (aResult->GetFieldCount() != nFields) {
1346                 Log(detector,
1347                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1348                                 GetSystemName(system), id, sourceName.Data()));
1349                 delete aResult;
1350                 return 0;
1351         }
1352
1353         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1354
1355         if (!aRow){
1356                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1357                                 GetSystemName(system), id, sourceName.Data()));
1358                 delete aResult;
1359                 return 0;
1360         }
1361
1362         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1363         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1364         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
1365
1366         delete aResult;
1367         delete aRow;
1368
1369         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1370                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
1371
1372         // retrieved file is renamed to make it unique
1373         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1374                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1375
1376
1377         // file retrieval from FXS
1378         UInt_t nRetries = 0;
1379         UInt_t maxRetries = 3;
1380         Bool_t result = kFALSE;
1381
1382         // copy!! if successful TSystem::Exec returns 0
1383         while(nRetries++ < maxRetries) {
1384                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1385                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1386                 if(!result)
1387                 {
1388                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1389                                         filePath.Data(), GetSystemName(system)));
1390                         continue;
1391                 } else {
1392                         AliInfo(Form("File %s copied from %s FXS into %s/%s",
1393                                                 filePath.Data(), GetSystemName(system),
1394                                                 GetShuttleTempDir(), localFileName.Data()));
1395                 }
1396
1397                 if (fileChecksum.Length()>0)
1398                 {
1399                         // compare md5sum of local file with the one stored in the FXS DB
1400                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1401                                                 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
1402
1403                         if (md5Comp != 0)
1404                         {
1405                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1406                                                         filePath.Data()));
1407                                 result = kFALSE;
1408                                 continue;
1409                         }
1410                 } else {
1411                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
1412                                                         filePath.Data(), GetSystemName(system)));
1413                 }
1414                 if (result) break;
1415         }
1416
1417         if(!result) return 0;
1418
1419         fFXSCalled[system]=kTRUE;
1420         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1421         fFXSlist[system].Add(fileParams);
1422
1423         static TString fullLocalFileName;
1424         fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1425
1426         AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1427
1428         return fullLocalFileName.Data();
1429
1430 }
1431
1432 //______________________________________________________________________________________________
1433 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
1434 {
1435 // Copies file from FXS to local Shuttle machine
1436
1437         // check temp directory: trying to cd to temp; if it does not exist, create it
1438         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
1439                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
1440
1441         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
1442         if (dir == NULL) {
1443                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1444                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
1445                         return kFALSE;
1446                 }
1447
1448         } else {
1449                 gSystem->FreeDirectory(dir);
1450         }
1451
1452         TString baseFXSFolder;
1453         if (system == kDAQ)
1454         {
1455                 baseFXSFolder = "FES/";
1456         }
1457         else if (system == kDCS)
1458         {
1459                 baseFXSFolder = "";
1460         }
1461         else if (system == kHLT)
1462         {
1463                 baseFXSFolder = "~/";
1464         }
1465
1466
1467         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
1468                 fConfig->GetFXSPort(system),
1469                 fConfig->GetFXSUser(system),
1470                 fConfig->GetFXSHost(system),
1471                 baseFXSFolder.Data(),
1472                 fxsFileName,
1473                 GetShuttleTempDir(),
1474                 localFileName);
1475
1476         AliDebug(2, Form("%s",command.Data()));
1477
1478         Bool_t result = (gSystem->Exec(command.Data()) == 0);
1479
1480         return result;
1481 }
1482
1483 //______________________________________________________________________________________________
1484 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1485 {
1486 // Get sources producing the condition file Id from file exchange servers
1487
1488         if (system == kDCS)
1489         {
1490                 AliError("DCS system has only one source of data!");
1491                 return NULL;
1492
1493         }
1494
1495         // check connection, in case connect
1496         if (!Connect(system))
1497         {
1498                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1499                 return NULL;
1500         }
1501
1502         TString sourceName = 0;
1503         if (system == kDAQ)
1504         {
1505                 sourceName = "DAQsource";
1506         } else if (system == kHLT)
1507         {
1508                 sourceName = "DDLnumbers";
1509         }
1510
1511         TString sqlQueryStart = Form("select %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
1512         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1513                                 GetCurrentRun(), detector, id);
1514         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1515
1516         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1517
1518         // Query execution
1519         TSQLResult* aResult;
1520         aResult = fServer[system]->Query(sqlQuery);
1521         if (!aResult) {
1522                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
1523                                 GetSystemName(system), id));
1524                 return 0;
1525         }
1526
1527         if (aResult->GetRowCount() == 0)
1528         {
1529                 Log(detector,
1530                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
1531                 delete aResult;
1532                 return 0;
1533         }
1534
1535         TSQLRow* aRow;
1536         TList *list = new TList();
1537         list->SetOwner(1);
1538
1539         while ((aRow = aResult->Next()))
1540         {
1541
1542                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
1543                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
1544                 list->Add(new TObjString(source));
1545                 delete aRow;
1546         }
1547
1548         delete aResult;
1549
1550         return list;
1551 }
1552
1553 //______________________________________________________________________________________________
1554 Bool_t AliShuttle::Connect(Int_t system)
1555 {
1556 // Connect to MySQL Server of the system's FXS MySQL databases
1557 // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
1558
1559         // check connection: if already connected return
1560         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1561
1562         TString dbHost, dbUser, dbPass, dbName;
1563
1564         if (system < 3) // FXS db servers
1565         {
1566                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1567                 dbUser = fConfig->GetFXSdbUser(system);
1568                 dbPass = fConfig->GetFXSdbPass(system);
1569                 dbName =   fConfig->GetFXSdbName(system);
1570         } else { // Run & Shuttle logbook servers
1571         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1572                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1573                 dbUser = fConfig->GetDAQlbUser();
1574                 dbPass = fConfig->GetDAQlbPass();
1575                 dbName =   fConfig->GetDAQlbDB();
1576         }
1577
1578         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1579         if (!fServer[system] || !fServer[system]->IsConnected()) {
1580                 if(system < 3)
1581                 {
1582                 AliError(Form("Can't establish connection to FXS database for %s",
1583                                         AliShuttleInterface::GetSystemName(system)));
1584                 } else {
1585                 AliError("Can't establish connection to Run logbook.");
1586                 }
1587                 if(fServer[system]) delete fServer[system];
1588                 return kFALSE;
1589         }
1590
1591         // Get tables
1592         TSQLResult* aResult=0;
1593         switch(system){
1594                 case kDAQ:
1595                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
1596                         break;
1597                 case kDCS:
1598                         aResult = fServer[kDCS]->GetTables(dbName.Data());
1599                         break;
1600                 case kHLT:
1601                         aResult = fServer[kHLT]->GetTables(dbName.Data());
1602                         break;
1603                 default:
1604                         aResult = fServer[3]->GetTables(dbName.Data());
1605                         break;
1606         }
1607
1608         delete aResult;
1609         return kTRUE;
1610 }
1611
1612 //______________________________________________________________________________________________
1613 Bool_t AliShuttle::UpdateTable()
1614 {
1615 // Update FXS table filling time_processed field in all rows corresponding to current run and detector
1616
1617         Bool_t result = kTRUE;
1618
1619         for (UInt_t system=0; system<3; system++)
1620         {
1621                 if(!fFXSCalled[system]) continue;
1622
1623                 // check connection, in case connect
1624                 if (!Connect(system))
1625                 {
1626                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
1627                         result = kFALSE;
1628                         continue;
1629                 }
1630
1631                 TTimeStamp now; // now
1632
1633                 // Loop on FXS list entries
1634                 TIter iter(&fFXSlist[system]);
1635                 TObjString *aFXSentry=0;
1636                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
1637                 {
1638                         TString aFXSentrystr = aFXSentry->String();
1639                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1640                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
1641                         {
1642                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
1643                                         GetSystemName(system), aFXSentrystr.Data()));
1644                                 if(aFXSarray) delete aFXSarray;
1645                                 result = kFALSE;
1646                                 continue;
1647                         }
1648                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1649                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
1650
1651                         TString whereClause;
1652                         if (system == kDAQ)
1653                         {
1654                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1655                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1656                         }
1657                         else if (system == kDCS)
1658                         {
1659                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
1660                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
1661                         }
1662                         else if (system == kHLT)
1663                         {
1664                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1665                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1666                         }
1667
1668                         delete aFXSarray;
1669
1670                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1671                                                                 now.GetSec(), whereClause.Data());
1672
1673                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1674
1675                         // Query execution
1676                         TSQLResult* aResult;
1677                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1678                         if (!aResult)
1679                         {
1680                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
1681                                                                 GetSystemName(system), sqlQuery.Data()));
1682                                 result = kFALSE;
1683                                 continue;
1684                         }
1685                         delete aResult;
1686                 }
1687         }
1688
1689         return result;
1690 }
1691
1692 //______________________________________________________________________________________________
1693 Bool_t AliShuttle::UpdateTableFailCase()
1694 {
1695 // Update FXS table filling time_processed field in all rows corresponding to current run and detector
1696 // this is called in case the preprocessor is declared failed for the current run, because
1697 // the fields are updated only in case of success
1698
1699         Bool_t result = kTRUE;
1700
1701         for (UInt_t system=0; system<3; system++)
1702         {
1703                 // check connection, in case connect
1704                 if (!Connect(system))
1705                 {
1706                         Log(fCurrentDetector, Form("UpdateTableFailCase - Couldn't connect to %s FXS database",
1707                                                         GetSystemName(system)));
1708                         result = kFALSE;
1709                         continue;
1710                 }
1711
1712                 TTimeStamp now; // now
1713
1714                 // Loop on FXS list entries
1715
1716                 TString whereClause = Form("where run=%d and detector=\"%s\";",
1717                                                 GetCurrentRun(), fCurrentDetector.Data());
1718
1719
1720                 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1721                                                         now.GetSec(), whereClause.Data());
1722
1723                 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1724
1725                 // Query execution
1726                 TSQLResult* aResult;
1727                 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1728                 if (!aResult)
1729                 {
1730                         Log(fCurrentDetector, Form("UpdateTableFailCase - %s db: can't execute SQL query <%s>",
1731                                                         GetSystemName(system), sqlQuery.Data()));
1732                         result = kFALSE;
1733                         continue;
1734                 }
1735                 delete aResult;
1736         }
1737
1738         return result;
1739 }
1740
1741 //______________________________________________________________________________________________
1742 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1743 {
1744         //
1745         // Update Shuttle logbook filling detector or shuttle_done column
1746         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1747         //
1748
1749         // check connection, in case connect
1750         if(!Connect(3)){
1751                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1752                 return kFALSE;
1753         }
1754
1755         TString detName(detector);
1756         TString setClause;
1757         if(detName == "shuttle_done")
1758         {
1759                 setClause = "set shuttle_done=1";
1760
1761                 // Send the information to ML
1762                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
1763
1764                 TList mlList;
1765                 mlList.Add(&mlStatus);
1766
1767                 fMonaLisa->SendParameters(&mlList);
1768         } else {
1769                 TString statusStr(status);
1770                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1771                    statusStr.Contains("failed", TString::kIgnoreCase)){
1772                         setClause = Form("set %s=\"%s\"", detector, status);
1773                 } else {
1774                         Log("SHUTTLE",
1775                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1776                                         status, detector));
1777                         return kFALSE;
1778                 }
1779         }
1780
1781         TString whereClause = Form("where run=%d", GetCurrentRun());
1782
1783         TString sqlQuery = Form("update %s %s %s",
1784                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
1785
1786         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1787
1788         // Query execution
1789         TSQLResult* aResult;
1790         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1791         if (!aResult) {
1792                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1793                 return kFALSE;
1794         }
1795         delete aResult;
1796
1797         return kTRUE;
1798 }
1799
1800 //______________________________________________________________________________________________
1801 Int_t AliShuttle::GetCurrentRun() const
1802 {
1803 // Get current run from logbook entry
1804
1805         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
1806 }
1807
1808 //______________________________________________________________________________________________
1809 UInt_t AliShuttle::GetCurrentStartTime() const
1810 {
1811 // get current start time
1812
1813         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
1814 }
1815
1816 //______________________________________________________________________________________________
1817 UInt_t AliShuttle::GetCurrentEndTime() const
1818 {
1819 // get current end time from logbook entry
1820
1821         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
1822 }
1823
1824 //______________________________________________________________________________________________
1825 void AliShuttle::Log(const char* detector, const char* message)
1826 {
1827 // Fill log string with a message
1828
1829         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1830         if (dir == NULL) {
1831                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
1832                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1833                         return;
1834                 }
1835
1836         } else {
1837                 gSystem->FreeDirectory(dir);
1838         }
1839
1840         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
1841         if (GetCurrentRun() >= 0) 
1842                 toLog += Form("run %d - ", GetCurrentRun());
1843         toLog += Form("%s", message);
1844
1845         AliInfo(toLog.Data());
1846
1847         TString fileName;
1848         if (GetCurrentRun() >= 0) 
1849                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
1850         else
1851                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
1852         
1853         gSystem->ExpandPathName(fileName);
1854
1855         ofstream logFile;
1856         logFile.open(fileName, ofstream::out | ofstream::app);
1857
1858         if (!logFile.is_open()) {
1859                 AliError(Form("Could not open file %s", fileName.Data()));
1860                 return;
1861         }
1862
1863         logFile << toLog.Data() << "\n";
1864
1865         logFile.close();
1866 }
1867
1868 //______________________________________________________________________________________________
1869 Bool_t AliShuttle::Collect(Int_t run)
1870 {
1871 //
1872 // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1873 // If a dedicated run is given this run is processed
1874 //
1875 // In operational mode, this is the Shuttle function triggered by the EOR signal.
1876 //
1877
1878         if (run == -1)
1879                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1880         else
1881                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
1882
1883         SetLastAction("Starting");
1884
1885         TString whereClause("where shuttle_done=0");
1886         if (run != -1)
1887                 whereClause += Form(" and run=%d", run);
1888
1889         TObjArray shuttleLogbookEntries;
1890         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1891         {
1892                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1893                 return kFALSE;
1894         }
1895
1896         if (shuttleLogbookEntries.GetEntries() == 0)
1897         {
1898                 if (run == -1)
1899                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
1900                 else
1901                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
1902                                                 "or it does not exist in Shuttle logbook", run));
1903                 return kTRUE;
1904         }
1905
1906         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1907                 fFirstUnprocessed[iDet] = kTRUE;
1908
1909         if (run != -1)
1910         {
1911                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1912                 // flag them into fFirstUnprocessed array
1913                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1914                 TObjArray tmpLogbookEntries;
1915                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1916                 {
1917                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1918                         return kFALSE;
1919                 }
1920
1921                 TIter iter(&tmpLogbookEntries);
1922                 AliShuttleLogbookEntry* anEntry = 0;
1923                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
1924                 {
1925                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1926                         {
1927                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1928                                 {
1929                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1930                                                         anEntry->GetRun(), GetDetName(iDet)));
1931                                         fFirstUnprocessed[iDet] = kFALSE;
1932                                 }
1933                         }
1934
1935                 }
1936
1937         }
1938
1939         if (!RetrieveConditionsData(shuttleLogbookEntries))
1940         {
1941                 Log("SHUTTLE", "Collect - Process of at least one run failed");
1942                 return kFALSE;
1943         }
1944
1945         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
1946         return kTRUE;
1947 }
1948
1949 //______________________________________________________________________________________________
1950 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1951 {
1952 // Retrieve conditions data for all runs that aren't processed yet
1953
1954         Bool_t hasError = kFALSE;
1955
1956         TIter iter(&dateEntries);
1957         AliShuttleLogbookEntry* anEntry;
1958
1959         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1960                 if (!Process(anEntry)){
1961                         hasError = kTRUE;
1962                 }
1963
1964                 // clean SHUTTLE temp directory
1965                 TString filename = Form("%s/*.shuttle", GetShuttleTempDir());
1966                 RemoveFile(filename.Data());
1967         }
1968
1969         return hasError == kFALSE;
1970 }
1971
1972 //______________________________________________________________________________________________
1973 ULong_t AliShuttle::GetTimeOfLastAction() const
1974 {
1975         ULong_t tmp;
1976
1977         fMonitoringMutex->Lock();
1978
1979         tmp = fLastActionTime;
1980
1981         fMonitoringMutex->UnLock();
1982
1983         return tmp;
1984 }
1985
1986 //______________________________________________________________________________________________
1987 const TString AliShuttle::GetLastAction() const
1988 {
1989         // returns a string description of the last action
1990
1991         TString tmp;
1992
1993         fMonitoringMutex->Lock();
1994         
1995         tmp = fLastAction;
1996         
1997         fMonitoringMutex->UnLock();
1998
1999         return tmp;
2000 }
2001
2002 //______________________________________________________________________________________________
2003 void AliShuttle::SetLastAction(const char* action)
2004 {
2005         // updates the monitoring variables
2006
2007         fMonitoringMutex->Lock();
2008
2009         fLastAction = action;
2010         fLastActionTime = time(0);
2011         
2012         fMonitoringMutex->UnLock();
2013 }
2014
2015 //______________________________________________________________________________________________
2016 const char* AliShuttle::GetRunParameter(const char* param)
2017 {
2018 // returns run parameter read from DAQ logbook
2019
2020         if(!fLogbookEntry) {
2021                 AliError("No logbook entry!");
2022                 return 0;
2023         }
2024
2025         return fLogbookEntry->GetRunParameter(param);
2026 }
2027
2028 //______________________________________________________________________________________________
2029 AliCDBEntry* AliShuttle::GetFromOCDB(const AliCDBPath& path)
2030 {
2031 // returns obiect from OCDB valid for current run
2032
2033         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
2034         if (!sto)
2035         {
2036                 Log("SHUTTLE", "GetFromOCDB - Cannot activate main OCDB for query!");
2037                 return 0;
2038         }
2039
2040         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
2041 }
2042
2043 //______________________________________________________________________________________________
2044 Bool_t AliShuttle::SendMail()
2045 {
2046 // sends a mail to the subdetector expert in case of preprocessor error
2047
2048         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
2049         if (dir == NULL)
2050         {
2051                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
2052                 {
2053                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
2054                         return kFALSE;
2055                 }
2056
2057         } else {
2058                 gSystem->FreeDirectory(dir);
2059         }
2060
2061         TString bodyFileName;
2062         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
2063         gSystem->ExpandPathName(bodyFileName);
2064
2065         ofstream mailBody;
2066         mailBody.open(bodyFileName, ofstream::out);
2067
2068         if (!mailBody.is_open())
2069         {
2070                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
2071                 return kFALSE;
2072         }
2073
2074         TString to="";
2075         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
2076         TObjString *anExpert=0;
2077         while ((anExpert = (TObjString*) iterExperts.Next()))
2078         {
2079                 to += Form("%s,", anExpert->GetName());
2080         }
2081         to.Remove(to.Length()-1);
2082         AliDebug(2, Form("to: %s",to.Data()));
2083
2084         // TODO this will be removed...
2085         if (to.Contains("not_yet_set")) {
2086                 AliInfo("List of detector responsibles not yet set!");
2087                 return kFALSE;
2088         }
2089
2090         TString cc="alberto.colla@cern.ch";
2091
2092         TString subject = Form("%s Shuttle preprocessor error in run %d !",
2093                                 fCurrentDetector.Data(), GetCurrentRun());
2094         AliDebug(2, Form("subject: %s", subject.Data()));
2095
2096         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
2097         body += Form("SHUTTLE just detected that your preprocessor "
2098                         "exited with ERROR state in run %d!!\n\n", GetCurrentRun());
2099         body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
2100         body += Form("The last 10 lines of %s log file are following:\n\n");
2101
2102         AliDebug(2, Form("Body begin: %s", body.Data()));
2103
2104         mailBody << body.Data();
2105         mailBody.close();
2106         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2107
2108         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
2109         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2110         if (gSystem->Exec(tailCommand.Data()))
2111         {
2112                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2113         }
2114
2115         TString endBody = Form("------------------------------------------------------\n\n");
2116         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2117         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2118         endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
2119
2120         AliDebug(2, Form("Body end: %s", endBody.Data()));
2121
2122         mailBody << endBody.Data();
2123
2124         mailBody.close();
2125
2126         // send mail!
2127         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2128                                                 subject.Data(),
2129                                                 cc.Data(),
2130                                                 to.Data(),
2131                                                 bodyFileName.Data());
2132         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2133
2134         Bool_t result = gSystem->Exec(mailCommand.Data());
2135
2136         return result == 0;
2137 }
2138
2139 //______________________________________________________________________________________________
2140 void AliShuttle::SetRunType()
2141 {
2142 // Gets run type from logbook and fills current Shuttle logbook entry
2143
2144         // check connection, in case connect
2145         if(!Connect(3)){
2146                 Log("SHUTTLE", "SetRunType - Couldn't connect to DAQ Logbook.");
2147                 return;
2148         }
2149
2150         TString sqlQuery = Form("select detector,run_type from %s where run_number=%d",
2151                                         fConfig->GetRunTypelbTable(), GetCurrentRun());
2152
2153         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2154
2155         // Query execution
2156         TSQLResult* aResult;
2157         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2158         if (!aResult) {
2159                 Log("SHUTTLE", Form("SetRunType - Can't execute query <%s>", sqlQuery.Data()));
2160                 return;
2161         }
2162
2163         TSQLRow* aRow;
2164         while ((aRow = aResult->Next())) {
2165                 TString det(aRow->GetField(0), aRow->GetFieldLength(0));
2166                 TString runType(aRow->GetField(1), aRow->GetFieldLength(1));
2167
2168                 fLogbookEntry->SetRunType(det, runType);
2169                 delete aRow;
2170         }
2171
2172         delete aResult;
2173
2174         return;
2175
2176 }
2177
2178 //______________________________________________________________________________________________
2179 const char* AliShuttle::GetRunType(const char* detCode)
2180 {
2181 // returns run type read from "run type" logbook
2182
2183         if(!fLogbookEntry) {
2184                 AliError("No logbook entry!");
2185                 return 0;
2186         }
2187
2188         return fLogbookEntry->GetRunType(detCode);
2189 }
2190
2191 //______________________________________________________________________________________________
2192 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2193 {
2194 // sets Shuttle temp directory
2195
2196         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2197 }
2198
2199 //______________________________________________________________________________________________
2200 void AliShuttle::SetShuttleLogDir(const char* logDir)
2201 {
2202 // sets Shuttle log directory
2203
2204         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2205 }