Run type field added in SHUTTLE framework. Run type is read from "run type" logbook...
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  *                                                                        *
4  * Author: The ALICE Off-line Project.                                    *
5  * Contributors are mentioned in the code where appropriate.              *
6  *                                                                        *
7  * Permission to use, copy, modify and distribute this software and its   *
8  * documentation strictly for non-commercial purposes is hereby granted   *
9  * without fee, provided that the above copyright notice appears in all   *
10  * copies and that both the copyright notice and this permission notice   *
11  * appear in the supporting documentation. The authors make no claims     *
12  * about the suitability of this software for any purpose. It is          *
13  * provided "as is" without express or implied warranty.                  *
14  **************************************************************************/
15
16 /*
17 $Log$
18 Revision 1.30  2007/02/13 11:23:21  acolla
19 Moved getters and setters of Shuttle's main OCDB/Reference, local
20 OCDB/Reference, temp and log folders to AliShuttleInterface
21
22 Revision 1.27  2007/01/30 17:52:42  jgrosseo
23 adding monalisa monitoring
24
25 Revision 1.26  2007/01/23 19:20:03  acolla
26 Removed old ldif files, added TOF, MCH ldif files. Added some options in
27 AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
28 SetShuttleLogDir
29
30 Revision 1.25  2007/01/15 19:13:52  acolla
31 Moved some AliInfo to AliDebug in SendMail function
32
33 Revision 1.21  2006/12/07 08:51:26  jgrosseo
34 update (alberto):
35 table, db names in ldap configuration
36 added GRP preprocessor
37 DCS data can also be retrieved by data point
38
39 Revision 1.20  2006/11/16 16:16:48  jgrosseo
40 introducing strict run ordering flag
41 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
42
43 Revision 1.19  2006/11/06 14:23:04  jgrosseo
44 major update (Alberto)
45 o) reading of run parameters from the logbook
46 o) online offline naming conversion
47 o) standalone DCSclient package
48
49 Revision 1.18  2006/10/20 15:22:59  jgrosseo
50 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
51 o) Merging Collect, CollectAll, CollectNew function
52 o) Removing implementation of empty copy constructors (declaration still there!)
53
54 Revision 1.17  2006/10/05 16:20:55  jgrosseo
55 adapting to new CDB classes
56
57 Revision 1.16  2006/10/05 15:46:26  jgrosseo
58 applying to the new interface
59
60 Revision 1.15  2006/10/02 16:38:39  jgrosseo
61 update (alberto):
62 fixed memory leaks
63 storing of objects that failed to be stored to the grid before
64 interfacing of shuttle status table in daq system
65
66 Revision 1.14  2006/08/29 09:16:05  jgrosseo
67 small update
68
69 Revision 1.13  2006/08/15 10:50:00  jgrosseo
70 effc++ corrections (alberto)
71
72 Revision 1.12  2006/08/08 14:19:29  jgrosseo
73 Update to shuttle classes (Alberto)
74
75 - Possibility to set the full object's path in the Preprocessor's and
76 Shuttle's  Store functions
77 - Possibility to extend the object's run validity in the same classes
78 ("startValidity" and "validityInfinite" parameters)
79 - Implementation of the StoreReferenceData function to store reference
80 data in a dedicated CDB storage.
81
82 Revision 1.11  2006/07/21 07:37:20  jgrosseo
83 last run is stored after each run
84
85 Revision 1.10  2006/07/20 09:54:40  jgrosseo
86 introducing status management: The processing per subdetector is divided into several steps,
87 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
88 can keep track of the number of failures and skips further processing after a certain threshold is
89 exceeded. These thresholds can be configured in LDAP.
90
91 Revision 1.9  2006/07/19 10:09:55  jgrosseo
92 new configuration, accesst to DAQ FES (Alberto)
93
94 Revision 1.8  2006/07/11 12:44:36  jgrosseo
95 adding parameters for extended validity range of data produced by preprocessor
96
97 Revision 1.7  2006/07/10 14:37:09  jgrosseo
98 small fix + todo comment
99
100 Revision 1.6  2006/07/10 13:01:41  jgrosseo
101 enhanced storing of last sucessfully processed run (alberto)
102
103 Revision 1.5  2006/07/04 14:59:57  jgrosseo
104 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
105
106 Revision 1.4  2006/06/12 09:11:16  jgrosseo
107 coding conventions (Alberto)
108
109 Revision 1.3  2006/06/06 14:26:40  jgrosseo
110 o) removed files that were moved to STEER
111 o) shuttle updated to follow the new interface (Alberto)
112
113 Revision 1.2  2006/03/07 07:52:34  hristov
114 New version (B.Yordanov)
115
116 Revision 1.6  2005/11/19 17:19:14  byordano
117 RetrieveDATEEntries and RetrieveConditionsData added
118
119 Revision 1.5  2005/11/19 11:09:27  byordano
120 AliShuttle declaration added
121
122 Revision 1.4  2005/11/17 17:47:34  byordano
123 TList changed to TObjArray
124
125 Revision 1.3  2005/11/17 14:43:23  byordano
126 import to local CVS
127
128 Revision 1.1.1.1  2005/10/28 07:33:58  hristov
129 Initial import as subdirectory in AliRoot
130
131 Revision 1.2  2005/09/13 08:41:15  byordano
132 default startTime endTime added
133
134 Revision 1.4  2005/08/30 09:13:02  byordano
135 some docs added
136
137 Revision 1.3  2005/08/29 21:15:47  byordano
138 some docs added
139
140 */
141
//
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in AliShuttleConfig (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// For detSpec the alias name is used.
//
154
155 #include "AliShuttle.h"
156
157 #include "AliCDBManager.h"
158 #include "AliCDBStorage.h"
159 #include "AliCDBId.h"
160 #include "AliCDBRunRange.h"
161 #include "AliCDBPath.h"
162 #include "AliCDBEntry.h"
163 #include "AliShuttleConfig.h"
164 #include "DCSClient/AliDCSClient.h"
165 #include "AliLog.h"
166 #include "AliPreprocessor.h"
167 #include "AliShuttleStatus.h"
168 #include "AliShuttleLogbookEntry.h"
169
170 #include <TSystem.h>
171 #include <TObject.h>
172 #include <TString.h>
173 #include <TTimeStamp.h>
174 #include <TObjString.h>
175 #include <TSQLServer.h>
176 #include <TSQLResult.h>
177 #include <TSQLRow.h>
178 #include <TMutex.h>
179
180 #include <TMonaLisaWriter.h>
181
182 #include <fstream>
183
184 #include <sys/types.h>
185 #include <sys/wait.h>
186
187 ClassImp(AliShuttle)
188
189 Bool_t AliShuttle::fgkProcessDCS(kTRUE);
190
191 //______________________________________________________________________________________________
192 AliShuttle::AliShuttle(const AliShuttleConfig* config,
193                 UInt_t timeout, Int_t retries):
194 fConfig(config),
195 fTimeout(timeout), fRetries(retries),
196 fPreprocessorMap(),
197 fLogbookEntry(0),
198 fCurrentDetector(),
199 fStatusEntry(0),
200 fGridError(kFALSE),
201 fMonitoringMutex(0),
202 fLastActionTime(0),
203 fLastAction(),
204 fMonaLisa(0)
205 {
206         //
207         // config: AliShuttleConfig used
208         // timeout: timeout used for AliDCSClient connection
209         // retries: the number of retries in case of connection error.
210         //
211
212         if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
213         for(int iSys=0;iSys<4;iSys++) {
214                 fServer[iSys]=0;
215                 if (iSys < 3)
216                         fFXSlist[iSys].SetOwner(kTRUE);
217         }
218         fPreprocessorMap.SetOwner(kTRUE);
219
220         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
221                 fFirstUnprocessed[iDet] = kFALSE;
222
223         fMonitoringMutex = new TMutex();
224 }
225
226 //______________________________________________________________________________________________
227 AliShuttle::~AliShuttle()
228 {
229 // destructor
230
231         fPreprocessorMap.DeleteAll();
232         for(int iSys=0;iSys<4;iSys++)
233                 if(fServer[iSys]) {
234                         fServer[iSys]->Close();
235                         delete fServer[iSys];
236                         fServer[iSys] = 0;
237                 }
238
239         if (fStatusEntry){
240                 delete fStatusEntry;
241                 fStatusEntry = 0;
242         }
243         
244         if (fMonitoringMutex) 
245         {
246                 delete fMonitoringMutex;
247                 fMonitoringMutex = 0;
248         }
249 }
250
251 //______________________________________________________________________________________________
252 void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
253 {
254         //
255         // Registers new AliPreprocessor.
256         // It uses GetName() for indentificator of the pre processor.
257         // The pre processor is registered it there isn't any other
258         // with the same identificator (GetName()).
259         //
260
261         const char* detName = preprocessor->GetName();
262         if(GetDetPos(detName) < 0)
263                 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
264
265         if (fPreprocessorMap.GetValue(detName)) {
266                 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
267                 return;
268         }
269
270         fPreprocessorMap.Add(new TObjString(detName), preprocessor);
271 }
272 //______________________________________________________________________________________________
273 UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
274                 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
275 {
276   // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
277   // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
278   // using this function. Use StoreReferenceData instead!
279   // It calls WriteToCDB function which perform actual storage
280
281         return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
282                                 metaData, validityStart, validityInfinite);
283
284 }
285
286 //______________________________________________________________________________________________
287 UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
288 {
289   // Stores a CDB object in the storage for reference data. This objects will not be available during
290   // offline reconstrunction. Use this function for reference data only!
291   // It calls WriteToCDB function which perform actual storage
292
293         return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
294
295 }
296
297 //______________________________________________________________________________________________
298 UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
299                         const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
300                         Int_t validityStart, Bool_t validityInfinite)
301 {
302   // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
303   // The parameters are:
304   //   1) Uri of the main storage (Grid)
305   //   2) Uri of the backup storage (Local)
306   //   3) the object's path.
307   //   4) the object to be stored
308   //   5) the metaData to be associated with the object
309   //   6) the validity start run number w.r.t. the current run,
310   //      if the data is valid only for this run leave the default 0
311   //   7) specifies if the calibration data is valid for infinity (this means until updated),
312   //      typical for calibration runs, the default is kFALSE
313   //
314   // returns 0 if fail
315   //         1 if stored in main (Grid) storage
316   //         2 if stored in backup (Local) storage
317
318         const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
319
320         Int_t firstRun = GetCurrentRun() - validityStart;
321         if(firstRun < 0) {
322                 AliError("First valid run happens to be less than 0! Setting it to 0.");
323                 firstRun=0;
324         }
325
326         Int_t lastRun = -1;
327         if(validityInfinite) {
328                 lastRun = AliCDBRunRange::Infinity();
329         } else {
330                 lastRun = GetCurrentRun();
331         }
332
333         AliCDBId id(path, firstRun, lastRun, -1, -1);
334
335         if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
336                 TObjString runUsed = Form("%d", GetCurrentRun());
337                 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
338         }
339
340         UInt_t result = 0;
341
342         if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
343                 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
344         } else {
345                 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
346                                         ->Put(object, id, metaData);
347         }
348
349         if(!result) {
350
351                 Log(fCurrentDetector,
352                         Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
353                                 cdbType, path.GetPath().Data()));
354
355                 // Set Grid version to current run number, to ease retrieval later
356                 id.SetVersion(GetCurrentRun());
357
358                 result = AliCDBManager::Instance()->GetStorage(localUri)
359                                         ->Put(object, id, metaData);
360
361                 if(result) {
362                         result = 2;
363                         fGridError = kTRUE;
364                 }else{
365                         Log(fCurrentDetector, "WriteToCDB - Can't store data!");
366                 }
367         }
368
369         return result;
370
371 }
372
373 //______________________________________________________________________________________________
374 AliShuttleStatus* AliShuttle::ReadShuttleStatus()
375 {
376 // Reads the AliShuttleStatus from the CDB
377
378         if (fStatusEntry){
379                 delete fStatusEntry;
380                 fStatusEntry = 0;
381         }
382
383         fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
384                 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
385
386         if (!fStatusEntry) return 0;
387         fStatusEntry->SetOwner(1);
388
389         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
390         if (!status) {
391                 AliError("Invalid object stored to CDB!");
392                 return 0;
393         }
394
395         return status;
396 }
397
398 //______________________________________________________________________________________________
399 Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
400 {
401 // writes the status for one subdetector
402
403         if (fStatusEntry){
404                 delete fStatusEntry;
405                 fStatusEntry = 0;
406         }
407
408         Int_t run = GetCurrentRun();
409
410         AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
411
412         fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
413         fStatusEntry->SetOwner(1);
414
415         UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
416
417         if (!result) {
418                 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
419                 return kFALSE;
420         }
421         
422         SendMLInfo();
423
424         return kTRUE;
425 }
426
427 //______________________________________________________________________________________________
428 void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
429 {
430   // changes the AliShuttleStatus for the given detector and run to the given status
431
432         if (!fStatusEntry){
433                 AliError("UNEXPECTED: fStatusEntry empty");
434                 return;
435         }
436
437         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
438
439         if (!status){
440                 AliError("UNEXPECTED: status could not be read from current CDB entry");
441                 return;
442         }
443
444         TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
445                                 fCurrentDetector.Data(),
446                                 status->GetStatusName(),
447                                 status->GetStatusName(newStatus));
448         Log("SHUTTLE", actionStr);
449         SetLastAction(actionStr);
450
451         status->SetStatus(newStatus);
452         if (increaseCount) status->IncreaseCount();
453
454         AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
455
456         SendMLInfo();
457 }
458
459 //______________________________________________________________________________________________
460 void AliShuttle::SendMLInfo()
461 {
462         //
463         // sends ML information about the current status of the current detector being processed
464         //
465         
466         AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
467         
468         if (!status){
469                 AliError("UNEXPECTED: status could not be read from current CDB entry");
470                 return;
471         }
472         
473         TMonaLisaText  mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
474         TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
475
476         TList mlList;
477         mlList.Add(&mlStatus);
478         mlList.Add(&mlRetryCount);
479
480         fMonaLisa->SendParameters(&mlList);
481 }
482
483 //______________________________________________________________________________________________
484 Bool_t AliShuttle::ContinueProcessing()
485 {
486 // this function reads the AliShuttleStatus information from CDB and
487 // checks if the processing should be continued
488 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
489
490         if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
491
492         AliPreprocessor* aPreprocessor =
493                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
494         if (!aPreprocessor)
495         {
496                 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
497                 return kFALSE;
498         }
499
500         AliShuttleLogbookEntry::Status entryStatus =
501                 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
502
503         if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
504                 AliInfo(Form("ContinueProcessing - %s is %s",
505                                 fCurrentDetector.Data(),
506                                 fLogbookEntry->GetDetectorStatusName(entryStatus)));
507                 return kFALSE;
508         }
509
510         // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
511
512         // check if current run is first unprocessed run for current detector
513         if (fConfig->StrictRunOrder(fCurrentDetector) &&
514                 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
515         {
516                 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
517                 return kFALSE;
518         }
519
520         AliShuttleStatus* status = ReadShuttleStatus();
521         if (!status) {
522                 // first time
523                 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
524                                 fCurrentDetector.Data()));
525                 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
526                 return WriteShuttleStatus(status);
527         }
528
529         // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
530         // If it happens it may mean Logbook updating failed... let's do it now!
531         if (status->GetStatus() == AliShuttleStatus::kDone ||
532             status->GetStatus() == AliShuttleStatus::kFailed){
533                 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
534                                         fCurrentDetector.Data(),
535                                         status->GetStatusName(status->GetStatus())));
536                 UpdateShuttleLogbook(fCurrentDetector.Data(),
537                                         status->GetStatusName(status->GetStatus()));
538                 return kFALSE;
539         }
540
541         if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
542                 Log("SHUTTLE",
543                         Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
544                                 fCurrentDetector.Data()));
545                 if(TryToStoreAgain()){
546                         Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
547                         UpdateShuttleStatus(AliShuttleStatus::kDone);
548                         UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
549                 } else {
550                         Log("SHUTTLE",
551                                 Form("ContinueProcessing - %s: Grid storage failed again",
552                                         fCurrentDetector.Data()));
553                         // trigger ML information manually because we do not had a status change
554                         SendMLInfo();
555                 }
556                 return kFALSE;
557         }
558
559         // if we get here, there is a restart
560         Bool_t cont = kFALSE;
561
562         // abort conditions
563         if (status->GetCount() >= fConfig->GetMaxRetries()) {
564                 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
565                                 "Updating Shuttle Logbook", fCurrentDetector.Data(),
566                                 status->GetCount(), status->GetStatusName()));
567                 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
568                 UpdateShuttleStatus(AliShuttleStatus::kFailed);
569         } else {
570                 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
571                                 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
572                                 status->GetStatusName(), status->GetCount()));
573                 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
574                 cont = kTRUE;
575         }
576
577         // Send mail to detector expert!
578         AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
579         if (!SendMail())
580                 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
581                                 fCurrentDetector.Data()));
582
583         return cont;
584 }
585
586 //______________________________________________________________________________________________
587 Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
588 {
589         //
590         // Makes data retrieval for all detectors in the configuration.
591         // entry: Shuttle logbook entry, contains run paramenters and status of detectors
592         // (Unprocessed, Inactive, Failed or Done).
593         // Returns kFALSE in case of error occured and kTRUE otherwise
594         //
595
596         if(!entry) return kFALSE;
597
598         fLogbookEntry = entry;
599
600         if (fLogbookEntry->IsDone())
601         {
602                 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
603                 UpdateShuttleLogbook("shuttle_done");
604                 fLogbookEntry = 0;
605                 return kTRUE;
606         }
607
608         // create ML instance that monitors this run
609         fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
610         // disable monitoring of other parameters that come e.g. from TFile
611         gMonitoringWriter = 0;
612
613         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
614                                         GetCurrentRun()));
615
616         // Set run type from run type logbook into current fLogbookEntry
617         SetRunType();
618
619         // Send the information to ML
620         TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
621
622         TList mlList;
623         mlList.Add(&mlStatus);
624
625         fMonaLisa->SendParameters(&mlList);
626                         
627         fLogbookEntry->Print("all");
628
629         // Initialization
630         Bool_t hasError = kFALSE;
631
632         AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
633         if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
634         AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
635         if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
636
637         // Loop on detectors in the configuration
638         TIter iter(fConfig->GetDetectors());
639         TObjString* aDetector = 0;
640
641         while ((aDetector = (TObjString*) iter.Next()))
642         {
643                 fCurrentDetector = aDetector->String();
644
645                 if (ContinueProcessing() == kFALSE) continue;
646
647                 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
648                                                 GetCurrentRun(), aDetector->GetName()));
649
650                 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
651
652                 Log(fCurrentDetector.Data(), "Starting processing");
653
654                 Int_t pid = fork();
655
656                 if (pid < 0)
657                 {
658                         Log("SHUTTLE", "ERROR: Forking failed");
659                 }
660                 else if (pid > 0)
661                 {
662                         // parent
663                         AliInfo(Form("In parent process of %d - %s: Starting monitoring",
664                                                         GetCurrentRun(), aDetector->GetName()));
665
666                         Long_t begin = time(0);
667
668                         int status; // to be used with waitpid, on purpose an int (not Int_t)!
669                         while (waitpid(pid, &status, WNOHANG) == 0)
670                         {
671                                 Long_t expiredTime = time(0) - begin;
672
673                                 if (expiredTime > fConfig->GetPPTimeOut())
674                                 {
675                                         Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
676                                                                 expiredTime));
677
678                                         kill(pid, 9);
679
680                                         hasError = kTRUE;
681
682                                         gSystem->Sleep(1000);
683                                 }
684                                 else
685                                 {
686                                         if (expiredTime % 60 == 0)
687                                         Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
688                                                                 expiredTime));
689                                         gSystem->Sleep(1000);
690                                 }
691                         }
692
693                         AliInfo(Form("In parent process of %d - %s: Client has terminated.",
694                                                                 GetCurrentRun(), aDetector->GetName()));
695
696                         if (WIFEXITED(status))
697                         {
698                                 Int_t returnCode = WEXITSTATUS(status);
699
700                                 Log("SHUTTLE", Form("The return code is %d", returnCode));
701
702                                 if (returnCode != 0)
703                                 hasError = kTRUE;
704                         }
705                 }
706                 else if (pid == 0)
707                 {
708                         // client
709                         AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
710
711                         UInt_t result = ProcessCurrentDetector();
712
713                         Int_t returnCode = 0; // will be set to 1 in case of an error
714
715                         if (!result)
716                         {
717                                 returnCode = 1;
718                                 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
719                                                         GetCurrentRun(), aDetector->GetName()));
720                         }
721                         else if (result == 2)
722                         {
723                                 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
724                                                         GetCurrentRun(), aDetector->GetName()));
725                         } else
726                         {
727                                 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
728                                                         GetCurrentRun(), aDetector->GetName()));
729                         }
730
731                         if (result > 0)
732                         {
733                                 // Process successful: Update time_processed field in FXS logbooks!
734                                 if (UpdateTable() == kFALSE) returnCode = 1;
735                         }
736
737                         for (UInt_t iSys=0; iSys<3; iSys++)
738                         {
739                                 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
740                         }
741
742                         AliInfo(Form("Client process of %d - %s is exiting now with %d.",
743                                                         GetCurrentRun(), aDetector->GetName(), returnCode));
744
745                         // the client exits here
746                         gSystem->Exit(returnCode);
747
748                         AliError("We should never get here!!!");
749                 }
750         }
751
752         AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
753                                                         GetCurrentRun()));
754
755         //check if shuttle is done for this run, if so update logbook
756         TObjArray checkEntryArray;
757         checkEntryArray.SetOwner(1);
758         TString whereClause = Form("where run=%d", GetCurrentRun());
759         if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
760                 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
761                                                 GetCurrentRun()));
762                 return hasError == kFALSE;
763         }
764
765         AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
766                                                 (checkEntryArray.At(0));
767
768         if (checkEntry)
769         {
770                 if (checkEntry->IsDone())
771                 {
772                         Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
773                         UpdateShuttleLogbook("shuttle_done");
774                 }
775                 else
776                 {
777                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
778                         {
779                                 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
780                                 {
781                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
782                                                         checkEntry->GetRun(), GetDetName(iDet)));
783                                         fFirstUnprocessed[iDet] = kFALSE;
784                                 }
785                         }
786                 }
787         }
788
789         // remove ML instance
790         delete fMonaLisa;
791         fMonaLisa = 0;
792
793         fLogbookEntry = 0;
794
795         return hasError == kFALSE;
796 }
797
798 //______________________________________________________________________________________________
799 UInt_t AliShuttle::ProcessCurrentDetector()
800 {
801         //
802         // Makes data retrieval just for a specific detector (fCurrentDetector).
803         // Threre should be a configuration for this detector.
804
805         AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
806
807         UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
808
809         TMap dcsMap;
810         dcsMap.SetOwner(1);
811
812         Bool_t aDCSError = kFALSE;
813         fGridError = kFALSE;
814
815         // TODO Test only... I've added a flag that allows to
816         // exclude DCS archive DB query
817         if (!fgkProcessDCS)
818         {
819                 AliInfo("Skipping DCS processing!");
820                 aDCSError = kFALSE;
821         } else {
822                 TString host(fConfig->GetDCSHost(fCurrentDetector));
823                 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
824
825                 // Retrieval of Aliases
826                 TObjString* anAlias = 0;
827                 Int_t iAlias = 1;
828                 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
829                 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
830                 while ((anAlias = (TObjString*) iterAliases.Next()))
831                 {
832                         TObjArray *valueSet = new TObjArray();
833                         valueSet->SetOwner(1);
834
835                         if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
836                                 AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
837                                                 anAlias->GetName(), iAlias++, nTotAliases));
838                         aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
839
840                         if(!aDCSError)
841                         {
842                                 dcsMap.Add(anAlias->Clone(), valueSet);
843                         } else {
844                                 Log(fCurrentDetector,
845                                         Form("ProcessCurrentDetector - Error while retrieving alias %s",
846                                                 anAlias->GetName()));
847                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
848                                 dcsMap.DeleteAll();
849                                 return 0;
850                         }
851                 }
852
853                 // Retrieval of Data Points
854                 TObjString* aDP = 0;
855                 Int_t iDP = 0;
856                 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
857                 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
858                 while ((aDP = (TObjString*) iterDP.Next()))
859                 {
860                         TObjArray *valueSet = new TObjArray();
861                         valueSet->SetOwner(1);
862                         if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
863                                 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
864                                                 aDP->GetName(), iDP++, nTotDPs));
865                         aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
866
867                         if(!aDCSError)
868                         {
869                                 dcsMap.Add(aDP->Clone(), valueSet);
870                         } else {
871                                 Log(fCurrentDetector,
872                                         Form("ProcessCurrentDetector - Error while retrieving data point %s",
873                                                 aDP->GetName()));
874                                 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
875                                 dcsMap.DeleteAll();
876                                 return 0;
877                         }
878                 }
879         }
880
881         // DCS Archive DB processing successful. Call Preprocessor!
882         UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
883
884         AliPreprocessor* aPreprocessor =
885                 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
886
887         aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
888         UInt_t aPPResult = aPreprocessor->Process(&dcsMap);
889
890         UInt_t returnValue = 0;
891         if (aPPResult == 0) { // Preprocessor error
892                 UpdateShuttleStatus(AliShuttleStatus::kPPError);
893                 returnValue = 0;
894         } else if (fGridError == kFALSE) { // process and Grid storage ok!
895                 UpdateShuttleStatus(AliShuttleStatus::kDone);
896                 UpdateShuttleLogbook(fCurrentDetector, "DONE");
897                 Log(fCurrentDetector.Data(),
898                         "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
899                 returnValue = 1;
900         } else { // Grid storage error (process ok, but object put in local storage)
901                 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
902                 returnValue = 2;
903         }
904
905         dcsMap.DeleteAll();
906
907         return returnValue;
908 }
909
910 //______________________________________________________________________________________________
911 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
912                 TObjArray& entries)
913 {
914 // Query DAQ's Shuttle logbook and fills detector status object.
915 // Call QueryRunParameters to query DAQ logbook for run parameters.
916
917         entries.SetOwner(1);
918
919         // check connection, in case connect
920         if(!Connect(3)) return kFALSE;
921
922         TString sqlQuery;
923         sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
924
925         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
926         if (!aResult) {
927                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
928                 return kFALSE;
929         }
930
931         AliDebug(2,Form("Query = %s", sqlQuery.Data()));
932
933         if(aResult->GetRowCount() == 0) {
934 //              if(sqlQuery.EndsWith("where shuttle_done=0 order by run")){
935 //                      Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
936 //                      delete aResult;
937 //                      return kTRUE;
938 //              } else {
939                         AliInfo("No entries in Shuttle Logbook match request");
940                         delete aResult;
941                         return kTRUE;
942 //              }
943         }
944
945         // TODO Check field count!
946         const UInt_t nCols = 22;
947         if (aResult->GetFieldCount() != (Int_t) nCols) {
948                 AliError("Invalid SQL result field number!");
949                 delete aResult;
950                 return kFALSE;
951         }
952
953         TSQLRow* aRow;
954         while ((aRow = aResult->Next())) {
955                 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
956                 Int_t run = runString.Atoi();
957
958                 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
959                 if (!entry)
960                         continue;
961
962                 // loop on detectors
963                 for(UInt_t ii = 0; ii < nCols; ii++)
964                         entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
965
966                 entries.AddLast(entry);
967                 delete aRow;
968         }
969
970 //      if(sqlQuery.EndsWith("where shuttle_done=0 order by run"))
971 //              Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
972 //                                                      entries.GetEntriesFast()));
973         delete aResult;
974         return kTRUE;
975 }
976
977 //______________________________________________________________________________________________
978 AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
979 {
980         //
981         // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
982         //
983
984         // check connection, in case connect
985         if (!Connect(3))
986                 return 0;
987
988         TString sqlQuery;
989         sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
990
991         TSQLResult* aResult = fServer[3]->Query(sqlQuery);
992         if (!aResult) {
993                 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
994                 return 0;
995         }
996
997         if (aResult->GetRowCount() == 0) {
998                 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
999                 delete aResult;
1000                 return 0;
1001         }
1002
1003         if (aResult->GetRowCount() > 1) {
1004                 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1005                 delete aResult;
1006                 return 0;
1007         }
1008
1009         TSQLRow* aRow = aResult->Next();
1010         if (!aRow)
1011         {
1012                 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1013                 delete aResult;
1014                 return 0;
1015         }
1016
1017         AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
1018
1019         for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1020                 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
1021
1022         UInt_t startTime = entry->GetStartTime();
1023         UInt_t endTime = entry->GetEndTime();
1024
1025         if (!startTime || !endTime || startTime > endTime) {
1026                 Log("SHUTTLE",
1027                         Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1028                                 run, startTime, endTime));
1029                 delete entry;
1030                 delete aRow;
1031                 delete aResult;
1032                 return 0;
1033         }
1034
1035         delete aRow;
1036         delete aResult;
1037
1038         return entry;
1039 }
1040
1041 //______________________________________________________________________________________________
1042 Bool_t AliShuttle::TryToStoreAgain()
1043 {
1044   // Called in case the detector failed to store the object in Grid OCDB
1045   // It tries to store the object again, if it does not find more recent and overlapping objects
1046   // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage.
1047
1048         AliInfo("Trying to store OCDB data again...");
1049         Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
1050
1051         AliInfo("Trying to store reference data again...");
1052         Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
1053
1054         return resultCDB && resultRef;
1055 }
1056
1057 //______________________________________________________________________________________________
1058 Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
1059 {
1060   // Called by TryToStoreAgain(), performs actual storage retry
1061
1062         TObjArray* gridIds=0;
1063
1064         Bool_t result = kTRUE;
1065
1066         const char* type = 0;
1067         TString backupURI;
1068         if(gridURI == fgkMainCDB) {
1069                 type = "OCDB";
1070                 backupURI = fgkLocalCDB;
1071         } else if(gridURI == fgkMainRefStorage) {
1072                 type = "reference";
1073                 backupURI = fgkLocalRefStorage;
1074         } else {
1075                 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
1076                 return kFALSE;
1077         }
1078
1079         AliCDBManager* man = AliCDBManager::Instance();
1080
1081         AliCDBStorage *gridSto = man->GetStorage(gridURI);
1082         if(!gridSto) {
1083                 Log(fCurrentDetector.Data(),
1084                         Form("TryToStoreAgain - cannot activate main %s storage", type));
1085                 return kFALSE;
1086         }
1087
1088         gridIds = gridSto->GetQueryCDBList();
1089
1090         // get objects previously stored in local CDB
1091         AliCDBStorage *backupSto = man->GetStorage(backupURI);
1092         AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
1093         // Local objects were stored with current run as Grid version!
1094         TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
1095         localEntries->SetOwner(1);
1096
1097         // loop on local stored objects
1098         TIter localIter(localEntries);
1099         AliCDBEntry *aLocEntry = 0;
1100         while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
1101                 aLocEntry->SetOwner(1);
1102                 AliCDBId aLocId = aLocEntry->GetId();
1103                 aLocEntry->SetVersion(-1);
1104                 aLocEntry->SetSubVersion(-1);
1105
1106                 // loop on Grid valid Id's
1107                 Bool_t store = kTRUE;
1108                 TIter gridIter(gridIds);
1109                 AliCDBId* aGridId = 0;
1110                 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
1111                         // If local object is valid up to infinity we store it only if it is
1112                         // the first unprocessed run!
1113                         if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1114                         {
1115                                 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1116                                 {
1117                                         Log(fCurrentDetector.Data(),
1118                                                 ("TryToStoreAgain - This object has validity infinite but "
1119                                                  "there are previous unprocessed runs!"));
1120                                         continue;
1121                                 } else {
1122                                         break;
1123                                 }
1124                         }
1125                         if(aGridId->GetPath() != aLocId.GetPath()) continue;
1126                         // skip all objects valid up to infinity
1127                         if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1128                         // if we get here, it means there's already some more recent object stored on Grid!
1129                         store = kFALSE;
1130                         break;
1131                 }
1132
1133                 if(!store){
1134                         Log(fCurrentDetector.Data(),
1135                                 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1136                                         type, aGridId->ToString().Data()));
1137                         // removing local filename...
1138                         // TODO maybe it's better not to remove it, it was not copied to the Grid!
1139                         TString filename;
1140                         backupSto->IdToFilename(aLocId, filename);
1141                         AliInfo(Form("Removing local file %s", filename.Data()));
1142                         gSystem->Exec(Form("rm %s",filename.Data()));
1143                         continue;
1144                 }
1145
1146                 // If we get here, the file can be stored!
1147                 Bool_t storeOk = gridSto->Put(aLocEntry);
1148                 if(storeOk){
1149                         Log(fCurrentDetector.Data(),
1150                                 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1151                                         aLocId.ToString().Data(), type));
1152
1153                         // removing local filename...
1154                         TString filename;
1155                         backupSto->IdToFilename(aLocId, filename);
1156                         AliInfo(Form("Removing local file %s", filename.Data()));
1157                         gSystem->Exec(Form("rm %s", filename.Data()));
1158                         continue;
1159                 } else  {
1160                         Log(fCurrentDetector.Data(),
1161                                 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1162                                         type, aLocId.ToString().Data()));
1163                         result = kFALSE;
1164                 }
1165         }
1166         localEntries->Clear();
1167
1168         return result;
1169 }
1170
1171 //______________________________________________________________________________________________
1172 Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1173                                 TObjArray* valueSet, DCSType type)
1174 {
1175 // Retrieve all "entry" data points from the DCS server
1176 // host, port: TSocket connection parameters
1177 // entry: name of the alias or data point
1178 // valueSet: array of retrieved AliDCSValue's
1179 // type: kAlias or kDP
1180
1181         AliDCSClient client(host, port, fTimeout, fRetries);
1182         if (!client.IsConnected())
1183         {
1184                 return kFALSE;
1185         }
1186
1187         Int_t result=0;
1188
1189         if (type == kAlias)
1190         {
1191                 result = client.GetAliasValues(entry,
1192                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1193         } else
1194         if (type == kDP)
1195         {
1196                 result = client.GetDPValues(entry,
1197                         GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1198         }
1199
1200         if (result < 0)
1201         {
1202                 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1203                         entry, AliDCSClient::GetErrorString(result)));
1204
1205                 if (result == AliDCSClient::fgkServerError)
1206                 {
1207                         Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1208                                 client.GetServerError().Data()));
1209                 }
1210
1211                 return kFALSE;
1212         }
1213
1214         return kTRUE;
1215 }
1216
1217 //______________________________________________________________________________________________
1218 const char* AliShuttle::GetFile(Int_t system, const char* detector,
1219                 const char* id, const char* source)
1220 {
1221 // Get calibration file from file exchange servers
1222 // First queris the FXS database for the file name, using the run, detector, id and source info
1223 // then calls RetrieveFile(filename) for actual copy to local disk
1224 // run: current run being processed (given by Logbook entry fLogbookEntry)
1225 // detector: the Preprocessor name
1226 // id: provided as a parameter by the Preprocessor
1227 // source: provided by the Preprocessor through GetFileSources function
1228
1229         // check connection, in case connect
1230         if (!Connect(system))
1231         {
1232                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1233                 return 0;
1234         }
1235
1236         // Query preparation
1237         TString sourceName(source);
1238         Int_t nFields = 3;
1239         TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1240                                                                 fConfig->GetFXSdbTable(system));
1241         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1242                                                                 GetCurrentRun(), detector, id);
1243
1244         if (system == kDAQ)
1245         {
1246                 whereClause += Form(" and DAQsource=\"%s\"", source);
1247         }
1248         else if (system == kDCS)
1249         {
1250                 sourceName="none";
1251         }
1252         else if (system == kHLT)
1253         {
1254                 whereClause += Form(" and DDLnumbers=\"%s\"", source);
1255                 nFields = 3;
1256         }
1257
1258         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1259
1260         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1261
1262         // Query execution
1263         TSQLResult* aResult = 0;
1264         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1265         if (!aResult) {
1266                 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1267                                 GetSystemName(system), id, sourceName.Data()));
1268                 return 0;
1269         }
1270
1271         if(aResult->GetRowCount() == 0)
1272         {
1273                 Log(detector,
1274                         Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1275                                 GetSystemName(system), id, sourceName.Data()));
1276                 delete aResult;
1277                 return 0;
1278         }
1279
1280         if (aResult->GetRowCount() > 1) {
1281                 Log(detector,
1282                         Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1283                                 GetSystemName(system), id, sourceName.Data()));
1284                 delete aResult;
1285                 return 0;
1286         }
1287
1288         if (aResult->GetFieldCount() != nFields) {
1289                 Log(detector,
1290                         Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1291                                 GetSystemName(system), id, sourceName.Data()));
1292                 delete aResult;
1293                 return 0;
1294         }
1295
1296         TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1297
1298         if (!aRow){
1299                 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1300                                 GetSystemName(system), id, sourceName.Data()));
1301                 delete aResult;
1302                 return 0;
1303         }
1304
1305         TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1306         TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
1307         TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
1308
1309         delete aResult;
1310         delete aRow;
1311
1312         AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1313                                 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
1314
1315         // retrieved file is renamed to make it unique
1316         TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1317                                         GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1318
1319
1320         // file retrieval from FXS
1321         UInt_t nRetries = 0;
1322         UInt_t maxRetries = 3;
1323         Bool_t result = kFALSE;
1324
1325         // copy!! if successful TSystem::Exec returns 0
1326         while(nRetries++ < maxRetries) {
1327                 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1328                 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1329                 if(!result)
1330                 {
1331                         Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
1332                                         filePath.Data(), GetSystemName(system)));
1333                         continue;
1334                 } else {
1335                         AliInfo(Form("File %s copied from %s FXS into %s/%s",
1336                                                 filePath.Data(), GetSystemName(system),
1337                                                 GetShuttleTempDir(), localFileName.Data()));
1338                 }
1339
1340                 if (fileChecksum.Length()>0)
1341                 {
1342                         // compare md5sum of local file with the one stored in the FXS DB
1343                         Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
1344                                                 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
1345
1346                         if (md5Comp != 0)
1347                         {
1348                                 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1349                                                         filePath.Data()));
1350                                 result = kFALSE;
1351                                 continue;
1352                         }
1353                 } else {
1354                         Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
1355                                                         filePath.Data(), GetSystemName(system)));
1356                 }
1357                 if (result) break;
1358         }
1359
1360         if(!result) return 0;
1361
1362         fFXSCalled[system]=kTRUE;
1363         TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1364         fFXSlist[system].Add(fileParams);
1365
1366         static TString fullLocalFileName;
1367         fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1368
1369         AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1370
1371         return fullLocalFileName.Data();
1372
1373 }
1374
1375 //______________________________________________________________________________________________
1376 Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
1377 {
1378 // Copies file from FXS to local Shuttle machine
1379
1380         // check temp directory: trying to cd to temp; if it does not exist, create it
1381         AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
1382                         GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
1383
1384         void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
1385         if (dir == NULL) {
1386                 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1387                         AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
1388                         return kFALSE;
1389                 }
1390
1391         } else {
1392                 gSystem->FreeDirectory(dir);
1393         }
1394
1395         TString baseFXSFolder;
1396         if (system == kDAQ)
1397         {
1398                 baseFXSFolder = "FES/";
1399         }
1400         else if (system == kDCS)
1401         {
1402                 baseFXSFolder = "";
1403         }
1404         else if (system == kHLT)
1405         {
1406                 baseFXSFolder = "~/";
1407         }
1408
1409
1410         TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
1411                 fConfig->GetFXSPort(system),
1412                 fConfig->GetFXSUser(system),
1413                 fConfig->GetFXSHost(system),
1414                 baseFXSFolder.Data(),
1415                 fxsFileName,
1416                 GetShuttleTempDir(),
1417                 localFileName);
1418
1419         AliDebug(2, Form("%s",command.Data()));
1420
1421         Bool_t result = (gSystem->Exec(command.Data()) == 0);
1422
1423         return result;
1424 }
1425
1426 //______________________________________________________________________________________________
1427 TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1428 {
1429 // Get sources producing the condition file Id from file exchange servers
1430
1431         if (system == kDCS)
1432         {
1433                 AliError("DCS system has only one source of data!");
1434                 return NULL;
1435
1436         }
1437
1438         // check connection, in case connect
1439         if (!Connect(system))
1440         {
1441                 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1442                 return NULL;
1443         }
1444
1445         TString sourceName = 0;
1446         if (system == kDAQ)
1447         {
1448                 sourceName = "DAQsource";
1449         } else if (system == kHLT)
1450         {
1451                 sourceName = "DDLnumbers";
1452         }
1453
1454         TString sqlQueryStart = Form("select %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
1455         TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1456                                 GetCurrentRun(), detector, id);
1457         TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1458
1459         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1460
1461         // Query execution
1462         TSQLResult* aResult;
1463         aResult = fServer[system]->Query(sqlQuery);
1464         if (!aResult) {
1465                 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
1466                                 GetSystemName(system), id));
1467                 return 0;
1468         }
1469
1470         if (aResult->GetRowCount() == 0)
1471         {
1472                 Log(detector,
1473                         Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
1474                 delete aResult;
1475                 return 0;
1476         }
1477
1478         TSQLRow* aRow;
1479         TList *list = new TList();
1480         list->SetOwner(1);
1481
1482         while ((aRow = aResult->Next()))
1483         {
1484
1485                 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
1486                 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
1487                 list->Add(new TObjString(source));
1488                 delete aRow;
1489         }
1490
1491         delete aResult;
1492
1493         return list;
1494 }
1495
1496 //______________________________________________________________________________________________
1497 Bool_t AliShuttle::Connect(Int_t system)
1498 {
1499 // Connect to MySQL Server of the system's FXS MySQL databases
1500 // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
1501
1502         // check connection: if already connected return
1503         if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1504
1505         TString dbHost, dbUser, dbPass, dbName;
1506
1507         if (system < 3) // FXS db servers
1508         {
1509                 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1510                 dbUser = fConfig->GetFXSdbUser(system);
1511                 dbPass = fConfig->GetFXSdbPass(system);
1512                 dbName =   fConfig->GetFXSdbName(system);
1513         } else { // Run & Shuttle logbook servers
1514         // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1515                 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1516                 dbUser = fConfig->GetDAQlbUser();
1517                 dbPass = fConfig->GetDAQlbPass();
1518                 dbName =   fConfig->GetDAQlbDB();
1519         }
1520
1521         fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1522         if (!fServer[system] || !fServer[system]->IsConnected()) {
1523                 if(system < 3)
1524                 {
1525                 AliError(Form("Can't establish connection to FXS database for %s",
1526                                         AliShuttleInterface::GetSystemName(system)));
1527                 } else {
1528                 AliError("Can't establish connection to Run logbook.");
1529                 }
1530                 if(fServer[system]) delete fServer[system];
1531                 return kFALSE;
1532         }
1533
1534         // Get tables
1535         TSQLResult* aResult=0;
1536         switch(system){
1537                 case kDAQ:
1538                         aResult = fServer[kDAQ]->GetTables(dbName.Data());
1539                         break;
1540                 case kDCS:
1541                         aResult = fServer[kDCS]->GetTables(dbName.Data());
1542                         break;
1543                 case kHLT:
1544                         aResult = fServer[kHLT]->GetTables(dbName.Data());
1545                         break;
1546                 default:
1547                         aResult = fServer[3]->GetTables(dbName.Data());
1548                         break;
1549         }
1550
1551         delete aResult;
1552         return kTRUE;
1553 }
1554
1555 //______________________________________________________________________________________________
1556 Bool_t AliShuttle::UpdateTable()
1557 {
1558 // Update FXS table filling time_processed field in all rows corresponding to current run and detector
1559
1560         Bool_t result = kTRUE;
1561
1562         for (UInt_t system=0; system<3; system++)
1563         {
1564                 if(!fFXSCalled[system]) continue;
1565
1566                 // check connection, in case connect
1567                 if (!Connect(system))
1568                 {
1569                         Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
1570                         result = kFALSE;
1571                         continue;
1572                 }
1573
1574                 TTimeStamp now; // now
1575
1576                 // Loop on FXS list entries
1577                 TIter iter(&fFXSlist[system]);
1578                 TObjString *aFXSentry=0;
1579                 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
1580                 {
1581                         TString aFXSentrystr = aFXSentry->String();
1582                         TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1583                         if (!aFXSarray || aFXSarray->GetEntries() != 2 )
1584                         {
1585                                 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
1586                                         GetSystemName(system), aFXSentrystr.Data()));
1587                                 if(aFXSarray) delete aFXSarray;
1588                                 result = kFALSE;
1589                                 continue;
1590                         }
1591                         const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1592                         const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
1593
1594                         TString whereClause;
1595                         if (system == kDAQ)
1596                         {
1597                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1598                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1599                         }
1600                         else if (system == kDCS)
1601                         {
1602                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
1603                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId);
1604                         }
1605                         else if (system == kHLT)
1606                         {
1607                                 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1608                                                         GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1609                         }
1610
1611                         delete aFXSarray;
1612
1613                         TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1614                                                                 now.GetSec(), whereClause.Data());
1615
1616                         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1617
1618                         // Query execution
1619                         TSQLResult* aResult;
1620                         aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1621                         if (!aResult)
1622                         {
1623                                 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
1624                                                                 GetSystemName(system), sqlQuery.Data()));
1625                                 result = kFALSE;
1626                                 continue;
1627                         }
1628                         delete aResult;
1629                 }
1630         }
1631
1632         return result;
1633 }
1634
1635 //______________________________________________________________________________________________
1636 Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1637 {
1638         //
1639         // Update Shuttle logbook filling detector or shuttle_done column
1640         // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1641         //
1642
1643         // check connection, in case connect
1644         if(!Connect(3)){
1645                 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1646                 return kFALSE;
1647         }
1648
1649         TString detName(detector);
1650         TString setClause;
1651         if(detName == "shuttle_done")
1652         {
1653                 setClause = "set shuttle_done=1";
1654
1655                 // Send the information to ML
1656                 TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
1657
1658                 TList mlList;
1659                 mlList.Add(&mlStatus);
1660
1661                 fMonaLisa->SendParameters(&mlList);
1662         } else {
1663                 TString statusStr(status);
1664                 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1665                    statusStr.Contains("failed", TString::kIgnoreCase)){
1666                         setClause = Form("set %s=\"%s\"", detector, status);
1667                 } else {
1668                         Log("SHUTTLE",
1669                                 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1670                                         status, detector));
1671                         return kFALSE;
1672                 }
1673         }
1674
1675         TString whereClause = Form("where run=%d", GetCurrentRun());
1676
1677         TString sqlQuery = Form("update %s %s %s",
1678                                         fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
1679
1680         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1681
1682         // Query execution
1683         TSQLResult* aResult;
1684         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1685         if (!aResult) {
1686                 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1687                 return kFALSE;
1688         }
1689         delete aResult;
1690
1691         return kTRUE;
1692 }
1693
1694 //______________________________________________________________________________________________
1695 Int_t AliShuttle::GetCurrentRun() const
1696 {
1697 // Get current run from logbook entry
1698
1699         return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
1700 }
1701
1702 //______________________________________________________________________________________________
1703 UInt_t AliShuttle::GetCurrentStartTime() const
1704 {
1705 // get current start time
1706
1707         return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
1708 }
1709
1710 //______________________________________________________________________________________________
1711 UInt_t AliShuttle::GetCurrentEndTime() const
1712 {
1713 // get current end time from logbook entry
1714
1715         return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
1716 }
1717
1718 //______________________________________________________________________________________________
1719 void AliShuttle::Log(const char* detector, const char* message)
1720 {
1721 // Fill log string with a message
1722
1723         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1724         if (dir == NULL) {
1725                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
1726                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1727                         return;
1728                 }
1729
1730         } else {
1731                 gSystem->FreeDirectory(dir);
1732         }
1733
1734         TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
1735         if (GetCurrentRun() >= 0) 
1736                 toLog += Form("run %d - ", GetCurrentRun());
1737         toLog += Form("%s", message);
1738
1739         AliInfo(toLog.Data());
1740
1741         TString fileName;
1742         if (GetCurrentRun() >= 0) 
1743                 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
1744         else
1745                 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
1746         
1747         gSystem->ExpandPathName(fileName);
1748
1749         ofstream logFile;
1750         logFile.open(fileName, ofstream::out | ofstream::app);
1751
1752         if (!logFile.is_open()) {
1753                 AliError(Form("Could not open file %s", fileName.Data()));
1754                 return;
1755         }
1756
1757         logFile << toLog.Data() << "\n";
1758
1759         logFile.close();
1760 }
1761
1762 //______________________________________________________________________________________________
1763 Bool_t AliShuttle::Collect(Int_t run)
1764 {
1765 //
1766 // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1767 // If a dedicated run is given this run is processed
1768 //
1769 // In operational mode, this is the Shuttle function triggered by the EOR signal.
1770 //
1771
1772         if (run == -1)
1773                 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1774         else
1775                 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
1776
1777         SetLastAction("Starting");
1778
1779         TString whereClause("where shuttle_done=0");
1780         if (run != -1)
1781                 whereClause += Form(" and run=%d", run);
1782
1783         TObjArray shuttleLogbookEntries;
1784         if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1785         {
1786                 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1787                 return kFALSE;
1788         }
1789
1790         if (shuttleLogbookEntries.GetEntries() == 0)
1791         {
1792                 if (run == -1)
1793                         Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
1794                 else
1795                         Log("SHUTTLE", Form("Collect - Run %d is already DONE "
1796                                                 "or it does not exist in Shuttle logbook", run));
1797                 return kTRUE;
1798         }
1799
1800         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1801                 fFirstUnprocessed[iDet] = kTRUE;
1802
1803         if (run != -1)
1804         {
1805                 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1806                 // flag them into fFirstUnprocessed array
1807                 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1808                 TObjArray tmpLogbookEntries;
1809                 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1810                 {
1811                         Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1812                         return kFALSE;
1813                 }
1814
1815                 TIter iter(&tmpLogbookEntries);
1816                 AliShuttleLogbookEntry* anEntry = 0;
1817                 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
1818                 {
1819                         for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1820                         {
1821                                 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1822                                 {
1823                                         AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1824                                                         anEntry->GetRun(), GetDetName(iDet)));
1825                                         fFirstUnprocessed[iDet] = kFALSE;
1826                                 }
1827                         }
1828
1829                 }
1830
1831         }
1832
1833         if (!RetrieveConditionsData(shuttleLogbookEntries))
1834         {
1835                 Log("SHUTTLE", "Collect - Process of at least one run failed");
1836                 return kFALSE;
1837         }
1838
1839         Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
1840         return kTRUE;
1841 }
1842
1843 //______________________________________________________________________________________________
1844 Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1845 {
1846 // Retrieve conditions data for all runs that aren't processed yet
1847
1848         Bool_t hasError = kFALSE;
1849
1850         TIter iter(&dateEntries);
1851         AliShuttleLogbookEntry* anEntry;
1852
1853         while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1854                 if (!Process(anEntry)){
1855                         hasError = kTRUE;
1856                 }
1857
1858                 // clean SHUTTLE temp directory
1859                 TString command = Form("rm -f %s/*.shuttle", GetShuttleTempDir());
1860                 gSystem->Exec(command.Data());
1861         }
1862
1863         return hasError == kFALSE;
1864 }
1865
1866 //______________________________________________________________________________________________
1867 ULong_t AliShuttle::GetTimeOfLastAction() const
1868 {
1869         ULong_t tmp;
1870
1871         fMonitoringMutex->Lock();
1872
1873         tmp = fLastActionTime;
1874
1875         fMonitoringMutex->UnLock();
1876
1877         return tmp;
1878 }
1879
1880 //______________________________________________________________________________________________
1881 const TString AliShuttle::GetLastAction() const
1882 {
1883         // returns a string description of the last action
1884
1885         TString tmp;
1886
1887         fMonitoringMutex->Lock();
1888         
1889         tmp = fLastAction;
1890         
1891         fMonitoringMutex->UnLock();
1892
1893         return tmp;
1894 }
1895
1896 //______________________________________________________________________________________________
1897 void AliShuttle::SetLastAction(const char* action)
1898 {
1899         // updates the monitoring variables
1900
1901         fMonitoringMutex->Lock();
1902
1903         fLastAction = action;
1904         fLastActionTime = time(0);
1905         
1906         fMonitoringMutex->UnLock();
1907 }
1908
1909 //______________________________________________________________________________________________
1910 const char* AliShuttle::GetRunParameter(const char* param)
1911 {
1912 // returns run parameter read from DAQ logbook
1913
1914         if(!fLogbookEntry) {
1915                 AliError("No logbook entry!");
1916                 return 0;
1917         }
1918
1919         return fLogbookEntry->GetRunParameter(param);
1920 }
1921
1922 //______________________________________________________________________________________________
1923 AliCDBEntry* AliShuttle::GetFromOCDB(const AliCDBPath& path)
1924 {
1925 // returns obiect from OCDB valid for current run
1926
1927         AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1928         if (!sto)
1929         {
1930                 Log("SHUTTLE", "GetFromOCDB - Cannot activate main OCDB for query!");
1931                 return 0;
1932         }
1933
1934         return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
1935 }
1936
1937 //______________________________________________________________________________________________
1938 Bool_t AliShuttle::SendMail()
1939 {
1940 // sends a mail to the subdetector expert in case of preprocessor error
1941
1942         void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
1943         if (dir == NULL)
1944         {
1945                 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
1946                 {
1947                         AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
1948                         return kFALSE;
1949                 }
1950
1951         } else {
1952                 gSystem->FreeDirectory(dir);
1953         }
1954
1955         TString bodyFileName;
1956         bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
1957         gSystem->ExpandPathName(bodyFileName);
1958
1959         ofstream mailBody;
1960         mailBody.open(bodyFileName, ofstream::out);
1961
1962         if (!mailBody.is_open())
1963         {
1964                 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
1965                 return kFALSE;
1966         }
1967
1968         TString to="";
1969         TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
1970         TObjString *anExpert=0;
1971         while ((anExpert = (TObjString*) iterExperts.Next()))
1972         {
1973                 to += Form("%s,", anExpert->GetName());
1974         }
1975         to.Remove(to.Length()-1);
1976         AliDebug(2, Form("to: %s",to.Data()));
1977
1978         // TODO this will be removed...
1979         if (to.Contains("not_yet_set")) {
1980                 AliInfo("List of detector responsibles not yet set!");
1981                 return kFALSE;
1982         }
1983
1984         TString cc="alberto.colla@cern.ch";
1985
1986         TString subject = Form("%s Shuttle preprocessor error in run %d !",
1987                                 fCurrentDetector.Data(), GetCurrentRun());
1988         AliDebug(2, Form("subject: %s", subject.Data()));
1989
1990         TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
1991         body += Form("SHUTTLE just detected that your preprocessor "
1992                         "exited with ERROR state in run %d!!\n\n", GetCurrentRun());
1993         body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
1994         body += Form("The last 10 lines of %s log file are following:\n\n");
1995
1996         AliDebug(2, Form("Body begin: %s", body.Data()));
1997
1998         mailBody << body.Data();
1999         mailBody.close();
2000         mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2001
2002         TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
2003         TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2004         if (gSystem->Exec(tailCommand.Data()))
2005         {
2006                 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2007         }
2008
2009         TString endBody = Form("------------------------------------------------------\n\n");
2010         endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2011         endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
2012         endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
2013
2014         AliDebug(2, Form("Body end: %s", endBody.Data()));
2015
2016         mailBody << endBody.Data();
2017
2018         mailBody.close();
2019
2020         // send mail!
2021         TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2022                                                 subject.Data(),
2023                                                 cc.Data(),
2024                                                 to.Data(),
2025                                                 bodyFileName.Data());
2026         AliDebug(2, Form("mail command: %s", mailCommand.Data()));
2027
2028         Bool_t result = gSystem->Exec(mailCommand.Data());
2029
2030         return result == 0;
2031 }
2032
2033 //______________________________________________________________________________________________
2034 void AliShuttle::SetRunType()
2035 {
2036 // Gets run type from logbook and fills current Shuttle logbook entry
2037
2038         // check connection, in case connect
2039         if(!Connect(3)){
2040                 Log("SHUTTLE", "GetRunType - Couldn't connect to DAQ Logbook.");
2041                 return;
2042         }
2043
2044         TString sqlQuery = Form("select detector,run_type from %s where run_number=%d",
2045                                         fConfig->GetRunTypelbTable(), GetCurrentRun());
2046
2047         AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2048
2049         // Query execution
2050         TSQLResult* aResult;
2051         aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2052         if (!aResult) {
2053                 Log("SHUTTLE", Form("GetRunType - Can't execute query <%s>", sqlQuery.Data()));
2054                 return;
2055         }
2056
2057         TSQLRow* aRow;
2058         while ((aRow = aResult->Next())) {
2059                 TString det(aRow->GetField(0), aRow->GetFieldLength(0));
2060                 TString runType(aRow->GetField(1), aRow->GetFieldLength(1));
2061
2062                 fLogbookEntry->SetRunType(det, runType);
2063                 delete aRow;
2064         }
2065
2066         delete aResult;
2067
2068         return;
2069
2070 }
2071
2072 //______________________________________________________________________________________________
2073 const char* AliShuttle::GetRunType(const char* detCode)
2074 {
2075 // returns run type read from "run type" logbook
2076
2077         if(!fLogbookEntry) {
2078                 AliError("No logbook entry!");
2079                 return 0;
2080         }
2081
2082         return fLogbookEntry->GetRunType(detCode);
2083 }
2084
2085 //______________________________________________________________________________________________
2086 void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2087 {
2088 // sets Shuttle temp directory
2089
2090         fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2091 }
2092
2093 //______________________________________________________________________________________________
2094 void AliShuttle::SetShuttleLogDir(const char* logDir)
2095 {
2096 // sets Shuttle log directory
2097
2098         fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2099 }