]> git.uio.no Git - u/mrichter/AliRoot.git/blame_incremental - SHUTTLE/AliShuttle.cxx
Record changes.
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
... / ...
CommitLineData
1/**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3 * *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
6 * *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
15
16/*
17$Log$
18Revision 1.19 2006/11/06 14:23:04 jgrosseo
19major update (Alberto)
20o) reading of run parameters from the logbook
21o) online offline naming conversion
22o) standalone DCSclient package
23
24Revision 1.18 2006/10/20 15:22:59 jgrosseo
25o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
26o) Merging Collect, CollectAll, CollectNew function
27o) Removing implementation of empty copy constructors (declaration still there!)
28
29Revision 1.17 2006/10/05 16:20:55 jgrosseo
30adapting to new CDB classes
31
32Revision 1.16 2006/10/05 15:46:26 jgrosseo
33applying to the new interface
34
35Revision 1.15 2006/10/02 16:38:39 jgrosseo
36update (alberto):
37fixed memory leaks
38storing of objects that failed to be stored to the grid before
39interfacing of shuttle status table in daq system
40
41Revision 1.14 2006/08/29 09:16:05 jgrosseo
42small update
43
44Revision 1.13 2006/08/15 10:50:00 jgrosseo
45effc++ corrections (alberto)
46
47Revision 1.12 2006/08/08 14:19:29 jgrosseo
48Update to shuttle classes (Alberto)
49
50- Possibility to set the full object's path in the Preprocessor's and
51Shuttle's Store functions
52- Possibility to extend the object's run validity in the same classes
53("startValidity" and "validityInfinite" parameters)
54- Implementation of the StoreReferenceData function to store reference
55data in a dedicated CDB storage.
56
57Revision 1.11 2006/07/21 07:37:20 jgrosseo
58last run is stored after each run
59
60Revision 1.10 2006/07/20 09:54:40 jgrosseo
61introducing status management: The processing per subdetector is divided into several steps,
62after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
63can keep track of the number of failures and skips further processing after a certain threshold is
64exceeded. These thresholds can be configured in LDAP.
65
66Revision 1.9 2006/07/19 10:09:55 jgrosseo
67new configuration, accesst to DAQ FES (Alberto)
68
69Revision 1.8 2006/07/11 12:44:36 jgrosseo
70adding parameters for extended validity range of data produced by preprocessor
71
72Revision 1.7 2006/07/10 14:37:09 jgrosseo
73small fix + todo comment
74
75Revision 1.6 2006/07/10 13:01:41 jgrosseo
76enhanced storing of last sucessfully processed run (alberto)
77
78Revision 1.5 2006/07/04 14:59:57 jgrosseo
79revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
80
81Revision 1.4 2006/06/12 09:11:16 jgrosseo
82coding conventions (Alberto)
83
84Revision 1.3 2006/06/06 14:26:40 jgrosseo
85o) removed files that were moved to STEER
86o) shuttle updated to follow the new interface (Alberto)
87
88Revision 1.2 2006/03/07 07:52:34 hristov
89New version (B.Yordanov)
90
91Revision 1.6 2005/11/19 17:19:14 byordano
92RetrieveDATEEntries and RetrieveConditionsData added
93
94Revision 1.5 2005/11/19 11:09:27 byordano
95AliShuttle declaration added
96
97Revision 1.4 2005/11/17 17:47:34 byordano
98TList changed to TObjArray
99
100Revision 1.3 2005/11/17 14:43:23 byordano
101import to local CVS
102
103Revision 1.1.1.1 2005/10/28 07:33:58 hristov
104Initial import as subdirectory in AliRoot
105
106Revision 1.2 2005/09/13 08:41:15 byordano
107default startTime endTime added
108
109Revision 1.4 2005/08/30 09:13:02 byordano
110some docs added
111
112Revision 1.3 2005/08/29 21:15:47 byordano
113some docs added
114
115*/
116
//
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in AliShuttleConfig (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used as detSpec.
//
129
130#include "AliShuttle.h"
131
132#include "AliCDBManager.h"
133#include "AliCDBStorage.h"
134#include "AliCDBId.h"
135#include "AliCDBRunRange.h"
136#include "AliCDBPath.h"
137#include "AliCDBEntry.h"
138#include "AliShuttleConfig.h"
139#include "DCSClient/AliDCSClient.h"
140#include "AliLog.h"
141#include "AliPreprocessor.h"
142#include "AliShuttleStatus.h"
143#include "AliShuttleLogbookEntry.h"
144
145#include <TSystem.h>
146#include <TObject.h>
147#include <TString.h>
148#include <TTimeStamp.h>
149#include <TObjString.h>
150#include <TSQLServer.h>
151#include <TSQLResult.h>
152#include <TSQLRow.h>
153#include <TMutex.h>
154
155#include <fstream>
156
157#include <sys/types.h>
158#include <sys/wait.h>
159
160ClassImp(AliShuttle)
161
162TString AliShuttle::fgkMainCDB("alien://folder=ShuttleCDB");
163TString AliShuttle::fgkLocalCDB("local://LocalShuttleCDB");
164TString AliShuttle::fgkMainRefStorage("alien://folder=ShuttleReference");
165TString AliShuttle::fgkLocalRefStorage("local://LocalReferenceStorage");
166
167Bool_t AliShuttle::fgkProcessDCS(kTRUE);
168
169const char* AliShuttle::fgkShuttleTempDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/temp");
170const char* AliShuttle::fgkShuttleLogDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/log");
171
172//______________________________________________________________________________________________
173AliShuttle::AliShuttle(const AliShuttleConfig* config,
174 UInt_t timeout, Int_t retries):
175fConfig(config),
176fTimeout(timeout), fRetries(retries),
177fPreprocessorMap(),
178fLogbookEntry(0),
179fCurrentDetector(),
180fStatusEntry(0),
181fGridError(kFALSE),
182fMonitoringMutex(0),
183fLastActionTime(0),
184fLastAction()
185{
186 //
187 // config: AliShuttleConfig used
188 // timeout: timeout used for AliDCSClient connection
189 // retries: the number of retries in case of connection error.
190 //
191
192 if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
193 for(int iSys=0;iSys<4;iSys++) {
194 fServer[iSys]=0;
195 if (iSys < 3)
196 fFESlist[iSys].SetOwner(kTRUE);
197 }
198 fPreprocessorMap.SetOwner(kTRUE);
199
200 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
201 fFirstUnprocessed[iDet] = kFALSE;
202
203 fMonitoringMutex = new TMutex();
204}
205
206//______________________________________________________________________________________________
207AliShuttle::~AliShuttle()
208{
209// destructor
210
211 fPreprocessorMap.DeleteAll();
212 for(int iSys=0;iSys<4;iSys++)
213 if(fServer[iSys]) {
214 fServer[iSys]->Close();
215 delete fServer[iSys];
216 fServer[iSys] = 0;
217 }
218
219 if (fStatusEntry){
220 delete fStatusEntry;
221 fStatusEntry = 0;
222 }
223
224 if (fMonitoringMutex)
225 {
226 delete fMonitoringMutex;
227 fMonitoringMutex = 0;
228 }
229}
230
231//______________________________________________________________________________________________
232void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
233{
234 //
235 // Registers new AliPreprocessor.
236 // It uses GetName() for indentificator of the pre processor.
237 // The pre processor is registered it there isn't any other
238 // with the same identificator (GetName()).
239 //
240
241 const char* detName = preprocessor->GetName();
242 if(GetDetPos(detName) < 0)
243 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
244
245 if (fPreprocessorMap.GetValue(detName)) {
246 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
247 return;
248 }
249
250 fPreprocessorMap.Add(new TObjString(detName), preprocessor);
251}
252//______________________________________________________________________________________________
253UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
254 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
255{
256 // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
257 // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
258 // using this function. Use StoreReferenceData instead!
259 // It calls WriteToCDB function which perform actual storage
260
261 return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
262 metaData, validityStart, validityInfinite);
263
264}
265
266//______________________________________________________________________________________________
267UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
268{
269 // Stores a CDB object in the storage for reference data. This objects will not be available during
270 // offline reconstrunction. Use this function for reference data only!
271 // It calls WriteToCDB function which perform actual storage
272
273 return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
274
275}
276
277//______________________________________________________________________________________________
278UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
279 const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
280 Int_t validityStart, Bool_t validityInfinite)
281{
282 // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
283 // The parameters are:
284 // 1) Uri of the main storage (Grid)
285 // 2) Uri of the backup storage (Local)
286 // 3) the object's path.
287 // 4) the object to be stored
288 // 5) the metaData to be associated with the object
289 // 6) the validity start run number w.r.t. the current run,
290 // if the data is valid only for this run leave the default 0
291 // 7) specifies if the calibration data is valid for infinity (this means until updated),
292 // typical for calibration runs, the default is kFALSE
293 //
294 // returns 0 if fail
295 // 1 if stored in main (Grid) storage
296 // 2 if stored in backup (Local) storage
297
298 const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
299
300 Int_t firstRun = GetCurrentRun() - validityStart;
301 if(firstRun < 0) {
302 AliError("First valid run happens to be less than 0! Setting it to 0.");
303 firstRun=0;
304 }
305
306 Int_t lastRun = -1;
307 if(validityInfinite) {
308 lastRun = AliCDBRunRange::Infinity();
309 } else {
310 lastRun = GetCurrentRun();
311 }
312
313 AliCDBId id(path, firstRun, lastRun, -1, -1);
314
315 if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
316 TObjString runUsed = Form("%d", GetCurrentRun());
317 metaData->SetProperty("RunUsed(TObjString)",&runUsed);
318 }
319
320 UInt_t result = 0;
321
322 if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
323 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
324 } else {
325 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
326 ->Put(object, id, metaData);
327 }
328
329 if(!result) {
330
331 Log(fCurrentDetector,
332 Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
333 cdbType, path.GetPath().Data()));
334
335 // Set Grid version to current run number, to ease retrieval later
336 id.SetVersion(GetCurrentRun());
337
338 result = AliCDBManager::Instance()->GetStorage(localUri)
339 ->Put(object, id, metaData);
340
341 if(result) {
342 result = 2;
343 fGridError = kTRUE;
344 }else{
345 Log(fCurrentDetector, "WriteToCDB - Can't store data!");
346 }
347 }
348
349 return result;
350
351}
352
353//______________________________________________________________________________________________
354AliShuttleStatus* AliShuttle::ReadShuttleStatus()
355{
356// Reads the AliShuttleStatus from the CDB
357
358 if (fStatusEntry){
359 delete fStatusEntry;
360 fStatusEntry = 0;
361 }
362
363 fStatusEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalCDB())
364 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
365
366 if (!fStatusEntry) return 0;
367 fStatusEntry->SetOwner(1);
368
369 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
370 if (!status) {
371 AliError("Invalid object stored to CDB!");
372 return 0;
373 }
374
375 return status;
376}
377
378//______________________________________________________________________________________________
379Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
380{
381// writes the status for one subdetector
382
383 if (fStatusEntry){
384 delete fStatusEntry;
385 fStatusEntry = 0;
386 }
387
388 Int_t run = GetCurrentRun();
389
390 AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
391
392 fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
393 fStatusEntry->SetOwner(1);
394
395 UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
396
397 if (!result) {
398 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
399 return kFALSE;
400 }
401
402 return kTRUE;
403}
404
405//______________________________________________________________________________________________
406void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
407{
408 // changes the AliShuttleStatus for the given detector and run to the given status
409
410 if (!fStatusEntry){
411 AliError("UNEXPECTED: fStatusEntry empty");
412 return;
413 }
414
415 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
416
417 if (!status){
418 AliError("UNEXPECTED: status could not be read from current CDB entry");
419 return;
420 }
421
422 TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
423 fCurrentDetector.Data(),
424 status->GetStatusName(),
425 status->GetStatusName(newStatus));
426 Log("SHUTTLE", actionStr);
427 SetLastAction(actionStr);
428
429 status->SetStatus(newStatus);
430 if (increaseCount) status->IncreaseCount();
431
432 AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
433}
434//______________________________________________________________________________________________
435Bool_t AliShuttle::ContinueProcessing()
436{
437// this function reads the AliShuttleStatus information from CDB and
438// checks if the processing should be continued
439// if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
440
441 AliShuttleLogbookEntry::Status entryStatus =
442 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
443
444 if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
445 Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
446 fCurrentDetector.Data(),
447 fLogbookEntry->GetDetectorStatusName(entryStatus)));
448 return kFALSE;
449 }
450
451 // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
452
453 // check if current run is first unprocessed run for current detector
454 if (fConfig->StrictRunOrder(fCurrentDetector) &&
455 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
456 {
457 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
458 return kFALSE;
459 }
460
461 AliShuttleStatus* status = ReadShuttleStatus();
462 if (!status) {
463 // first time
464 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
465 fCurrentDetector.Data()));
466 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
467 return WriteShuttleStatus(status);
468 }
469
470 // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
471 // If it happens it may mean Logbook updating failed... let's do it now!
472 if (status->GetStatus() == AliShuttleStatus::kDone ||
473 status->GetStatus() == AliShuttleStatus::kFailed){
474 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
475 fCurrentDetector.Data(),
476 status->GetStatusName(status->GetStatus())));
477 UpdateShuttleLogbook(fCurrentDetector.Data(),
478 status->GetStatusName(status->GetStatus()));
479 return kFALSE;
480 }
481
482 if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
483 Log("SHUTTLE",
484 Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
485 fCurrentDetector.Data()));
486 if(TryToStoreAgain()){
487 Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
488 UpdateShuttleStatus(AliShuttleStatus::kDone);
489 UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
490 } else {
491 Log("SHUTTLE",
492 Form("ContinueProcessing - %s: Grid storage failed again",
493 fCurrentDetector.Data()));
494 }
495 return kFALSE;
496 }
497
498 // if we get here, there is a restart
499
500 // abort conditions
501 if (status->GetCount() >= fConfig->GetMaxRetries()) {
502 Log("SHUTTLE",
503 Form("ContinueProcessing - %s failed %d times in status %s - Updating Shuttle Logbook",
504 fCurrentDetector.Data(),
505 status->GetCount(), status->GetStatusName()));
506 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
507 return kFALSE;
508 }
509
510 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. Aborted before with %s. Retry number %d.",
511 fCurrentDetector.Data(),
512 status->GetStatusName(), status->GetCount()));
513
514 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
515
516 return kTRUE;
517}
518
519//______________________________________________________________________________________________
520Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
521{
522 //
523 // Makes data retrieval for all detectors in the configuration.
524 // entry: Shuttle logbook entry, contains run paramenters and status of detectors
525 // (Unprocessed, Inactive, Failed or Done).
526 // Returns kFALSE in case of error occured and kTRUE otherwise
527 //
528
529 if(!entry) return kFALSE;
530
531 fLogbookEntry = entry;
532
533 if(fLogbookEntry->IsDone()){
534 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
535 UpdateShuttleLogbook("shuttle_done");
536 fLogbookEntry = 0;
537 return kTRUE;
538 }
539
540
541 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
542 GetCurrentRun()));
543
544 fLogbookEntry->Print("all");
545
546 // Initialization
547 Bool_t hasError = kFALSE;
548 for(Int_t iSys=0;iSys<3;iSys++) fFESCalled[iSys]=kFALSE;
549
550 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
551 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
552 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
553 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
554
555 // Loop on detectors in the configuration
556 TIter iter(fConfig->GetDetectors());
557 TObjString* aDetector = 0;
558
559 while ((aDetector = (TObjString*) iter.Next()))
560 {
561 fCurrentDetector = aDetector->String();
562
563 if (!fConfig->HostProcessDetector(fCurrentDetector)) continue;
564
565 AliPreprocessor* aPreprocessor =
566 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
567 if (!aPreprocessor)
568 {
569 Log("SHUTTLE",Form("Process - %s: no preprocessor registered. Skipping",
570 fCurrentDetector.Data()));
571 continue;
572 }
573
574 if (ContinueProcessing() == kFALSE) continue;
575
576 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START ******",
577 GetCurrentRun(), aDetector->GetName()));
578
579
580 Int_t pid = fork();
581
582 if (pid < 0)
583 {
584 Log("SHUTTLE", "ERROR: Forking failed");
585 }
586 else if (pid > 0)
587 {
588 // parent
589 AliInfo(Form("In parent process of %d - %s: Starting monitoring",
590 GetCurrentRun(), aDetector->GetName()));
591
592 Long_t begin = time(0);
593
594 int status; // to be used with waitpid, on purpose an int (not Int_t)!
595 while (waitpid(pid, &status, WNOHANG) == 0)
596 {
597 Long_t expiredTime = time(0) - begin;
598
599 if (expiredTime > fConfig->GetPPTimeOut())
600 {
601 Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
602 expiredTime));
603
604 kill(pid, 9);
605
606 hasError = kTRUE;
607
608 gSystem->Sleep(1000);
609 }
610 else
611 {
612 if (expiredTime % 60 == 0)
613 Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
614 expiredTime));
615 gSystem->Sleep(1000);
616 }
617 }
618
619 AliInfo(Form("In parent process of %d - %s: Client has terminated.",
620 GetCurrentRun(), aDetector->GetName()));
621
622 if (WIFEXITED(status))
623 {
624 Int_t returnCode = WEXITSTATUS(status);
625
626 Log("SHUTTLE", Form("The return code is %d", returnCode));
627
628 if (returnCode != 0)
629 hasError = kTRUE;
630 }
631 }
632 else if (pid == 0)
633 {
634 // client
635 AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
636
637 UInt_t result = ProcessCurrentDetector();
638
639 Int_t returnCode = 0; // will be set to 1 in case of an error
640
641 if (!result)
642 {
643 returnCode = 1;
644 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
645 GetCurrentRun(), aDetector->GetName()));
646 }
647 else if (result == 2)
648 {
649 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
650 GetCurrentRun(), aDetector->GetName()));
651 } else
652 {
653 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
654 GetCurrentRun(), aDetector->GetName()));
655 }
656
657 if (result > 0)
658 {
659 // Process successful: Update time_processed field in FES logbooks!
660 if (fFESCalled[kDAQ])
661 {
662 if (UpdateDAQTable() == kFALSE)
663 returnCode = 1;
664 fFESlist[kDAQ].Clear();
665 }
666 //if(fFESCalled[kDCS]) {
667 // if (UpdateDCSTable(aDetector->GetName()) == kFALSE)
668 // returnCode = 1;
669 // fFESlist[kDCS].Clear();
670 //}
671 //if(fFESCalled[kHLT]) {
672 // if (UpdateHLTTable(aDetector->GetName()) == kFALSE)
673 // returnCode = 1;
674 // fFESlist[kHLT].Clear();
675 //}
676 }
677
678 AliInfo(Form("Client process of %d - %s is exiting now with %d.",
679 GetCurrentRun(), aDetector->GetName(), returnCode));
680
681 // the client exits here
682 gSystem->Exit(returnCode);
683
684 AliError("We should never get here!!!");
685 }
686 }
687
688 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
689 GetCurrentRun()));
690
691 //check if shuttle is done for this run, if so update logbook
692 TObjArray checkEntryArray;
693 checkEntryArray.SetOwner(1);
694 TString whereClause = Form("where run=%d",GetCurrentRun());
695 if (QueryShuttleLogbook(whereClause.Data(), checkEntryArray)) {
696
697 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
698 (checkEntryArray.At(0));
699
700 if (checkEntry)
701 {
702 if (checkEntry->IsDone())
703 {
704 Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
705 UpdateShuttleLogbook("shuttle_done");
706 }
707 else
708 {
709 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
710 {
711 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
712 {
713 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
714 checkEntry->GetRun(), GetDetName(iDet)));
715 fFirstUnprocessed[iDet] = kFALSE;
716 }
717 }
718 }
719 }
720 }
721
722 fLogbookEntry = 0;
723
724 return hasError == kFALSE;
725}
726
727//______________________________________________________________________________________________
728UInt_t AliShuttle::ProcessCurrentDetector()
729{
730 //
731 // Makes data retrieval just for a specific detector (fCurrentDetector).
732 // Threre should be a configuration for this detector.
733
734 AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
735
736 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
737
738 TString host(fConfig->GetDCSHost(fCurrentDetector));
739 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
740
741 TIter iter(fConfig->GetDCSAliases(fCurrentDetector));
742 TObjString* anAlias;
743 TMap aliasMap;
744 aliasMap.SetOwner(1);
745
746 Bool_t aDCSError = kFALSE;
747 fGridError = kFALSE;
748
749 while ((anAlias = (TObjString*) iter.Next())) {
750 TObjArray *valueSet = new TObjArray();
751 valueSet->SetOwner(1);
752 // TODO Test only... I've added a flag that allows to
753 // exclude DCS archive DB query
754 if(fgkProcessDCS){
755 AliInfo("Querying DCS archive DB data...");
756 aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet) == 0);
757 } else {
758 AliInfo(Form("Skipping DCS processing. Port = %d",port));
759 aDCSError = kFALSE;
760 }
761 if(!aDCSError) {
762 aliasMap.Add(anAlias->Clone(), valueSet);
763 }else{
764 Log(fCurrentDetector, Form("ProcessCurrentDetector - Error while retrieving alias %s",
765 anAlias->GetName()));
766 UpdateShuttleStatus(AliShuttleStatus::kDCSError, kTRUE);
767 aliasMap.DeleteAll();
768 return 0;
769 }
770 }
771
772 // DCS Archive DB processing successful. Call Preprocessor!
773 UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
774
775 AliPreprocessor* aPreprocessor =
776 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
777
778 aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
779 UInt_t aPPResult = aPreprocessor->Process(&aliasMap);
780
781 UInt_t returnValue = 0;
782 if (aPPResult == 0) { // Preprocessor error
783 UpdateShuttleStatus(AliShuttleStatus::kPPError);
784 returnValue = 0;
785 } else if (fGridError == kFALSE) { // process and Grid storage ok!
786 UpdateShuttleStatus(AliShuttleStatus::kDone);
787 UpdateShuttleLogbook(fCurrentDetector, "DONE");
788 Log(fCurrentDetector.Data(),
789 "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
790 returnValue = 1;
791 } else { // Grid storage error (process ok, but object put in local storage)
792 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
793 returnValue = 2;
794 }
795
796 aliasMap.DeleteAll();
797
798 return returnValue;
799}
800
801//______________________________________________________________________________________________
802Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
803 TObjArray& entries)
804{
805// Query DAQ's Shuttle logbook and fills detector status object.
806// Call QueryRunParameters to query DAQ logbook for run parameters.
807
808 // check connection, in case connect
809 if(!Connect(3)) return kFALSE;
810
811 TString sqlQuery;
812 sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause);
813
814 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
815 if (!aResult) {
816 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
817 return kFALSE;
818 }
819
820 if(aResult->GetRowCount() == 0) {
821 if(sqlQuery.Contains("where shuttle_done=0")){
822 Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
823 delete aResult;
824 return kTRUE;
825 } else {
826 AliError("No entries in Shuttle Logbook match request");
827 delete aResult;
828 return kFALSE;
829 }
830 }
831
832 // TODO Check field count!
833 const UInt_t nCols = 24;
834 if (aResult->GetFieldCount() != (Int_t) nCols) {
835 AliError("Invalid SQL result field number!");
836 delete aResult;
837 return kFALSE;
838 }
839
840 entries.SetOwner(1);
841
842 TSQLRow* aRow;
843 while ((aRow = aResult->Next())) {
844 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
845 Int_t run = runString.Atoi();
846
847 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
848 if (!entry)
849 continue;
850
851 // loop on detectors
852 for(UInt_t ii = 0; ii < nCols; ii++)
853 entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
854
855 entries.AddLast(entry);
856 delete aRow;
857 }
858
859 if(sqlQuery.Contains("where shuttle_done=0"))
860 Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
861 entries.GetEntriesFast()));
862 delete aResult;
863 return kTRUE;
864}
865
866//______________________________________________________________________________________________
867AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
868{
869 //
870 // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
871 //
872
873 // check connection, in case connect
874 if (!Connect(3))
875 return 0;
876
877 TString sqlQuery;
878 sqlQuery.Form("select * from logbook where run=%d", run);
879
880 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
881 if (!aResult) {
882 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
883 return 0;
884 }
885
886 if (aResult->GetRowCount() == 0) {
887 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
888 delete aResult;
889 return 0;
890 }
891
892 if (aResult->GetRowCount() > 1) {
893 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
894 delete aResult;
895 return 0;
896 }
897
898 TSQLRow* aRow = aResult->Next();
899 if (!aRow)
900 {
901 AliError(Form("Could not retrieve row for run %d. Skipping", run));
902 delete aResult;
903 return 0;
904 }
905
906 AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
907
908 for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
909 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
910
911 UInt_t startTime = entry->GetStartTime();
912 UInt_t endTime = entry->GetEndTime();
913
914 if (!startTime || !endTime || startTime > endTime) {
915 Log("SHUTTLE",
916 Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
917 run, startTime, endTime));
918 delete entry;
919 delete aRow;
920 delete aResult;
921 return 0;
922 }
923
924 delete aRow;
925 delete aResult;
926
927 return entry;
928}
929
930//______________________________________________________________________________________________
931Bool_t AliShuttle::TryToStoreAgain()
932{
933 // Called in case the detector failed to store the object in Grid OCDB
934 // It tries to store the object again, if it does not find more recent and overlapping objects
935 // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage.
936
937 AliInfo("Trying to store OCDB data again...");
938 Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
939
940 AliInfo("Trying to store reference data again...");
941 Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
942
943 return resultCDB && resultRef;
944}
945
946//______________________________________________________________________________________________
947Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
948{
949 // Called by TryToStoreAgain(), performs actual storage retry
950
951 TObjArray* gridIds=0;
952
953 Bool_t result = kTRUE;
954
955 const char* type = 0;
956 TString backupURI;
957 if(gridURI == fgkMainCDB) {
958 type = "OCDB";
959 backupURI = fgkLocalCDB;
960 } else if(gridURI == fgkMainRefStorage) {
961 type = "reference";
962 backupURI = fgkLocalRefStorage;
963 } else {
964 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
965 return kFALSE;
966 }
967
968 AliCDBManager* man = AliCDBManager::Instance();
969
970 AliCDBStorage *gridSto = man->GetStorage(gridURI);
971 if(!gridSto) {
972 Log(fCurrentDetector.Data(),
973 Form("TryToStoreAgain - cannot activate main %s storage", type));
974 return kFALSE;
975 }
976
977 gridIds = gridSto->GetQueryCDBList();
978
979 // get objects previously stored in local CDB
980 AliCDBStorage *backupSto = man->GetStorage(backupURI);
981 AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
982 // Local objects were stored with current run as Grid version!
983 TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
984 localEntries->SetOwner(1);
985
986 // loop on local stored objects
987 TIter localIter(localEntries);
988 AliCDBEntry *aLocEntry = 0;
989 while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
990 aLocEntry->SetOwner(1);
991 AliCDBId aLocId = aLocEntry->GetId();
992 aLocEntry->SetVersion(-1);
993 aLocEntry->SetSubVersion(-1);
994
995 // loop on Grid valid Id's
996 Bool_t store = kTRUE;
997 TIter gridIter(gridIds);
998 AliCDBId* aGridId = 0;
999 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
1000 // If local object is valid up to infinity we store it only if it is
1001 // the first unprocessed run!
1002 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1003 {
1004 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1005 {
1006 Log(fCurrentDetector.Data(),
1007 ("TryToStoreAgain - This object has validity infinite but there are previous unprocessed runs!"));
1008 continue;
1009 } else {
1010 break;
1011 }
1012 }
1013 if(aGridId->GetPath() != aLocId.GetPath()) continue;
1014 // skip all objects valid up to infinity
1015 if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1016 // if we get here, it means there's already some more recent object stored on Grid!
1017 store = kFALSE;
1018 break;
1019 }
1020
1021 if(!store){
1022 Log(fCurrentDetector.Data(),
1023 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1024 type, aGridId->ToString().Data()));
1025 // removing local filename...
1026 // TODO maybe it's better not to remove it, it was not copied to the Grid!
1027 TString filename;
1028 backupSto->IdToFilename(aLocId, filename);
1029 AliInfo(Form("Removing local file %s", filename.Data()));
1030 gSystem->Exec(Form("rm %s",filename.Data()));
1031 continue;
1032 }
1033
1034 // If we get here, the file can be stored!
1035 Bool_t storeOk = gridSto->Put(aLocEntry);
1036 if(storeOk){
1037 Log(fCurrentDetector.Data(),
1038 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1039 aLocId.ToString().Data(), type));
1040
1041 // removing local filename...
1042 TString filename;
1043 backupSto->IdToFilename(aLocId, filename);
1044 AliInfo(Form("Removing local file %s", filename.Data()));
1045 gSystem->Exec(Form("rm %s", filename.Data()));
1046 continue;
1047 } else {
1048 Log(fCurrentDetector.Data(),
1049 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1050 type, aLocId.ToString().Data()));
1051 result = kFALSE;
1052 }
1053 }
1054 localEntries->Clear();
1055
1056 return result;
1057}
1058
1059//______________________________________________________________________________________________
1060Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* alias,
1061 TObjArray* valueSet)
1062{
1063// Retrieve all "alias" data points from the DCS server
1064// host, port: TSocket connection parameters
1065// alias: name of the alias
1066// valueSet: array of retrieved AliDCSValue's
1067
1068 AliDCSClient client(host, port, fTimeout, fRetries);
1069 if (!client.IsConnected()) {
1070 return kFALSE;
1071 }
1072
1073 Int_t result = client.GetAliasValues(alias,
1074 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1075
1076 if (result < 0) {
1077 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
1078 alias, AliDCSClient::GetErrorString(result)));
1079
1080 if (result == AliDCSClient::fgkServerError) {
1081 Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
1082 client.GetServerError().Data()));
1083 }
1084
1085 return kFALSE;
1086 }
1087
1088 return kTRUE;
1089}
1090
1091//______________________________________________________________________________________________
1092const char* AliShuttle::GetFile(Int_t system, const char* detector,
1093 const char* id, const char* source)
1094{
1095// Get calibration file from file exchange servers
1096// calls specific getter according to system index (kDAQ, kDCS, kHLT)
1097
1098 switch(system){
1099 case kDAQ:
1100 return GetDAQFileName(detector, id, source);
1101 break;
1102 case kDCS:
1103 return GetDCSFileName(detector, id, source);
1104 break;
1105 case kHLT:
1106 return GetHLTFileName(detector, id, source);
1107 break;
1108 default:
1109 AliError(Form("No valid system index: %d",system));
1110 }
1111
1112 return 0;
1113}
1114
1115//______________________________________________________________________________________________
1116TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1117{
1118// Get sources producing the condition file Id from file exchange servers
1119// calls specific getter according to system index (kDAQ, kDCS, kHLT)
1120
1121 switch(system){
1122 case kDAQ:
1123 return GetDAQFileSources(detector, id);
1124 break;
1125 case kDCS:
1126 return GetDCSFileSources(detector, id);
1127 break;
1128 case kHLT:
1129 return GetHLTFileSources(detector, id);
1130 break;
1131 default:
1132 AliError(Form("No valid system index: %d",system));
1133 }
1134
1135 return NULL;
1136}
1137
1138//______________________________________________________________________________________________
1139Bool_t AliShuttle::Connect(Int_t system)
1140{
1141// Connect to MySQL Server of the system's FES logbook
1142// DAQ Logbook, Shuttle Logbook and DAQ FES Logbook are on the same host
1143
1144 // check connection: if already connected return
1145 if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1146
1147 TString lbHost, lbUser, lbPass;
1148
1149 if (system < 3) // FES logbook servers
1150 {
1151 lbHost = Form("mysql://%s", fConfig->GetFESlbHost(system));
1152 lbUser = fConfig->GetFESlbUser(system);
1153 lbPass = fConfig->GetFESlbPass(system);
1154 } else { // Run & Shuttle logbook servers
1155 // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1156 lbHost = Form("mysql://%s", fConfig->GetDAQlbHost());
1157 lbUser = fConfig->GetDAQlbUser();
1158 lbPass = fConfig->GetDAQlbPass();
1159 }
1160
1161 fServer[system] = TSQLServer::Connect(lbHost.Data(), lbUser.Data(), lbPass.Data());
1162 if (!fServer[system] || !fServer[system]->IsConnected()) {
1163 if(system < 3)
1164 {
1165 AliError(Form("Can't establish connection to FES logbook for %s",
1166 AliShuttleInterface::GetSystemName(system)));
1167 } else {
1168 AliError("Can't establish connection to Run logbook.");
1169 }
1170 if(fServer[system]) delete fServer[system];
1171 return kFALSE;
1172 }
1173
1174 // Get tables
1175 // TODO in the configuration should the table name be there too?
1176 TSQLResult* aResult=0;
1177 switch(system){
1178 case kDAQ:
1179 aResult = fServer[kDAQ]->GetTables("REFSYSLOG");
1180 break;
1181 case kDCS:
1182 //aResult = fServer[kDCS]->GetTables("REFSYSLOG");
1183 break;
1184 case kHLT:
1185 //aResult = fServer[kHLT]->GetTables("REFSYSLOG");
1186 break;
1187 default:
1188 aResult = fServer[3]->GetTables("REFSYSLOG");
1189 break;
1190 }
1191
1192 delete aResult;
1193 return kTRUE;
1194}
1195
1196//______________________________________________________________________________________________
1197const char* AliShuttle::GetDAQFileName(const char* detector, const char* id, const char* source)
1198{
1199// Retrieves a file from the DAQ FES.
1200// First queris the DAQ logbook_fs for the DAQ file name, using the run, detector, id and source info
1201// then calls RetrieveDAQFile(DAQfilename) for actual copy to local disk
1202// run: current run being processed (given by Logbook entry fLogbookEntry)
1203// detector: the Preprocessor name
1204// id: provided as a parameter by the Preprocessor
1205// source: provided by the Preprocessor through GetFileSources function
1206
1207 // check connection, in case connect
1208 if (!Connect(kDAQ))
1209 {
1210 Log(detector, "GetDAQFileName - Couldn't connect to DAQ Logbook");
1211 return 0;
1212 }
1213
1214 // Query preparation
1215 TString sqlQueryStart = "select filePath from logbook_fs where";
1216 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"",
1217 GetCurrentRun(), detector, id, source);
1218 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1219
1220 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1221
1222 // Query execution
1223 TSQLResult* aResult = 0;
1224 aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
1225 if (!aResult) {
1226 Log(detector, Form("GetDAQFileName - Can't execute SQL query for: id = %s, source = %s",
1227 id, source));
1228 return 0;
1229 }
1230
1231 if(aResult->GetRowCount() == 0)
1232 {
1233 Log(detector,
1234 Form("GetDAQFileName - No entry in FES table for: id = %s, source = %s",
1235 id, source));
1236 delete aResult;
1237 return 0;
1238 }
1239
1240 if (aResult->GetRowCount() > 1) {
1241 Log(detector,
1242 Form("GetDAQFileName - More than one entry in FES table for: id = %s, source = %s",
1243 id, source));
1244 delete aResult;
1245 return 0;
1246 }
1247
1248 TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1249
1250 if (!aRow){
1251 Log(detector, Form("GetDAQFileName - Empty set result from query: id = %s, source = %s",
1252 id, source));
1253 delete aResult;
1254 return 0;
1255 }
1256
1257 TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1258
1259 delete aResult;
1260 delete aRow;
1261
1262 AliDebug(2, Form("filePath = %s",filePath.Data()));
1263
1264 // retrieved file is renamed to make it unique
1265 TString localFileName = Form("%s_%d_%s_%s.shuttle",
1266 detector, GetCurrentRun(), id, source);
1267
1268 // file retrieval from DAQ FES
1269 Bool_t result = RetrieveDAQFile(filePath.Data(), localFileName.Data());
1270 if(!result) {
1271 Log(detector, Form("GetDAQFileName - Copy of file %s from DAQ FES failed", filePath.Data()));
1272 return 0;
1273 } else {
1274 AliInfo(Form("File %s copied from DAQ FES into %s/%s",
1275 filePath.Data(), fgkShuttleTempDir, localFileName.Data()));
1276 }
1277
1278
1279 fFESCalled[kDAQ]=kTRUE;
1280 TObjString *fileParams = new TObjString(Form("%s_!?!_%s", id, source));
1281 fFESlist[kDAQ].Add(fileParams);
1282
1283 return localFileName.Data();
1284
1285}
1286
1287//______________________________________________________________________________________________
1288Bool_t AliShuttle::RetrieveDAQFile(const char* daqFileName, const char* localFileName)
1289{
1290
1291 // check temp directory: trying to cd to temp; if it does not exist, create it
1292 AliDebug(2, Form("Copy file %s from DAQ FES into folder %s and rename it as %s",
1293 daqFileName,fgkShuttleTempDir, localFileName));
1294
1295 void* dir = gSystem->OpenDirectory(fgkShuttleTempDir);
1296 if (dir == NULL) {
1297 if (gSystem->mkdir(fgkShuttleTempDir, kTRUE)) {
1298 AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
1299 return kFALSE;
1300 }
1301
1302 } else {
1303 gSystem->FreeDirectory(dir);
1304 }
1305
1306 TString baseDAQFESFolder = "DAQ";
1307 TString command = Form("scp %s@%s:%s/%s %s/%s",
1308 fConfig->GetFESUser(kDAQ),
1309 fConfig->GetFESHost(kDAQ),
1310 baseDAQFESFolder.Data(),
1311 daqFileName,
1312 fgkShuttleTempDir,
1313 localFileName);
1314
1315 AliDebug(2, Form("%s",command.Data()));
1316
1317 UInt_t nRetries = 0;
1318 UInt_t maxRetries = 3;
1319
1320 // copy!! if successful TSystem::Exec returns 0
1321 while(nRetries++ < maxRetries) {
1322 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1323 if(gSystem->Exec(command.Data()) == 0) return kTRUE;
1324 }
1325
1326 return kFALSE;
1327
1328}
1329
1330//______________________________________________________________________________________________
1331TList* AliShuttle::GetDAQFileSources(const char* detector, const char* id)
1332{
1333// Retrieves a file from the DCS FES.
1334
1335 // check connection, in case connect
1336 if(!Connect(kDAQ)){
1337 Log(detector, "GetDAQFileSources - Couldn't connect to DAQ Logbook");
1338 return 0;
1339 }
1340
1341 // Query preparation
1342 TString sqlQueryStart = "select DAQsource from logbook_fs where";
1343 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1344 GetCurrentRun(), detector, id);
1345 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1346
1347 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1348
1349 // Query execution
1350 TSQLResult* aResult;
1351 aResult = fServer[kDAQ]->Query(sqlQuery);
1352 if (!aResult) {
1353 Log(detector, Form("GetDAQFileSources - Can't execute SQL query for id: %s", id));
1354 return 0;
1355 }
1356
1357 if (aResult->GetRowCount() == 0) {
1358 Log(detector,
1359 Form("GetDAQFileSources - No entry in FES table for id: %s", id));
1360 delete aResult;
1361 return 0;
1362 }
1363
1364 TSQLRow* aRow;
1365 TList *list = new TList();
1366 list->SetOwner(1);
1367
1368 while((aRow = aResult->Next())){
1369
1370 TString daqSource(aRow->GetField(0), aRow->GetFieldLength(0));
1371 AliDebug(2, Form("daqSource = %s", daqSource.Data()));
1372 list->Add(new TObjString(daqSource));
1373 delete aRow;
1374 }
1375 delete aResult;
1376
1377 return list;
1378
1379}
1380
1381//______________________________________________________________________________________________
1382const char* AliShuttle::GetDCSFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
1383// Retrieves a file from the DCS FES.
1384
1385return "You're in DCS";
1386
1387}
1388
1389//______________________________________________________________________________________________
1390TList* AliShuttle::GetDCSFileSources(const char* /*detector*/, const char* /*id*/){
1391// Retrieves a file from the DCS FES.
1392
1393return NULL;
1394
1395}
1396
1397//______________________________________________________________________________________________
1398const char* AliShuttle::GetHLTFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
1399// Retrieves a file from the HLT FES.
1400
1401return "You're in HLT";
1402
1403}
1404
1405//______________________________________________________________________________________________
1406TList* AliShuttle::GetHLTFileSources(const char* /*detector*/, const char* /*id*/){
1407// Retrieves a file from the HLT FES.
1408
1409return NULL;
1410
1411}
1412
1413//______________________________________________________________________________________________
1414Bool_t AliShuttle::UpdateDAQTable()
1415{
1416// Update DAQ table filling time_processed field in all rows corresponding to current run and detector
1417
1418 // check connection, in case connect
1419 if(!Connect(kDAQ)){
1420 Log(fCurrentDetector, "UpdateDAQTable - Couldn't connect to DAQ Logbook");
1421 return kFALSE;
1422 }
1423
1424 TTimeStamp now; // now
1425
1426 // Loop on FES list entries
1427 TIter iter(&fFESlist[kDAQ]);
1428 TObjString *aFESentry=0;
1429 while((aFESentry = dynamic_cast<TObjString*> (iter.Next()))){
1430 TString aFESentrystr = aFESentry->String();
1431 TObjArray *aFESarray = aFESentrystr.Tokenize("_!?!_");
1432 if(!aFESarray || aFESarray->GetEntries() != 2 ) {
1433 Log(fCurrentDetector, Form("UpdateDAQTable - error updating FES entry. Check string: <%s>",
1434 aFESentrystr.Data()));
1435 if(aFESarray) delete aFESarray;
1436 return kFALSE;
1437 }
1438 const char* fileId = ((TObjString*) aFESarray->At(0))->GetName();
1439 const char* daqSource = ((TObjString*) aFESarray->At(1))->GetName();
1440 TString whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1441 GetCurrentRun(), fCurrentDetector.Data(), fileId, daqSource);
1442
1443 delete aFESarray;
1444
1445 TString sqlQuery = Form("update logbook_fs set time_processed=%d %s", now.GetSec(), whereClause.Data());
1446
1447 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1448
1449 // Query execution
1450 TSQLResult* aResult;
1451 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1452 if (!aResult) {
1453 Log(fCurrentDetector, Form("UpdateDAQTable - Can't execute SQL query <%s>", sqlQuery.Data()));
1454 return kFALSE;
1455 }
1456 delete aResult;
1457 }
1458
1459 return kTRUE;
1460}
1461
1462
1463//______________________________________________________________________________________________
1464Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1465{
1466// Update Shuttle logbook filling detector or shuttle_done column
1467// ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1468
1469 // check connection, in case connect
1470 if(!Connect(3)){
1471 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1472 return kFALSE;
1473 }
1474
1475 TString detName(detector);
1476 TString setClause;
1477 if(detName == "shuttle_done") {
1478 setClause = "set shuttle_done=1";
1479 } else {
1480 TString statusStr(status);
1481 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1482 statusStr.Contains("failed", TString::kIgnoreCase)){
1483 setClause = Form("set %s=\"%s\"", detector, status);
1484 } else {
1485 Log("SHUTTLE",
1486 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1487 status, detector));
1488 return kFALSE;
1489 }
1490 }
1491
1492 TString whereClause = Form("where run=%d", GetCurrentRun());
1493
1494 TString sqlQuery = Form("update logbook_shuttle %s %s",
1495 setClause.Data(), whereClause.Data());
1496
1497 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1498
1499 // Query execution
1500 TSQLResult* aResult;
1501 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
1502 if (!aResult) {
1503 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1504 return kFALSE;
1505 }
1506 delete aResult;
1507
1508 return kTRUE;
1509}
1510
1511//______________________________________________________________________________________________
1512Int_t AliShuttle::GetCurrentRun() const
1513{
1514// Get current run from logbook entry
1515
1516 return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
1517}
1518
1519//______________________________________________________________________________________________
1520UInt_t AliShuttle::GetCurrentStartTime() const
1521{
1522// get current start time
1523
1524 return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
1525}
1526
1527//______________________________________________________________________________________________
1528UInt_t AliShuttle::GetCurrentEndTime() const
1529{
1530// get current end time from logbook entry
1531
1532 return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
1533}
1534
1535//______________________________________________________________________________________________
1536void AliShuttle::Log(const char* detector, const char* message)
1537{
1538// Fill log string with a message
1539
1540 void* dir = gSystem->OpenDirectory(fgkShuttleLogDir);
1541 if (dir == NULL) {
1542 if (gSystem->mkdir(fgkShuttleLogDir, kTRUE)) {
1543 AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
1544 return;
1545 }
1546
1547 } else {
1548 gSystem->FreeDirectory(dir);
1549 }
1550
1551 TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
1552 if(GetCurrentRun()>=0 ) toLog += Form("run %d - ", GetCurrentRun());
1553 toLog += Form("%s", message);
1554
1555 AliInfo(toLog.Data());
1556
1557 TString fileName;
1558 fileName.Form("%s/%s.log", fgkShuttleLogDir, detector);
1559 gSystem->ExpandPathName(fileName);
1560
1561 ofstream logFile;
1562 logFile.open(fileName, ofstream::out | ofstream::app);
1563
1564 if (!logFile.is_open()) {
1565 AliError(Form("Could not open file %s", fileName.Data()));
1566 return;
1567 }
1568
1569 logFile << toLog.Data() << "\n";
1570
1571 logFile.close();
1572}
1573
1574//______________________________________________________________________________________________
1575Bool_t AliShuttle::Collect(Int_t run)
1576{
1577//
1578// Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1579// If a dedicated run is given this run is processed
1580//
1581// In operational mode, this is the Shuttle function triggered by the EOR signal.
1582//
1583
1584 if (run == -1)
1585 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1586 else
1587 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
1588
1589 SetLastAction("Starting");
1590
1591 TString whereClause("where shuttle_done=0");
1592 if (run != -1)
1593 whereClause += Form(" and run=%d", run);
1594
1595 TObjArray shuttleLogbookEntries;
1596 if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1597 {
1598 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1599 return kFALSE;
1600 }
1601
1602 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1603 fFirstUnprocessed[iDet] = kTRUE;
1604
1605 if (run != 1)
1606 {
1607 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1608 // flag them into fFirstUnprocessed array
1609 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1610 TObjArray tmpLogbookEntries;
1611 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1612 {
1613 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1614 return kFALSE;
1615 }
1616
1617 TIter iter(&tmpLogbookEntries);
1618 AliShuttleLogbookEntry* anEntry = 0;
1619 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
1620 {
1621 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1622 {
1623 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1624 {
1625 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1626 anEntry->GetRun(), GetDetName(iDet)));
1627 fFirstUnprocessed[iDet] = kFALSE;
1628 }
1629 }
1630
1631 }
1632
1633 }
1634
1635 if (!RetrieveConditionsData(shuttleLogbookEntries))
1636 {
1637 Log("SHUTTLE", "Collect - Process of at least one run failed");
1638 return kFALSE;
1639 }
1640
1641 return kTRUE;
1642}
1643
1644//______________________________________________________________________________________________
1645Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1646{
1647// Retrieve conditions data for all runs that aren't processed yet
1648
1649 Bool_t hasError = kFALSE;
1650
1651 TIter iter(&dateEntries);
1652 AliShuttleLogbookEntry* anEntry;
1653
1654 while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1655 if (!Process(anEntry)){
1656 hasError = kTRUE;
1657 }
1658 }
1659
1660 return hasError == kFALSE;
1661}
1662
1663//______________________________________________________________________________________________
1664ULong_t AliShuttle::GetTimeOfLastAction() const
1665{
1666 ULong_t tmp;
1667
1668 fMonitoringMutex->Lock();
1669
1670 tmp = fLastActionTime;
1671
1672 fMonitoringMutex->UnLock();
1673
1674 return tmp;
1675}
1676
1677//______________________________________________________________________________________________
1678const TString AliShuttle::GetLastAction() const
1679{
1680 // returns a string description of the last action
1681
1682 TString tmp;
1683
1684 fMonitoringMutex->Lock();
1685
1686 tmp = fLastAction;
1687
1688 fMonitoringMutex->UnLock();
1689
1690 return tmp;
1691}
1692
1693//______________________________________________________________________________________________
1694void AliShuttle::SetLastAction(const char* action)
1695{
1696 // updates the monitoring variables
1697
1698 fMonitoringMutex->Lock();
1699
1700 fLastAction = action;
1701 fLastActionTime = time(0);
1702
1703 fMonitoringMutex->UnLock();
1704}
1705
1706//______________________________________________________________________________________________
1707const char* AliShuttle::GetRunParameter(const char* param)
1708{
1709// returns run parameter read from DAQ logbook
1710
1711 if(!fLogbookEntry) {
1712 AliError("No logbook entry!");
1713 return 0;
1714 }
1715
1716 return fLogbookEntry->GetRunParameter(param);
1717}