o) Adding time out to the execution of the preprocessors: The Shuttle forks and the...
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
CommitLineData
73abe331 1/**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3 * *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
6 * *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
15
16/*
17$Log$
cb343cfd 18Revision 1.17 2006/10/05 16:20:55 jgrosseo
19adapting to new CDB classes
20
6ec0e06c 21Revision 1.16 2006/10/05 15:46:26 jgrosseo
22applying to the new interface
23
481441a2 24Revision 1.15 2006/10/02 16:38:39 jgrosseo
25update (alberto):
26fixed memory leaks
27storing of objects that failed to be stored to the grid before
28interfacing of shuttle status table in daq system
29
2bb7b766 30Revision 1.14 2006/08/29 09:16:05 jgrosseo
31small update
32
85a80aa9 33Revision 1.13 2006/08/15 10:50:00 jgrosseo
34effc++ corrections (alberto)
35
4f0ab988 36Revision 1.12 2006/08/08 14:19:29 jgrosseo
37Update to shuttle classes (Alberto)
38
39- Possibility to set the full object's path in the Preprocessor's and
40Shuttle's Store functions
41- Possibility to extend the object's run validity in the same classes
42("startValidity" and "validityInfinite" parameters)
43- Implementation of the StoreReferenceData function to store reference
44data in a dedicated CDB storage.
45
84090f85 46Revision 1.11 2006/07/21 07:37:20 jgrosseo
47last run is stored after each run
48
7bfb2090 49Revision 1.10 2006/07/20 09:54:40 jgrosseo
50introducing status management: The processing per subdetector is divided into several steps,
51after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
52can keep track of the number of failures and skips further processing after a certain threshold is
53exceeded. These thresholds can be configured in LDAP.
54
5164a766 55Revision 1.9 2006/07/19 10:09:55 jgrosseo
56new configuration, accesst to DAQ FES (Alberto)
57
57f50b3c 58Revision 1.8 2006/07/11 12:44:36 jgrosseo
59adding parameters for extended validity range of data produced by preprocessor
60
17111222 61Revision 1.7 2006/07/10 14:37:09 jgrosseo
62small fix + todo comment
63
e090413b 64Revision 1.6 2006/07/10 13:01:41 jgrosseo
65enhanced storing of last sucessfully processed run (alberto)
66
a7160fe9 67Revision 1.5 2006/07/04 14:59:57 jgrosseo
68revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
69
45a493ce 70Revision 1.4 2006/06/12 09:11:16 jgrosseo
71coding conventions (Alberto)
72
58bc3020 73Revision 1.3 2006/06/06 14:26:40 jgrosseo
74o) removed files that were moved to STEER
75o) shuttle updated to follow the new interface (Alberto)
76
b948db8d 77Revision 1.2 2006/03/07 07:52:34 hristov
78New version (B.Yordanov)
79
d477ad88 80Revision 1.6 2005/11/19 17:19:14 byordano
81RetrieveDATEEntries and RetrieveConditionsData added
82
83Revision 1.5 2005/11/19 11:09:27 byordano
84AliShuttle declaration added
85
86Revision 1.4 2005/11/17 17:47:34 byordano
87TList changed to TObjArray
88
89Revision 1.3 2005/11/17 14:43:23 byordano
90import to local CVS
91
92Revision 1.1.1.1 2005/10/28 07:33:58 hristov
93Initial import as subdirectory in AliRoot
94
73abe331 95Revision 1.2 2005/09/13 08:41:15 byordano
96default startTime endTime added
97
98Revision 1.4 2005/08/30 09:13:02 byordano
99some docs added
100
101Revision 1.3 2005/08/29 21:15:47 byordano
102some docs added
103
104*/
105
//
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in AliShuttleConfig (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used for detSpec.
//
118
119#include "AliShuttle.h"
120
121#include "AliCDBManager.h"
122#include "AliCDBStorage.h"
123#include "AliCDBId.h"
84090f85 124#include "AliCDBRunRange.h"
125#include "AliCDBPath.h"
5164a766 126#include "AliCDBEntry.h"
73abe331 127#include "AliShuttleConfig.h"
128#include "AliDCSClient.h"
129#include "AliLog.h"
b948db8d 130#include "AliPreprocessor.h"
5164a766 131#include "AliShuttleStatus.h"
2bb7b766 132#include "AliShuttleLogbookEntry.h"
73abe331 133
57f50b3c 134#include <TSystem.h>
58bc3020 135#include <TObject.h>
b948db8d 136#include <TString.h>
57f50b3c 137#include <TTimeStamp.h>
73abe331 138#include <TObjString.h>
57f50b3c 139#include <TSQLServer.h>
140#include <TSQLResult.h>
141#include <TSQLRow.h>
cb343cfd 142#include <TMutex.h>
73abe331 143
5164a766 144#include <fstream>
145
cb343cfd 146#include <sys/types.h>
147#include <sys/wait.h>
148
73abe331 149ClassImp(AliShuttle)
150
2bb7b766 151TString AliShuttle::fgkMainCDB("alien://folder=ShuttleCDB");
84090f85 152TString AliShuttle::fgkLocalCDB("local://LocalShuttleCDB");
2bb7b766 153TString AliShuttle::fgkMainRefStorage("alien://folder=ShuttleReference");
84090f85 154TString AliShuttle::fgkLocalRefStorage("local://LocalReferenceStorage");
155
4f0ab988 156Bool_t AliShuttle::fgkProcessDCS(kTRUE);
157
158
84090f85 159const char* AliShuttle::fgkShuttleTempDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/temp");
160const char* AliShuttle::fgkShuttleLogDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/log");
57f50b3c 161
2bb7b766 162const char* AliShuttle::fgkDetectorName[AliShuttle::kNDetectors] = {"SPD", "SDD", "SSD", "TPC", "TRD", "TOF",
57f50b3c 163 "PHOS", "CPV", "RICH", "EMCAL", "MUON_TRK", "MUON_TRG", "FMD", "ZDC", "PMD", "START", "VZERO"};
164
2bb7b766 165const char* AliShuttle::fgkDetectorCode[AliShuttle::kNDetectors] = {"SPD", "SDD", "SSD", "TPC", "TRD", "TOF",
57f50b3c 166 "PHS", "CPV", "HMP", "EMC", "MCH", "MTR", "FMD", "ZDC", "PMD", "T00", "V00"};
b948db8d 167
168//______________________________________________________________________________________________
169AliShuttle::AliShuttle(const AliShuttleConfig* config,
170 UInt_t timeout, Int_t retries):
4f0ab988 171fConfig(config),
172fTimeout(timeout), fRetries(retries),
173fPreprocessorMap(),
2bb7b766 174fLogbookEntry(0),
4f0ab988 175fCurrentDetector(""),
85a80aa9 176fStatusEntry(0),
cb343cfd 177fGridError(kFALSE),
178fMonitoringMutex(0),
179fLastActionTime(0)
73abe331 180{
181 //
182 // config: AliShuttleConfig used
73abe331 183 // timeout: timeout used for AliDCSClient connection
184 // retries: the number of retries in case of connection error.
185 //
186
57f50b3c 187 if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
188 for(int iSys=0;iSys<3;iSys++) {
189 fServer[iSys]=0;
190 fFESlist[iSys].SetOwner(kTRUE);
191 }
2bb7b766 192 fPreprocessorMap.SetOwner(kTRUE);
cb343cfd 193
194 fMonitoringMutex = new TMutex();
58bc3020 195}
196
b948db8d 197//______________________________________________________________________________________________
57f50b3c 198AliShuttle::~AliShuttle()
58bc3020 199{
200// destructor
201
b948db8d 202 fPreprocessorMap.DeleteAll();
57f50b3c 203 for(int iSys=0;iSys<3;iSys++)
204 if(fServer[iSys]) {
205 fServer[iSys]->Close();
206 delete fServer[iSys];
cb343cfd 207 fServer[iSys] = 0;
57f50b3c 208 }
2bb7b766 209
210 if (fStatusEntry){
211 delete fStatusEntry;
212 fStatusEntry = 0;
213 }
cb343cfd 214
215 if (fMonitoringMutex)
216 {
217 delete fMonitoringMutex;
218 fMonitoringMutex = 0;
219 }
73abe331 220}
221
b948db8d 222//______________________________________________________________________________________________
57f50b3c 223void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
58bc3020 224{
73abe331 225 //
b948db8d 226 // Registers new AliPreprocessor.
73abe331 227 // It uses GetName() for indentificator of the pre processor.
228 // The pre processor is registered it there isn't any other
229 // with the same identificator (GetName()).
230 //
231
b948db8d 232 if (fPreprocessorMap.GetValue(preprocessor->GetName())) {
233 AliWarning(Form("AliPreprocessor %s is already registered!",
234 preprocessor->GetName()));
73abe331 235 return;
236 }
237
b948db8d 238 fPreprocessorMap.Add(new TObjString(preprocessor->GetName()), preprocessor);
73abe331 239}
b948db8d 240//______________________________________________________________________________________________
84090f85 241UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
242 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
73abe331 243{
84090f85 244 // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
245 // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
246 // using this function. Use StoreReferenceData instead!
85a80aa9 247 // It calls WriteToCDB function which perform actual storage
b948db8d 248
85a80aa9 249 return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
250 metaData, validityStart, validityInfinite);
84090f85 251
252}
253
254//______________________________________________________________________________________________
481441a2 255UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
84090f85 256{
257 // Stores a CDB object in the storage for reference data. This objects will not be available during
258 // offline reconstrunction. Use this function for reference data only!
85a80aa9 259 // It calls WriteToCDB function which perform actual storage
260
481441a2 261 return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
84090f85 262
85a80aa9 263}
264
265//______________________________________________________________________________________________
266UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
267 const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
268 Int_t validityStart, Bool_t validityInfinite)
269{
270 // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
271 // The parameters are:
272 // 1) Uri of the main storage (Grid)
273 // 2) Uri of the backup storage (Local)
274 // 3) the object's path.
275 // 4) the object to be stored
276 // 5) the metaData to be associated with the object
277 // 6) the validity start run number w.r.t. the current run,
84090f85 278 // if the data is valid only for this run leave the default 0
85a80aa9 279 // 7) specifies if the calibration data is valid for infinity (this means until updated),
84090f85 280 // typical for calibration runs, the default is kFALSE
281 //
84090f85 282 // returns 0 if fail
85a80aa9 283 // 1 if stored in main (Grid) storage
284 // 2 if stored in backup (Local) storage
84090f85 285
85a80aa9 286 const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
2bb7b766 287
85a80aa9 288 Int_t firstRun = GetCurrentRun() - validityStart;
84090f85 289 if(firstRun < 0) {
2bb7b766 290 AliError("First valid run happens to be less than 0! Setting it to 0.");
84090f85 291 firstRun=0;
292 }
293
294 Int_t lastRun = -1;
295 if(validityInfinite) {
296 lastRun = AliCDBRunRange::Infinity();
297 } else {
298 lastRun = GetCurrentRun();
299 }
300
2bb7b766 301 AliCDBId id(path, firstRun, lastRun, -1, -1);
302
303 if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
304 TObjString runUsed = Form("%d", GetCurrentRun());
305 metaData->SetProperty("RunUsed(TObjString)",&runUsed);
306 }
84090f85 307
308 UInt_t result = 0;
309
85a80aa9 310 if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
2bb7b766 311 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
84090f85 312 } else {
85a80aa9 313 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
84090f85 314 ->Put(object, id, metaData);
315 }
316
317 if(!result) {
318
319 Log(fCurrentDetector,
2bb7b766 320 Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
321 cdbType, path.GetPath().Data()));
322
323 // Set Grid version to current run number, to ease retrieval later
324 id.SetVersion(GetCurrentRun());
84090f85 325
85a80aa9 326 result = AliCDBManager::Instance()->GetStorage(localUri)
84090f85 327 ->Put(object, id, metaData);
328
329 if(result) {
330 result = 2;
85a80aa9 331 fGridError = kTRUE;
84090f85 332 }else{
2bb7b766 333 Log(fCurrentDetector, "WriteToCDB - Can't store data!");
b948db8d 334 }
335 }
2bb7b766 336
b948db8d 337 return result;
338
73abe331 339}
340
b948db8d 341//______________________________________________________________________________________________
5164a766 342AliShuttleStatus* AliShuttle::ReadShuttleStatus()
343{
2bb7b766 344// Reads the AliShuttleStatus from the CDB
5164a766 345
2bb7b766 346 if (fStatusEntry){
347 delete fStatusEntry;
348 fStatusEntry = 0;
349 }
5164a766 350
2bb7b766 351 fStatusEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalCDB())
352 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
5164a766 353
2bb7b766 354 if (!fStatusEntry) return 0;
355 fStatusEntry->SetOwner(1);
5164a766 356
2bb7b766 357 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
358 if (!status) {
359 AliError("Invalid object stored to CDB!");
360 return 0;
361 }
5164a766 362
2bb7b766 363 return status;
5164a766 364}
365
366//______________________________________________________________________________________________
7bfb2090 367Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
5164a766 368{
2bb7b766 369// writes the status for one subdetector
370
371 if (fStatusEntry){
372 delete fStatusEntry;
373 fStatusEntry = 0;
374 }
5164a766 375
2bb7b766 376 Int_t run = GetCurrentRun();
5164a766 377
2bb7b766 378 AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
5164a766 379
2bb7b766 380 fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
381 fStatusEntry->SetOwner(1);
5164a766 382
2bb7b766 383 UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
7bfb2090 384
2bb7b766 385 if (!result) {
386 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
387 return kFALSE;
388 }
7bfb2090 389
2bb7b766 390 return kTRUE;
5164a766 391}
392
393//______________________________________________________________________________________________
394void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
395{
396 // changes the AliShuttleStatus for the given detector and run to the given status
397
2bb7b766 398 if (!fStatusEntry){
399 AliError("UNEXPECTED: fStatusEntry empty");
400 return;
401 }
5164a766 402
2bb7b766 403 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
5164a766 404
2bb7b766 405 if (!status){
406 AliError("UNEXPECTED: status could not be read from current CDB entry");
407 return;
408 }
5164a766 409
cb343cfd 410 TString actionStr;
411 actionStr.Form("UpdateShuttleStatus - %s: Changing state from %s to %s", fCurrentDetector.Data(),
412 status->GetStatusName(), status->GetStatusName(newStatus));
413 Log("SHUTTLE", actionStr);
414 SetLastAction(actionStr);
5164a766 415
2bb7b766 416 status->SetStatus(newStatus);
417 if (increaseCount) status->IncreaseCount();
5164a766 418
2bb7b766 419 AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
5164a766 420}
5164a766 421//______________________________________________________________________________________________
422Bool_t AliShuttle::ContinueProcessing()
423{
2bb7b766 424// this function reads the AliShuttleStatus information from CDB and
425// checks if the processing should be continued
426// if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
427
428 if(!GetDetCode(fCurrentDetector)) {
429 Log("SHUTTLE", Form("ContinueProcessing - %s: unknown detector",
430 fCurrentDetector.Data()));
431 return kFALSE;
432 }
433
434 AliShuttleLogbookEntry::Status entryStatus =
435 fLogbookEntry->GetDetectorStatus(GetDetCode(fCurrentDetector));
436
437 if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
438 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s",
439 fCurrentDetector.Data(),
440 fLogbookEntry->GetDetectorStatusName(entryStatus)));
441 return kFALSE;
442 }
443
444 // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
445 AliShuttleStatus* status = ReadShuttleStatus();
446 if (!status) {
447 // first time
448 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
449 fCurrentDetector.Data()));
450 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
451 return WriteShuttleStatus(status);
452 }
453
454 // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
455 // If it happens it may mean Logbook updating failed... let's do it now!
456 if (status->GetStatus() == AliShuttleStatus::kDone ||
457 status->GetStatus() == AliShuttleStatus::kFailed){
458 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
459 fCurrentDetector.Data(),
460 status->GetStatusName(status->GetStatus())));
461 UpdateShuttleLogbook(fCurrentDetector.Data(),
462 status->GetStatusName(status->GetStatus()));
463 return kFALSE;
464 }
465
466 if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
467 Log("SHUTTLE",
468 Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
469 fCurrentDetector.Data()));
470 if(TryToStoreAgain()){
471 Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
472 UpdateShuttleStatus(AliShuttleStatus::kDone);
473 UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
474 } else {
475 Log("SHUTTLE",
476 Form("ContinueProcessing - %s: Grid storage failed again",
477 fCurrentDetector.Data()));
478 }
479 return kFALSE;
480 }
481
482 // if we get here, there is a restart
483
484 // abort conditions
485 // TODO we should add two counters, one for PP and one for DCS!
cb343cfd 486 if (status->GetCount() >= fConfig->GetMaxRetries()) {
2bb7b766 487 Log("SHUTTLE",
488 Form("ContinueProcessing - %s failed %d times in status %s - Updating Shuttle Logbook",
489 fCurrentDetector.Data(),
490 status->GetCount(), status->GetStatusName()));
491 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
492 return kFALSE;
493 }
494
495 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. Got stuck before in %s. Retry number %d.",
496 fCurrentDetector.Data(),
497 status->GetStatusName(), status->GetCount()));
498
cb343cfd 499 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
2bb7b766 500
501 return kTRUE;
5164a766 502}
503
504//______________________________________________________________________________________________
2bb7b766 505Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
58bc3020 506{
73abe331 507 //
b948db8d 508 // Makes data retrieval for all detectors in the configuration.
2bb7b766 509 // entry: Shuttle logbook entry, contains run paramenters and status of detectors
510 // (Unprocessed, Inactive, Failed or Done).
d477ad88 511 // Returns kFALSE in case of error occured and kTRUE otherwise
73abe331 512 //
513
2bb7b766 514 if(!entry) return kFALSE;
515
516 fLogbookEntry = entry;
517
518 if(fLogbookEntry->IsDone()){
519 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
520 UpdateShuttleLogbook("shuttle_done");
521 fLogbookEntry = 0;
522 return kTRUE;
523 }
524
525
526 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
527 GetCurrentRun()));
528
529 fLogbookEntry->Print("");
57f50b3c 530
531 // Initialization
d477ad88 532 Bool_t hasError = kFALSE;
57f50b3c 533 for(Int_t iSys=0;iSys<3;iSys++) fFESCalled[iSys]=kFALSE;
5164a766 534
2bb7b766 535 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
536 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
537 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
538 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
d477ad88 539
57f50b3c 540 // Loop on detectors in the configuration
b948db8d 541 TIter iter(fConfig->GetDetectors());
2bb7b766 542 TObjString* aDetector = 0;
b948db8d 543
73abe331 544 while ((aDetector = (TObjString*) iter.Next())) {
7bfb2090 545 fCurrentDetector = aDetector->String();
5164a766 546
5164a766 547 if (!fConfig->HostProcessDetector(fCurrentDetector)) continue;
548
2bb7b766 549 AliPreprocessor* aPreprocessor =
550 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
551 if(!aPreprocessor){
cb343cfd 552 Log("SHUTTLE",Form("Process: no preprocessor registered. Skipping %s", fCurrentDetector.Data()));
2bb7b766 553 continue;
554 }
555
7bfb2090 556 if (ContinueProcessing() == kFALSE) continue;
5164a766 557
2bb7b766 558 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START ******",
559 GetCurrentRun(), aDetector->GetName()));
560
85a80aa9 561
cb343cfd 562 Int_t pid = fork();
2bb7b766 563
cb343cfd 564 if (pid < 0)
565 {
566 Log("SHUTTLE", "ERROR: Forking failed");
567 }
568 else if (pid > 0)
569 {
570 // parent
571 AliInfo(Form("In parent process of %d - %s: Starting monitoring", GetCurrentRun(), aDetector->GetName()));
57f50b3c 572
cb343cfd 573 Long_t begin = time(0);
574
575 int status; // to be used with waitpid, on purpose an int (not Int_t)!
576 while (waitpid(pid, &status, WNOHANG) == 0)
577 {
578 Long_t expiredTime = time(0) - begin;
579
580 if (expiredTime > fConfig->GetPPTimeOut())
581 {
582 Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...", expiredTime));
d477ad88 583
cb343cfd 584 kill(pid, 9);
585
586 hasError = kTRUE;
587
588 gSystem->Sleep(1000);
589 }
590 else
591 {
592 if (expiredTime % 60 == 0)
593 Log("SHUTTLE", Form("Checked process. Run time: %d seconds.", expiredTime));
594
595 gSystem->Sleep(1000);
596 }
597 }
598
599 AliInfo(Form("In parent process of %d - %s: Client has terminated.", GetCurrentRun(), aDetector->GetName()));
600
601 if (WIFEXITED(status))
602 {
603 Int_t returnCode = WEXITSTATUS(status);
604
605 Log("SHUTTLE", Form("The return code is %d", returnCode));
606
607 if (returnCode != 0)
608 hasError = kTRUE;
609 }
610 }
611 else if (pid == 0)
612 {
613 // client
614 AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
615
616 UInt_t result = ProcessCurrentDetector();
617
618 Int_t returnCode = 0; // will be set to 1 in case of an error
619
620 if (!result) {
621 returnCode = 1;
622 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
623 GetCurrentRun(), aDetector->GetName()));
624 }
625 else if(result == 2) {
626 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
627 GetCurrentRun(), aDetector->GetName()));
628 } else {
629 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
630 GetCurrentRun(), aDetector->GetName()));
631 }
632
633 if (result > 0)
634 {
635 // Process successful: Update time_processed field in FES logbooks!
636 if(fFESCalled[kDAQ]) {
637 if (UpdateDAQTable() == kFALSE)
638 returnCode = 1;
639 fFESlist[kDAQ].Clear();
640 }
641 //if(fFESCalled[kDCS]) {
642 // if (UpdateDCSTable(aDetector->GetName()) == kFALSE)
643 // returnCode = 1;
644 // fFESlist[kDCS].Clear();
645 //}
646 //if(fFESCalled[kHLT]) {
647 // if (UpdateHLTTable(aDetector->GetName()) == kFALSE)
648 // returnCode = 1;
649 // fFESlist[kHLT].Clear();
650 //}
651 }
652
653 AliInfo(Form("Client process of %d - %s is exiting now with %d.", GetCurrentRun(), aDetector->GetName(), returnCode));
654
655 // the client exits here
656 gSystem->Exit(returnCode);
657
658 AliError("We should never get here!!!");
659 }
7bfb2090 660 }
5164a766 661
2bb7b766 662 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
663 GetCurrentRun()));
664
665 //check if shuttle is done for this run, if so update logbook
666 TObjArray checkEntryArray;
667 checkEntryArray.SetOwner(1);
668 TString whereClause = Form("where run=%d",GetCurrentRun());
669 if(QueryShuttleLogbook(whereClause.Data(), checkEntryArray)) {
b948db8d 670
2bb7b766 671 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
672 (checkEntryArray.At(0));
673
674 if(checkEntry && checkEntry->IsDone()){
675 Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
676 UpdateShuttleLogbook("shuttle_done");
677 }
678 }
679
680 fLogbookEntry = 0;
85a80aa9 681
a7160fe9 682 return hasError == kFALSE;
73abe331 683}
684
b948db8d 685//______________________________________________________________________________________________
2bb7b766 686UInt_t AliShuttle::ProcessCurrentDetector()
73abe331 687{
688 //
2bb7b766 689 // Makes data retrieval just for a specific detector (fCurrentDetector).
73abe331 690 // Threre should be a configuration for this detector.
73abe331 691
2bb7b766 692 AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
73abe331 693
7bfb2090 694 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
73abe331 695
7bfb2090 696 TString host(fConfig->GetDCSHost(fCurrentDetector));
5164a766 697 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
698
699 TIter iter(fConfig->GetDCSAliases(fCurrentDetector));
73abe331 700 TObjString* anAlias;
b948db8d 701 TMap aliasMap;
2bb7b766 702 aliasMap.SetOwner(1);
73abe331 703
85a80aa9 704 Bool_t aDCSError = kFALSE;
705 fGridError = kFALSE;
d477ad88 706
b948db8d 707 while ((anAlias = (TObjString*) iter.Next())) {
2bb7b766 708 TObjArray *valueSet = new TObjArray();
709 valueSet->SetOwner(1);
4f0ab988 710 // TODO Test only... I've added a flag that allows to
711 // exclude DCS archive DB query
712 if(fgkProcessDCS){
713 AliInfo("Querying DCS archive DB data...");
85a80aa9 714 aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet) == 0);
4f0ab988 715 } else {
716 AliInfo(Form("Skipping DCS processing. Port = %d",port));
85a80aa9 717 aDCSError = kFALSE;
4f0ab988 718 }
85a80aa9 719 if(!aDCSError) {
2bb7b766 720 aliasMap.Add(anAlias->Clone(), valueSet);
b948db8d 721 }else{
2bb7b766 722 Log(fCurrentDetector, Form("ProcessCurrentDetector - Error while retrieving alias %s",
723 anAlias->GetName()));
724 UpdateShuttleStatus(AliShuttleStatus::kDCSError, kTRUE);
725 aliasMap.DeleteAll();
726 return 0;
73abe331 727 }
728 }
b948db8d 729
2bb7b766 730 // DCS Archive DB processing successful. Call Preprocessor!
85a80aa9 731 UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
a7160fe9 732
85a80aa9 733 AliPreprocessor* aPreprocessor =
5164a766 734 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
b948db8d 735
2bb7b766 736 aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
2bb7b766 737 UInt_t aPPResult = aPreprocessor->Process(&aliasMap);
738
739 UInt_t returnValue = 0;
85a80aa9 740 if (aPPResult == 0) { // Preprocessor error
cb343cfd 741 UpdateShuttleStatus(AliShuttleStatus::kPPError);
2bb7b766 742 returnValue = 0;
85a80aa9 743 } else if (fGridError == kFALSE) { // process and Grid storage ok!
744 UpdateShuttleStatus(AliShuttleStatus::kDone);
2bb7b766 745 UpdateShuttleLogbook(fCurrentDetector, "DONE");
746 Log(fCurrentDetector.Data(),
747 "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
748 returnValue = 1;
85a80aa9 749 } else { // Grid storage error (process ok, but object put in local storage)
750 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
2bb7b766 751 returnValue = 2;
85a80aa9 752 }
b948db8d 753
2bb7b766 754 aliasMap.DeleteAll();
b948db8d 755
2bb7b766 756 return returnValue;
757}
758
759//______________________________________________________________________________________________
760Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
761 TObjArray& entries)
762{
763// Query DAQ's Shuttle logbook and fills detector status object.
764// Call QueryRunParameters to query DAQ logbook for run parameters.
765
766 // check connection, in case connect
767 if(!Connect(kDAQ)) return kFALSE;
768
769 TString sqlQuery;
770 sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause);
771
772 TSQLResult* aResult = fServer[kDAQ]->Query(sqlQuery);
773 if (!aResult) {
774 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
775 return kFALSE;
776 }
777
778 if(aResult->GetRowCount() == 0) {
779 if(sqlQuery.Contains("where shuttle_done=0")){
780 Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
781 delete aResult;
782 return kTRUE;
783 } else {
784 AliError("No entries in Shuttle Logbook match request");
785 delete aResult;
786 return kFALSE;
787 }
788 }
789
790 // TODO Check field count!
791 const UInt_t nCols = 24;
792 if (aResult->GetFieldCount() != (Int_t) nCols) {
793 AliError("Invalid SQL result field number!");
794 delete aResult;
795 return kFALSE;
796 }
797
798 entries.SetOwner(1);
799
800 TSQLRow* aRow;
801 while ((aRow = aResult->Next())) {
802 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
803 Int_t run = runString.Atoi();
804
805 UInt_t startTime, endTime;
806 if(!QueryRunParameters(run, startTime, endTime)) continue;
807
808 const UInt_t nDet = AliShuttle::kNDetectors;
809 AliShuttleLogbookEntry::Status detStatus[nDet];
810
811 // loop on detectors
812 for(UInt_t ii = 0; ii < nCols; ii++){
813 TString detCode(aResult->GetFieldName(ii));
814 Int_t detPos = AliShuttle::GetDetPos(detCode.Data());
815 if(detPos < 0) continue;
816 TString statusString(aRow->GetField(ii), aRow->GetFieldLength(ii));
817 if(statusString == "UNPROCESSED"){
818 detStatus[detPos] = AliShuttleLogbookEntry::kUnprocessed;
819 } else if (statusString == "INACTIVE") {
820 detStatus[detPos] = AliShuttleLogbookEntry::kInactive;
821 } else if (statusString == "FAILED") {
822 detStatus[detPos] = AliShuttleLogbookEntry::kFailed;
823 } else if (statusString == "DONE") {
824 detStatus[detPos] = AliShuttleLogbookEntry::kDone;
825 }
826 }
827
828 entries.AddLast(new AliShuttleLogbookEntry(run, startTime, endTime, detStatus));
829 delete aRow;
830 }
831
832 if(sqlQuery.Contains("where shuttle_done=0"))
833 Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
834 entries.GetEntriesFast()));
835 delete aResult;
836 return kTRUE;
837}
838
839//______________________________________________________________________________________________
840Bool_t AliShuttle::QueryRunParameters(Int_t& run, UInt_t& startTime, UInt_t& endTime)
841{
842// Retrieve start time and end time for run in the DAQ logbook
843
844 // check connection, in case connect
845 if(!Connect(kDAQ)) return kFALSE;
846
847 TString sqlQuery;
848 sqlQuery = Form("select time_start, time_end from logbook where run=%d", run);
849
850 TSQLResult* aResult = fServer[kDAQ]->Query(sqlQuery);
851 if (!aResult) {
852 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
853 return kFALSE;
854 }
855
856 if(aResult->GetRowCount() == 0) {
857 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
858 delete aResult;
859 return kFALSE;
860 }
861
862 if(aResult->GetRowCount() > 1) {
863 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
864 delete aResult;
865 return kFALSE;
866 }
867
868 TSQLRow* aRow;
869 while ((aRow = aResult->Next())) {
870
871 TString startTimeString(aRow->GetField(0),
872 aRow->GetFieldLength(0));
873 startTime = startTimeString.Atoi();
874 TString endTimeString(aRow->GetField(1),
875 aRow->GetFieldLength(1));
876 endTime = endTimeString.Atoi();
877
878 if (!startTime || !endTime || startTime > endTime) {
879 Log("SHUTTLE",
880 Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
881 run, startTime, endTime));
882 delete aRow;
883 delete aResult;
884 return kFALSE;
885 }
886
887 delete aRow;
888 }
889
890 delete aResult;
891 return kTRUE;
892}
893
894//______________________________________________________________________________________________
895Bool_t AliShuttle::TryToStoreAgain()
896{
897 // Called in case the detector failed to store the object in Grid OCDB
898 // It tries to store the object again, if it does not find more recent and overlapping objects
899 // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage.
900
901 AliInfo("Trying to store OCDB data again...");
902 Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
903
904 AliInfo("Trying to store reference data again...");
905 Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
906
907 return resultCDB && resultRef;
908}
909
910//______________________________________________________________________________________________
911Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
912{
913 // Called by TryToStoreAgain(), performs actual storage retry
914
6ec0e06c 915 TObjArray* gridIds=0;
2bb7b766 916
917 Bool_t result = kTRUE;
918
919 const char* type = 0;
920 TString backupURI;
921 if(gridURI == fgkMainCDB) {
922 type = "OCDB";
923 backupURI = fgkLocalCDB;
924 } else if(gridURI == fgkMainRefStorage) {
925 type = "reference";
926 backupURI = fgkLocalRefStorage;
927 } else {
928 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
929 return kFALSE;
930 }
931
932 AliCDBManager* man = AliCDBManager::Instance();
933
934 AliCDBStorage *gridSto = man->GetStorage(gridURI);
935 if(!gridSto) {
936 Log(fCurrentDetector.Data(),
937 Form("TryToStoreAgain - cannot activate main %s storage", type));
938 return kFALSE;
939 }
940
941 gridIds = gridSto->GetQueryCDBList();
942
943 // get objects previously stored in local CDB
944 AliCDBStorage *backupSto = man->GetStorage(backupURI);
945 AliCDBPath aPath(fCurrentDetector,"*","*");
946 // Local objects were stored with current run as Grid version!
947 TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
948 localEntries->SetOwner(1);
949
950 // loop on local stored objects
951 TIter localIter(localEntries);
952 AliCDBEntry *aLocEntry = 0;
953 while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
954 aLocEntry->SetOwner(1);
955 AliCDBId aLocId = aLocEntry->GetId();
956 aLocEntry->SetVersion(-1);
957 aLocEntry->SetSubVersion(-1);
958
959 // loop on Grid valid Id's
960 Bool_t store = kTRUE;
961 TIter gridIter(gridIds);
962 AliCDBId* aGridId = 0;
963 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
964 // If local object is valid up to infinity we store it anyway
965 // TODO This does not work! It may hide more recent objects...
966 if(aLocId.GetLastRun() == AliCDBRunRange::Infinity()) {
967 // TODO Check that it won't hide more recent files! how????
968 break;
969 }
970 if(aGridId->GetPath() != aLocId.GetPath()) continue;
971 // skip all objects valid up to infinity
972 if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
973 // if we get here, it means there's already some more recent object stored on Grid!
974 store = kFALSE;
975 break;
976 }
977
978 if(!store){
979 Log(fCurrentDetector.Data(),
980 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
981 type, aGridId->ToString().Data()));
982 // removing local filename...
983 // TODO maybe it's better not to remove it, it was not copied to the Grid!
984 TString filename;
985 backupSto->IdToFilename(aLocId, filename);
986 AliInfo(Form("Removing local file %s", filename.Data()));
987 gSystem->Exec(Form("rm %s",filename.Data()));
988 continue;
989 }
990
991 // If we get here, the file can be stored!
992 Bool_t storeOk = gridSto->Put(aLocEntry);
993 if(storeOk){
994 Log(fCurrentDetector.Data(),
995 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
996 aLocId.ToString().Data(), type));
997
998 // removing local filename...
999 TString filename;
1000 backupSto->IdToFilename(aLocId, filename);
1001 AliInfo(Form("Removing local file %s", filename.Data()));
1002 gSystem->Exec(Form("rm %s", filename.Data()));
1003 continue;
1004 } else {
1005 Log(fCurrentDetector.Data(),
1006 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1007 type, aLocId.ToString().Data()));
1008 result = kFALSE;
1009 }
1010 }
1011 localEntries->Clear();
1012
1013 return result;
73abe331 1014}
1015
b948db8d 1016//______________________________________________________________________________________________
73abe331 1017Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* alias,
2bb7b766 1018 TObjArray* valueSet)
73abe331 1019{
58bc3020 1020// Retrieve all "alias" data points from the DCS server
1021// host, port: TSocket connection parameters
1022// alias: name of the alias
2bb7b766 1023// valueSet: array of retrieved AliDCSValue's
58bc3020 1024
73abe331 1025 AliDCSClient client(host, port, fTimeout, fRetries);
1026 if (!client.IsConnected()) {
b948db8d 1027 return kFALSE;
73abe331 1028 }
1029
57f50b3c 1030 Int_t result = client.GetAliasValues(alias,
73abe331 1031 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1032
1033 if (result < 0) {
2bb7b766 1034 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
73abe331 1035 alias, AliDCSClient::GetErrorString(result)));
1036
1037 if (result == AliDCSClient::fgkServerError) {
2bb7b766 1038 Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
73abe331 1039 client.GetServerError().Data()));
1040 }
1041
1042 return kFALSE;
1043 }
1044
1045 return kTRUE;
1046}
b948db8d 1047
1048//______________________________________________________________________________________________
57f50b3c 1049const char* AliShuttle::GetFile(Int_t system, const char* detector,
1050 const char* id, const char* source)
b948db8d 1051{
57f50b3c 1052// Get calibration file from file exchange servers
1053// calls specific getter according to system index (kDAQ, kDCS, kHLT)
1054
1055 switch(system){
1056 case kDAQ:
1057 return GetDAQFileName(detector, id, source);
1058 break;
1059 case kDCS:
1060 return GetDCSFileName(detector, id, source);
1061 break;
1062 case kHLT:
1063 return GetHLTFileName(detector, id, source);
1064 break;
1065 default:
1066 AliError(Form("No valid system index: %d",system));
1067 }
b948db8d 1068
b948db8d 1069 return 0;
1070}
1071
b948db8d 1072//______________________________________________________________________________________________
57f50b3c 1073TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
b948db8d 1074{
57f50b3c 1075// Get sources producing the condition file Id from file exchange servers
1076// calls specific getter according to system index (kDAQ, kDCS, kHLT)
1077
1078 switch(system){
1079 case kDAQ:
1080 return GetDAQFileSources(detector, id);
1081 break;
1082 case kDCS:
1083 return GetDCSFileSources(detector, id);
1084 break;
1085 case kHLT:
1086 return GetHLTFileSources(detector, id);
1087 break;
1088 default:
1089 AliError(Form("No valid system index: %d",system));
1090 }
1091
1092 return NULL;
1093}
1094
1095//______________________________________________________________________________________________
2bb7b766 1096Bool_t AliShuttle::Connect(Int_t system)
1097{
57f50b3c 1098// Connect to MySQL Server of the system's FES logbook
2bb7b766 1099// DAQ Logbook, Shuttle Logbook and DAQ FES Logbook are on the same host
57f50b3c 1100
1101 // check connection: if already connected return
1102 if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1103
1104 TString aFESlbHost= Form("mysql://%s", fConfig->GetFESlbHost(system));
1105
1106 fServer[system] = TSQLServer::Connect(aFESlbHost,
1107 fConfig->GetFESlbUser(system),
1108 fConfig->GetFESlbPass(system));
1109 if (!fServer[system] || !fServer[system]->IsConnected()) {
2bb7b766 1110 AliError(Form("Can't establish connection to FES logbook for %s",fkSystemNames[system]));
1111 if(fServer[system]) delete fServer[system];
57f50b3c 1112 return kFALSE;
1113 }
1114
1115 // Get tables
1116 // TODO in the configuration should the table name be there too?
2bb7b766 1117 TSQLResult* aResult=0;
57f50b3c 1118 switch(system){
1119 case kDAQ:
2bb7b766 1120 aResult = fServer[kDAQ]->GetTables("REFSYSLOG");
57f50b3c 1121 break;
1122 case kDCS:
2bb7b766 1123 //aResult = fServer[kDCS]->GetTables("REFSYSLOG");
57f50b3c 1124 break;
1125 case kHLT:
2bb7b766 1126 //aResult = fServer[kHLT]->GetTables("REFSYSLOG");
57f50b3c 1127 break;
1128 default:
1129 break;
1130 }
1131
2bb7b766 1132 delete aResult;
57f50b3c 1133 return kTRUE;
1134}
1135
1136//______________________________________________________________________________________________
2bb7b766 1137const char* AliShuttle::GetDAQFileName(const char* detector, const char* id, const char* source)
1138{
57f50b3c 1139// Retrieves a file from the DAQ FES.
1140// First queris the DAQ logbook_fs for the DAQ file name, using the run, detector, id and source info
1141// then calls RetrieveDAQFile(DAQfilename) for actual copy to local disk
2bb7b766 1142// run: current run being processed (given by Logbook entry fLogbookEntry)
57f50b3c 1143// detector: comes from the Preprocessor name (must be converted into detector code with GetDetCode)
1144// id: provided as a parameter by the Preprocessor
1145// source: provided by the Preprocessor through GetFileSources function
1146
1147 // check connection, in case connect
1148 if(!Connect(kDAQ)){
2bb7b766 1149 Log(detector, "GetDAQFileName - Couldn't connect to DAQ Logbook");
57f50b3c 1150 return 0;
1151 }
1152
1153 // Query preparation
1154 TString sqlQueryStart = "select filePath from logbook_fs where";
1155 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"",
2bb7b766 1156 GetCurrentRun(), GetDetCode(detector), id, source);
57f50b3c 1157 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1158
84090f85 1159 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
57f50b3c 1160
1161 // Query execution
2bb7b766 1162 TSQLResult* aResult = 0;
1163 aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
57f50b3c 1164 if (!aResult) {
2bb7b766 1165 Log(detector, Form("GetDAQFileName - Can't execute SQL query for: id = %s, source = %s",
1166 id, source));
57f50b3c 1167 return 0;
1168 }
1169
1170 if (aResult->GetRowCount() == 0) {
1171 Log(detector,
2bb7b766 1172 Form("GetDAQFileName - No entry in FES table for: id = %s, source = %s",
1173 id, source));
57f50b3c 1174 delete aResult;
1175 return 0;
1176 }
1177
1178 if (aResult->GetRowCount() >1) {
1179 Log(detector,
2bb7b766 1180 Form("GetDAQFileName - More than one entry in FES table for: id = %s, source = %s",
1181 id, source));
57f50b3c 1182 delete aResult;
1183 return 0;
1184 }
1185
2bb7b766 1186 TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
57f50b3c 1187
1188 if(!aRow){
2bb7b766 1189 Log(detector, Form("GetDAQFileName - Empty set result from query: id = %s, source = %s",
1190 id, source));
57f50b3c 1191 delete aResult;
1192 return 0;
1193 }
1194
1195 TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1196
1197 delete aResult;
2bb7b766 1198 delete aRow;
57f50b3c 1199
84090f85 1200 AliDebug(2, Form("filePath = %s",filePath.Data()));
57f50b3c 1201
1202 // retrieved file is renamed to make it unique
1203 TString localFileName = Form("%s_%d_%s_%s.shuttle",
2bb7b766 1204 detector, GetCurrentRun(), id, source);
57f50b3c 1205
1206 // file retrieval from DAQ FES
1207 Bool_t result = RetrieveDAQFile(filePath.Data(), localFileName.Data());
1208 if(!result) {
2bb7b766 1209 Log(detector, Form("GetDAQFileName - Copy of file %s from DAQ FES failed", filePath.Data()));
57f50b3c 1210 return 0;
1211 } else {
2bb7b766 1212 AliInfo(Form("File %s copied from DAQ FES into %s/%s",
57f50b3c 1213 filePath.Data(), fgkShuttleTempDir, localFileName.Data()));
1214 }
1215
1216
1217 fFESCalled[kDAQ]=kTRUE;
1218 TObjString *fileParams = new TObjString(Form("%s_!?!_%s", id, source));
1219 fFESlist[kDAQ].Add(fileParams);
1220
1221 return localFileName.Data();
1222
1223}
1224
1225//______________________________________________________________________________________________
2bb7b766 1226Bool_t AliShuttle::RetrieveDAQFile(const char* daqFileName, const char* localFileName)
1227{
57f50b3c 1228
1229 // check temp directory: trying to cd to temp; if it does not exist, create it
84090f85 1230 AliDebug(2, Form("Copy file %s from DAQ FES into folder %s and rename it as %s",
57f50b3c 1231 daqFileName,fgkShuttleTempDir, localFileName));
1232
1233 void* dir = gSystem->OpenDirectory(fgkShuttleTempDir);
1234 if (dir == NULL) {
1235 if (gSystem->mkdir(fgkShuttleTempDir, kTRUE)) {
2bb7b766 1236 AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
57f50b3c 1237 return kFALSE;
1238 }
1239
1240 } else {
1241 gSystem->FreeDirectory(dir);
1242 }
1243
1244 TString baseDAQFESFolder = "DAQ";
1245 TString command = Form("scp %s@%s:%s/%s %s/%s",
1246 fConfig->GetFESUser(kDAQ),
1247 fConfig->GetFESHost(kDAQ),
1248 baseDAQFESFolder.Data(),
1249 daqFileName,
1250 fgkShuttleTempDir,
1251 localFileName);
1252
84090f85 1253 AliDebug(2, Form("%s",command.Data()));
57f50b3c 1254
1255 UInt_t nRetries = 0;
1256 UInt_t maxRetries = 3;
1257
1258 // copy!! if successful TSystem::Exec returns 0
1259 while(nRetries++ < maxRetries) {
84090f85 1260 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
57f50b3c 1261 if(gSystem->Exec(command.Data()) == 0) return kTRUE;
1262 }
1263
1264 return kFALSE;
1265
1266}
1267
1268//______________________________________________________________________________________________
2bb7b766 1269TList* AliShuttle::GetDAQFileSources(const char* detector, const char* id)
1270{
57f50b3c 1271// Retrieves a file from the DCS FES.
1272
1273 // check connection, in case connect
1274 if(!Connect(kDAQ)){
2bb7b766 1275 Log(detector, "GetDAQFileSources - Couldn't connect to DAQ Logbook");
57f50b3c 1276 return 0;
1277 }
1278
1279 // Query preparation
1280 TString sqlQueryStart = "select DAQsource from logbook_fs where";
1281 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
2bb7b766 1282 GetCurrentRun(), GetDetCode(detector), id);
57f50b3c 1283 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1284
84090f85 1285 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
57f50b3c 1286
1287 // Query execution
1288 TSQLResult* aResult;
1289 aResult = fServer[kDAQ]->Query(sqlQuery);
1290 if (!aResult) {
2bb7b766 1291 Log(detector, Form("GetDAQFileSources - Can't execute SQL query for id: %s", id));
57f50b3c 1292 return 0;
1293 }
1294
1295 if (aResult->GetRowCount() == 0) {
1296 Log(detector,
2bb7b766 1297 Form("GetDAQFileSources - No entry in FES table for id: %s", id));
57f50b3c 1298 delete aResult;
1299 return 0;
1300 }
1301
1302 TSQLRow* aRow;
1303 TList *list = new TList();
1304 list->SetOwner(1);
1305
1306 while((aRow = aResult->Next())){
1307
1308 TString daqSource(aRow->GetField(0), aRow->GetFieldLength(0));
84090f85 1309 AliDebug(2, Form("daqSource = %s", daqSource.Data()));
57f50b3c 1310 list->Add(new TObjString(daqSource));
2bb7b766 1311 delete aRow;
57f50b3c 1312 }
1313 delete aResult;
1314
1315 return list;
1316
1317}
1318
1319//______________________________________________________________________________________________
2bb7b766 1320const char* AliShuttle::GetDCSFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
1321// Retrieves a file from the DCS FES.
1322
1323return "You're in DCS";
1324
1325}
1326
1327//______________________________________________________________________________________________
1328TList* AliShuttle::GetDCSFileSources(const char* /*detector*/, const char* /*id*/){
1329// Retrieves a file from the DCS FES.
1330
1331return NULL;
1332
1333}
1334
1335//______________________________________________________________________________________________
1336const char* AliShuttle::GetHLTFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
1337// Retrieves a file from the HLT FES.
1338
1339return "You're in HLT";
1340
1341}
1342
1343//______________________________________________________________________________________________
1344TList* AliShuttle::GetHLTFileSources(const char* /*detector*/, const char* /*id*/){
1345// Retrieves a file from the HLT FES.
1346
1347return NULL;
1348
1349}
1350
1351//______________________________________________________________________________________________
1352Bool_t AliShuttle::UpdateDAQTable()
1353{
57f50b3c 1354// Update DAQ table filling time_processed field in all rows corresponding to current run and detector
1355
1356 // check connection, in case connect
1357 if(!Connect(kDAQ)){
2bb7b766 1358 Log(fCurrentDetector, "UpdateDAQTable - Couldn't connect to DAQ Logbook");
57f50b3c 1359 return kFALSE;
1360 }
1361
1362 TTimeStamp now; // now
1363
1364 // Loop on FES list entries
1365 TIter iter(&fFESlist[kDAQ]);
1366 TObjString *aFESentry=0;
1367 while((aFESentry = dynamic_cast<TObjString*> (iter.Next()))){
1368 TString aFESentrystr = aFESentry->String();
1369 TObjArray *aFESarray = aFESentrystr.Tokenize("_!?!_");
1370 if(!aFESarray || aFESarray->GetEntries() != 2 ) {
2bb7b766 1371 Log(fCurrentDetector, Form("UpdateDAQTable - error updating FES entry. Check string: <%s>",
57f50b3c 1372 aFESentrystr.Data()));
1373 if(aFESarray) delete aFESarray;
1374 return kFALSE;
1375 }
1376 const char* fileId = ((TObjString*) aFESarray->At(0))->GetName();
1377 const char* daqSource = ((TObjString*) aFESarray->At(1))->GetName();
1378 TString whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
2bb7b766 1379 GetCurrentRun(), GetDetCode(fCurrentDetector), fileId, daqSource);
57f50b3c 1380
1381 delete aFESarray;
1382
1383 TString sqlQuery = Form("update logbook_fs set time_processed=%d %s", now.GetSec(), whereClause.Data());
1384
84090f85 1385 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
57f50b3c 1386
1387 // Query execution
1388 TSQLResult* aResult;
1389 aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
1390 if (!aResult) {
2bb7b766 1391 Log(fCurrentDetector, Form("UpdateDAQTable - Can't execute SQL query <%s>", sqlQuery.Data()));
57f50b3c 1392 return kFALSE;
1393 }
1394 delete aResult;
2bb7b766 1395 }
57f50b3c 1396
2bb7b766 1397 return kTRUE;
1398}
57f50b3c 1399
57f50b3c 1400
2bb7b766 1401//______________________________________________________________________________________________
1402Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1403{
1404// Update Shuttle logbook filling detector or shuttle_done column
1405// ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
57f50b3c 1406
2bb7b766 1407 // check connection, in case connect
1408 if(!Connect(kDAQ)){
1409 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1410 return kFALSE;
57f50b3c 1411 }
1412
2bb7b766 1413 TString detName(detector);
1414 TString setClause;
1415 if(detName == "shuttle_done") {
1416 setClause = "set shuttle_done=1";
1417 } else {
1418 TString detCode = GetDetCode(detector);
1419 if(detCode.IsNull()) {
1420 Log("SHUTTLE", Form("UpdateShuttleLogbook - Unknown detector %s", detector));
57f50b3c 1421 return kFALSE;
1422 }
2bb7b766 1423 TString statusStr(status);
1424 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1425 statusStr.Contains("failed", TString::kIgnoreCase)){
1426 setClause = Form("set %s=\"%s\"", detCode.Data(), status);
1427 } else {
1428 Log("SHUTTLE",
1429 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1430 status, detector));
1431 return kFALSE;
1432 }
1433 }
57f50b3c 1434
2bb7b766 1435 TString whereClause = Form("where run=%d", GetCurrentRun());
1436
1437 TString sqlQuery = Form("update logbook_shuttle %s %s",
1438 setClause.Data(), whereClause.Data());
57f50b3c 1439
2bb7b766 1440 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1441
1442 // Query execution
1443 TSQLResult* aResult;
1444 aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
1445 if (!aResult) {
1446 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1447 return kFALSE;
57f50b3c 1448 }
2bb7b766 1449 delete aResult;
57f50b3c 1450
1451 return kTRUE;
1452}
1453
1454//______________________________________________________________________________________________
2bb7b766 1455Int_t AliShuttle::GetCurrentRun() const
1456{
1457// Get current run from logbook entry
57f50b3c 1458
2bb7b766 1459 return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
57f50b3c 1460}
1461
1462//______________________________________________________________________________________________
2bb7b766 1463UInt_t AliShuttle::GetCurrentStartTime() const
1464{
1465// get current start time
57f50b3c 1466
2bb7b766 1467 return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
57f50b3c 1468}
1469
1470//______________________________________________________________________________________________
2bb7b766 1471UInt_t AliShuttle::GetCurrentEndTime() const
1472{
1473// get current end time from logbook entry
57f50b3c 1474
2bb7b766 1475 return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
57f50b3c 1476}
1477
1478//______________________________________________________________________________________________
2bb7b766 1479const char* AliShuttle::GetDetCode(const char* detector){
1480// Return detector code
57f50b3c 1481
2bb7b766 1482 for(UInt_t iDet=0; iDet < kNDetectors; iDet++){
1483 if(!strcmp(fgkDetectorName[iDet], detector)) return fgkDetectorCode[iDet];
1484 }
57f50b3c 1485
2bb7b766 1486 AliErrorClass(Form("Unknown detector: %s",detector));
1487 return 0;
57f50b3c 1488}
1489
1490//______________________________________________________________________________________________
2bb7b766 1491const char* AliShuttle::GetDetCode(UInt_t detPos){
57f50b3c 1492// Return detector code
1493
2bb7b766 1494 if( detPos >= kNDetectors) {
1495 AliErrorClass(Form("Invalid parameter: %d", detPos));
1496 return 0;
57f50b3c 1497 }
2bb7b766 1498 return fgkDetectorCode[detPos];
1499}
b948db8d 1500
2bb7b766 1501//______________________________________________________________________________________________
1502const Int_t AliShuttle::GetDetPos(const char* detCode){
1503// Return detector position in the detector code array
1504
1505 for(UInt_t iDet=0; iDet < kNDetectors; iDet++){
1506 if(!strcmp(fgkDetectorCode[iDet], detCode)) return iDet;
1507 }
1508 return -1;
b948db8d 1509}
1510
1511//______________________________________________________________________________________________
1512void AliShuttle::Log(const char* detector, const char* message)
1513{
58bc3020 1514// Fill log string with a message
b948db8d 1515
84090f85 1516 void* dir = gSystem->OpenDirectory(fgkShuttleLogDir);
1517 if (dir == NULL) {
1518 if (gSystem->mkdir(fgkShuttleLogDir, kTRUE)) {
2bb7b766 1519 AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
84090f85 1520 return;
1521 }
b948db8d 1522
84090f85 1523 } else {
1524 gSystem->FreeDirectory(dir);
1525 }
b948db8d 1526
cb343cfd 1527 TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2bb7b766 1528 if(GetCurrentRun()>=0 ) toLog += Form("run %d - ", GetCurrentRun());
1529 toLog += Form("%s", message);
1530
84090f85 1531 AliInfo(toLog.Data());
b948db8d 1532
84090f85 1533 TString fileName;
1534 fileName.Form("%s/%s.log", fgkShuttleLogDir, detector);
1535 gSystem->ExpandPathName(fileName);
1536
1537 ofstream logFile;
1538 logFile.open(fileName, ofstream::out | ofstream::app);
1539
1540 if (!logFile.is_open()) {
1541 AliError(Form("Could not open file %s", fileName.Data()));
1542 return;
1543 }
7bfb2090 1544
84090f85 1545 logFile << toLog.Data() << "\n";
b948db8d 1546
84090f85 1547 logFile.close();
b948db8d 1548}
2bb7b766 1549
2bb7b766 1550//______________________________________________________________________________________________
1551Bool_t AliShuttle::Collect(Int_t run)
1552{
1553 //
cb343cfd 1554 // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1555 // If a dedicated run is given this run is processed
1556 //
2bb7b766 1557 // In operational mode, this is the Shuttle function triggered by the EOR signal.
1558 //
1559
cb343cfd 1560 if (run == -1)
1561 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1562 else
1563 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
1564
1565 SetLastAction("Starting");
2bb7b766 1566
1567 TString whereClause("where shuttle_done=0");
cb343cfd 1568 if (run != -1)
1569 whereClause += Form(" and run=%d", run);
2bb7b766 1570
1571 TObjArray shuttleLogbookEntries;
1572 if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries)) {
cb343cfd 1573 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2bb7b766 1574 return kFALSE;
1575 }
1576
1577 if (!RetrieveConditionsData(shuttleLogbookEntries)) {
cb343cfd 1578 Log("SHUTTLE", "Collect - Process of at least one run failed");
2bb7b766 1579 return kFALSE;
1580 }
1581
cb343cfd 1582 return kTRUE;
2bb7b766 1583}
1584
2bb7b766 1585//______________________________________________________________________________________________
1586Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1587{
1588// Retrieve conditions data for all runs that aren't processed yet
1589
1590 Bool_t hasError = kFALSE;
1591
1592 TIter iter(&dateEntries);
1593 AliShuttleLogbookEntry* anEntry;
1594
1595 while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1596 if (!Process(anEntry)){
1597 hasError = kTRUE;
1598 }
1599 }
1600
1601 return hasError == kFALSE;
1602}
cb343cfd 1603
1604//______________________________________________________________________________________________
1605ULong_t AliShuttle::GetTimeOfLastAction() const
1606{
1607 ULong_t tmp;
1608
1609 fMonitoringMutex->Lock();
1610
1611 tmp = fLastActionTime;
1612
1613 fMonitoringMutex->UnLock();
1614
1615 return tmp;
1616}
1617
1618//______________________________________________________________________________________________
1619const TString AliShuttle::GetLastAction() const
1620{
1621 // returns a string description of the last action
1622
1623 TString tmp;
1624
1625 fMonitoringMutex->Lock();
1626
1627 tmp = fLastAction;
1628
1629 fMonitoringMutex->UnLock();
1630
1631 return tmp;
1632}
1633
1634//______________________________________________________________________________________________
1635void AliShuttle::SetLastAction(const char* action)
1636{
1637 // updates the monitoring variables
1638
1639 fMonitoringMutex->Lock();
1640
1641 fLastAction = action;
1642 fLastActionTime = time(0);
1643
1644 fMonitoringMutex->UnLock();
1645}