]> git.uio.no Git - u/mrichter/AliRoot.git/blame - SHUTTLE/AliShuttle.cxx
introducing strict run ordering flag
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
CommitLineData
73abe331 1/**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3 * *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
6 * *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
15
16/*
17$Log$
be48e3ea 18Revision 1.19 2006/11/06 14:23:04 jgrosseo
19major update (Alberto)
20o) reading of run parameters from the logbook
21o) online offline naming conversion
22o) standalone DCSclient package
23
eba76848 24Revision 1.18 2006/10/20 15:22:59 jgrosseo
25o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
26o) Merging Collect, CollectAll, CollectNew function
27o) Removing implementation of empty copy constructors (declaration still there!)
28
cb343cfd 29Revision 1.17 2006/10/05 16:20:55 jgrosseo
30adapting to new CDB classes
31
6ec0e06c 32Revision 1.16 2006/10/05 15:46:26 jgrosseo
33applying to the new interface
34
481441a2 35Revision 1.15 2006/10/02 16:38:39 jgrosseo
36update (alberto):
37fixed memory leaks
38storing of objects that failed to be stored to the grid before
39interfacing of shuttle status table in daq system
40
2bb7b766 41Revision 1.14 2006/08/29 09:16:05 jgrosseo
42small update
43
85a80aa9 44Revision 1.13 2006/08/15 10:50:00 jgrosseo
45effc++ corrections (alberto)
46
4f0ab988 47Revision 1.12 2006/08/08 14:19:29 jgrosseo
48Update to shuttle classes (Alberto)
49
50- Possibility to set the full object's path in the Preprocessor's and
51Shuttle's Store functions
52- Possibility to extend the object's run validity in the same classes
53("startValidity" and "validityInfinite" parameters)
54- Implementation of the StoreReferenceData function to store reference
55data in a dedicated CDB storage.
56
84090f85 57Revision 1.11 2006/07/21 07:37:20 jgrosseo
58last run is stored after each run
59
7bfb2090 60Revision 1.10 2006/07/20 09:54:40 jgrosseo
61introducing status management: The processing per subdetector is divided into several steps,
62after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
63can keep track of the number of failures and skips further processing after a certain threshold is
64exceeded. These thresholds can be configured in LDAP.
65
5164a766 66Revision 1.9 2006/07/19 10:09:55 jgrosseo
67new configuration, accesst to DAQ FES (Alberto)
68
57f50b3c 69Revision 1.8 2006/07/11 12:44:36 jgrosseo
70adding parameters for extended validity range of data produced by preprocessor
71
17111222 72Revision 1.7 2006/07/10 14:37:09 jgrosseo
73small fix + todo comment
74
e090413b 75Revision 1.6 2006/07/10 13:01:41 jgrosseo
76enhanced storing of last sucessfully processed run (alberto)
77
a7160fe9 78Revision 1.5 2006/07/04 14:59:57 jgrosseo
79revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
80
45a493ce 81Revision 1.4 2006/06/12 09:11:16 jgrosseo
82coding conventions (Alberto)
83
58bc3020 84Revision 1.3 2006/06/06 14:26:40 jgrosseo
85o) removed files that were moved to STEER
86o) shuttle updated to follow the new interface (Alberto)
87
b948db8d 88Revision 1.2 2006/03/07 07:52:34 hristov
89New version (B.Yordanov)
90
d477ad88 91Revision 1.6 2005/11/19 17:19:14 byordano
92RetrieveDATEEntries and RetrieveConditionsData added
93
94Revision 1.5 2005/11/19 11:09:27 byordano
95AliShuttle declaration added
96
97Revision 1.4 2005/11/17 17:47:34 byordano
98TList changed to TObjArray
99
100Revision 1.3 2005/11/17 14:43:23 byordano
101import to local CVS
102
103Revision 1.1.1.1 2005/10/28 07:33:58 hristov
104Initial import as subdirectory in AliRoot
105
73abe331 106Revision 1.2 2005/09/13 08:41:15 byordano
107default startTime endTime added
108
109Revision 1.4 2005/08/30 09:13:02 byordano
110some docs added
111
112Revision 1.3 2005/08/29 21:15:47 byordano
113some docs added
114
115*/
116
//
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in the configuration (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used as detSpec.
//
129
130#include "AliShuttle.h"
131
132#include "AliCDBManager.h"
133#include "AliCDBStorage.h"
134#include "AliCDBId.h"
84090f85 135#include "AliCDBRunRange.h"
136#include "AliCDBPath.h"
5164a766 137#include "AliCDBEntry.h"
73abe331 138#include "AliShuttleConfig.h"
eba76848 139#include "DCSClient/AliDCSClient.h"
73abe331 140#include "AliLog.h"
b948db8d 141#include "AliPreprocessor.h"
5164a766 142#include "AliShuttleStatus.h"
2bb7b766 143#include "AliShuttleLogbookEntry.h"
73abe331 144
57f50b3c 145#include <TSystem.h>
58bc3020 146#include <TObject.h>
b948db8d 147#include <TString.h>
57f50b3c 148#include <TTimeStamp.h>
73abe331 149#include <TObjString.h>
57f50b3c 150#include <TSQLServer.h>
151#include <TSQLResult.h>
152#include <TSQLRow.h>
cb343cfd 153#include <TMutex.h>
73abe331 154
5164a766 155#include <fstream>
156
cb343cfd 157#include <sys/types.h>
158#include <sys/wait.h>
159
73abe331 160ClassImp(AliShuttle)
161
2bb7b766 162TString AliShuttle::fgkMainCDB("alien://folder=ShuttleCDB");
84090f85 163TString AliShuttle::fgkLocalCDB("local://LocalShuttleCDB");
2bb7b766 164TString AliShuttle::fgkMainRefStorage("alien://folder=ShuttleReference");
84090f85 165TString AliShuttle::fgkLocalRefStorage("local://LocalReferenceStorage");
166
4f0ab988 167Bool_t AliShuttle::fgkProcessDCS(kTRUE);
168
84090f85 169const char* AliShuttle::fgkShuttleTempDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/temp");
170const char* AliShuttle::fgkShuttleLogDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/log");
57f50b3c 171
b948db8d 172//______________________________________________________________________________________________
173AliShuttle::AliShuttle(const AliShuttleConfig* config,
174 UInt_t timeout, Int_t retries):
4f0ab988 175fConfig(config),
176fTimeout(timeout), fRetries(retries),
177fPreprocessorMap(),
2bb7b766 178fLogbookEntry(0),
eba76848 179fCurrentDetector(),
85a80aa9 180fStatusEntry(0),
cb343cfd 181fGridError(kFALSE),
182fMonitoringMutex(0),
eba76848 183fLastActionTime(0),
184fLastAction()
73abe331 185{
186 //
187 // config: AliShuttleConfig used
73abe331 188 // timeout: timeout used for AliDCSClient connection
189 // retries: the number of retries in case of connection error.
190 //
191
57f50b3c 192 if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
be48e3ea 193 for(int iSys=0;iSys<4;iSys++) {
57f50b3c 194 fServer[iSys]=0;
be48e3ea 195 if (iSys < 3)
196 fFESlist[iSys].SetOwner(kTRUE);
57f50b3c 197 }
2bb7b766 198 fPreprocessorMap.SetOwner(kTRUE);
be48e3ea 199
200 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
201 fFirstUnprocessed[iDet] = kFALSE;
202
cb343cfd 203 fMonitoringMutex = new TMutex();
58bc3020 204}
205
b948db8d 206//______________________________________________________________________________________________
57f50b3c 207AliShuttle::~AliShuttle()
58bc3020 208{
209// destructor
210
b948db8d 211 fPreprocessorMap.DeleteAll();
be48e3ea 212 for(int iSys=0;iSys<4;iSys++)
57f50b3c 213 if(fServer[iSys]) {
214 fServer[iSys]->Close();
215 delete fServer[iSys];
eba76848 216 fServer[iSys] = 0;
57f50b3c 217 }
2bb7b766 218
219 if (fStatusEntry){
220 delete fStatusEntry;
221 fStatusEntry = 0;
222 }
cb343cfd 223
224 if (fMonitoringMutex)
225 {
226 delete fMonitoringMutex;
227 fMonitoringMutex = 0;
228 }
73abe331 229}
230
b948db8d 231//______________________________________________________________________________________________
57f50b3c 232void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
58bc3020 233{
73abe331 234 //
b948db8d 235 // Registers new AliPreprocessor.
73abe331 236 // It uses GetName() for indentificator of the pre processor.
237 // The pre processor is registered it there isn't any other
238 // with the same identificator (GetName()).
239 //
240
eba76848 241 const char* detName = preprocessor->GetName();
242 if(GetDetPos(detName) < 0)
243 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
244
245 if (fPreprocessorMap.GetValue(detName)) {
246 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
73abe331 247 return;
248 }
249
eba76848 250 fPreprocessorMap.Add(new TObjString(detName), preprocessor);
73abe331 251}
b948db8d 252//______________________________________________________________________________________________
84090f85 253UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
254 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
73abe331 255{
84090f85 256 // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
257 // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
258 // using this function. Use StoreReferenceData instead!
85a80aa9 259 // It calls WriteToCDB function which perform actual storage
b948db8d 260
85a80aa9 261 return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
262 metaData, validityStart, validityInfinite);
84090f85 263
264}
265
266//______________________________________________________________________________________________
481441a2 267UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
84090f85 268{
269 // Stores a CDB object in the storage for reference data. This objects will not be available during
270 // offline reconstrunction. Use this function for reference data only!
85a80aa9 271 // It calls WriteToCDB function which perform actual storage
272
481441a2 273 return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
84090f85 274
85a80aa9 275}
276
277//______________________________________________________________________________________________
278UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
279 const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
280 Int_t validityStart, Bool_t validityInfinite)
281{
282 // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
283 // The parameters are:
284 // 1) Uri of the main storage (Grid)
285 // 2) Uri of the backup storage (Local)
286 // 3) the object's path.
287 // 4) the object to be stored
288 // 5) the metaData to be associated with the object
289 // 6) the validity start run number w.r.t. the current run,
84090f85 290 // if the data is valid only for this run leave the default 0
85a80aa9 291 // 7) specifies if the calibration data is valid for infinity (this means until updated),
84090f85 292 // typical for calibration runs, the default is kFALSE
293 //
84090f85 294 // returns 0 if fail
85a80aa9 295 // 1 if stored in main (Grid) storage
296 // 2 if stored in backup (Local) storage
84090f85 297
85a80aa9 298 const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
2bb7b766 299
85a80aa9 300 Int_t firstRun = GetCurrentRun() - validityStart;
84090f85 301 if(firstRun < 0) {
2bb7b766 302 AliError("First valid run happens to be less than 0! Setting it to 0.");
84090f85 303 firstRun=0;
304 }
305
306 Int_t lastRun = -1;
307 if(validityInfinite) {
308 lastRun = AliCDBRunRange::Infinity();
309 } else {
310 lastRun = GetCurrentRun();
311 }
312
2bb7b766 313 AliCDBId id(path, firstRun, lastRun, -1, -1);
314
315 if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
316 TObjString runUsed = Form("%d", GetCurrentRun());
317 metaData->SetProperty("RunUsed(TObjString)",&runUsed);
318 }
84090f85 319
320 UInt_t result = 0;
321
85a80aa9 322 if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
2bb7b766 323 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
84090f85 324 } else {
85a80aa9 325 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
84090f85 326 ->Put(object, id, metaData);
327 }
328
329 if(!result) {
330
331 Log(fCurrentDetector,
2bb7b766 332 Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
333 cdbType, path.GetPath().Data()));
334
335 // Set Grid version to current run number, to ease retrieval later
336 id.SetVersion(GetCurrentRun());
84090f85 337
85a80aa9 338 result = AliCDBManager::Instance()->GetStorage(localUri)
84090f85 339 ->Put(object, id, metaData);
340
341 if(result) {
342 result = 2;
85a80aa9 343 fGridError = kTRUE;
84090f85 344 }else{
2bb7b766 345 Log(fCurrentDetector, "WriteToCDB - Can't store data!");
b948db8d 346 }
347 }
2bb7b766 348
b948db8d 349 return result;
350
73abe331 351}
352
b948db8d 353//______________________________________________________________________________________________
5164a766 354AliShuttleStatus* AliShuttle::ReadShuttleStatus()
355{
2bb7b766 356// Reads the AliShuttleStatus from the CDB
5164a766 357
2bb7b766 358 if (fStatusEntry){
359 delete fStatusEntry;
360 fStatusEntry = 0;
361 }
5164a766 362
2bb7b766 363 fStatusEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalCDB())
364 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
5164a766 365
2bb7b766 366 if (!fStatusEntry) return 0;
367 fStatusEntry->SetOwner(1);
5164a766 368
2bb7b766 369 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
370 if (!status) {
371 AliError("Invalid object stored to CDB!");
372 return 0;
373 }
5164a766 374
2bb7b766 375 return status;
5164a766 376}
377
378//______________________________________________________________________________________________
7bfb2090 379Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
5164a766 380{
2bb7b766 381// writes the status for one subdetector
382
383 if (fStatusEntry){
384 delete fStatusEntry;
385 fStatusEntry = 0;
386 }
5164a766 387
2bb7b766 388 Int_t run = GetCurrentRun();
5164a766 389
2bb7b766 390 AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
5164a766 391
2bb7b766 392 fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
393 fStatusEntry->SetOwner(1);
5164a766 394
2bb7b766 395 UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
7bfb2090 396
2bb7b766 397 if (!result) {
398 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
399 return kFALSE;
400 }
7bfb2090 401
2bb7b766 402 return kTRUE;
5164a766 403}
404
405//______________________________________________________________________________________________
406void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
407{
408 // changes the AliShuttleStatus for the given detector and run to the given status
409
2bb7b766 410 if (!fStatusEntry){
411 AliError("UNEXPECTED: fStatusEntry empty");
412 return;
413 }
5164a766 414
2bb7b766 415 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
5164a766 416
2bb7b766 417 if (!status){
418 AliError("UNEXPECTED: status could not be read from current CDB entry");
419 return;
420 }
5164a766 421
eba76848 422 TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
423 fCurrentDetector.Data(),
424 status->GetStatusName(),
425 status->GetStatusName(newStatus));
cb343cfd 426 Log("SHUTTLE", actionStr);
427 SetLastAction(actionStr);
5164a766 428
2bb7b766 429 status->SetStatus(newStatus);
430 if (increaseCount) status->IncreaseCount();
5164a766 431
2bb7b766 432 AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
5164a766 433}
5164a766 434//______________________________________________________________________________________________
435Bool_t AliShuttle::ContinueProcessing()
436{
2bb7b766 437// this function reads the AliShuttleStatus information from CDB and
438// checks if the processing should be continued
439// if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
440
2bb7b766 441 AliShuttleLogbookEntry::Status entryStatus =
eba76848 442 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
2bb7b766 443
444 if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
eba76848 445 Log("SHUTTLE", Form("ContinueProcessing - %s is %s",
2bb7b766 446 fCurrentDetector.Data(),
447 fLogbookEntry->GetDetectorStatusName(entryStatus)));
448 return kFALSE;
449 }
450
451 // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
be48e3ea 452
453 // check if current run is first unprocessed run for current detector
454 if (fConfig->StrictRunOrder(fCurrentDetector) &&
455 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
456 {
457 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
458 return kFALSE;
459 }
460
2bb7b766 461 AliShuttleStatus* status = ReadShuttleStatus();
462 if (!status) {
463 // first time
464 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
465 fCurrentDetector.Data()));
466 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
467 return WriteShuttleStatus(status);
468 }
469
470 // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
471 // If it happens it may mean Logbook updating failed... let's do it now!
472 if (status->GetStatus() == AliShuttleStatus::kDone ||
473 status->GetStatus() == AliShuttleStatus::kFailed){
474 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
475 fCurrentDetector.Data(),
476 status->GetStatusName(status->GetStatus())));
477 UpdateShuttleLogbook(fCurrentDetector.Data(),
478 status->GetStatusName(status->GetStatus()));
479 return kFALSE;
480 }
481
482 if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
483 Log("SHUTTLE",
484 Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
485 fCurrentDetector.Data()));
486 if(TryToStoreAgain()){
487 Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
488 UpdateShuttleStatus(AliShuttleStatus::kDone);
489 UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
490 } else {
491 Log("SHUTTLE",
492 Form("ContinueProcessing - %s: Grid storage failed again",
493 fCurrentDetector.Data()));
494 }
495 return kFALSE;
496 }
497
498 // if we get here, there is a restart
499
500 // abort conditions
cb343cfd 501 if (status->GetCount() >= fConfig->GetMaxRetries()) {
2bb7b766 502 Log("SHUTTLE",
503 Form("ContinueProcessing - %s failed %d times in status %s - Updating Shuttle Logbook",
504 fCurrentDetector.Data(),
505 status->GetCount(), status->GetStatusName()));
506 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
507 return kFALSE;
508 }
509
eba76848 510 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. Aborted before with %s. Retry number %d.",
2bb7b766 511 fCurrentDetector.Data(),
512 status->GetStatusName(), status->GetCount()));
513
cb343cfd 514 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
2bb7b766 515
516 return kTRUE;
5164a766 517}
518
519//______________________________________________________________________________________________
2bb7b766 520Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
58bc3020 521{
73abe331 522 //
b948db8d 523 // Makes data retrieval for all detectors in the configuration.
2bb7b766 524 // entry: Shuttle logbook entry, contains run paramenters and status of detectors
525 // (Unprocessed, Inactive, Failed or Done).
d477ad88 526 // Returns kFALSE in case of error occured and kTRUE otherwise
73abe331 527 //
528
2bb7b766 529 if(!entry) return kFALSE;
530
531 fLogbookEntry = entry;
532
533 if(fLogbookEntry->IsDone()){
534 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
535 UpdateShuttleLogbook("shuttle_done");
536 fLogbookEntry = 0;
537 return kTRUE;
538 }
539
540
541 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
542 GetCurrentRun()));
543
eba76848 544 fLogbookEntry->Print("all");
57f50b3c 545
546 // Initialization
d477ad88 547 Bool_t hasError = kFALSE;
57f50b3c 548 for(Int_t iSys=0;iSys<3;iSys++) fFESCalled[iSys]=kFALSE;
5164a766 549
2bb7b766 550 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
551 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
552 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
553 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
d477ad88 554
57f50b3c 555 // Loop on detectors in the configuration
b948db8d 556 TIter iter(fConfig->GetDetectors());
2bb7b766 557 TObjString* aDetector = 0;
b948db8d 558
be48e3ea 559 while ((aDetector = (TObjString*) iter.Next()))
560 {
7bfb2090 561 fCurrentDetector = aDetector->String();
5164a766 562
5164a766 563 if (!fConfig->HostProcessDetector(fCurrentDetector)) continue;
564
2bb7b766 565 AliPreprocessor* aPreprocessor =
566 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
be48e3ea 567 if (!aPreprocessor)
568 {
569 Log("SHUTTLE",Form("Process - %s: no preprocessor registered. Skipping",
eba76848 570 fCurrentDetector.Data()));
2bb7b766 571 continue;
572 }
573
7bfb2090 574 if (ContinueProcessing() == kFALSE) continue;
5164a766 575
2bb7b766 576 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START ******",
577 GetCurrentRun(), aDetector->GetName()));
578
85a80aa9 579
be48e3ea 580 Int_t pid = fork();
581
582 if (pid < 0)
583 {
584 Log("SHUTTLE", "ERROR: Forking failed");
585 }
586 else if (pid > 0)
587 {
588 // parent
589 AliInfo(Form("In parent process of %d - %s: Starting monitoring",
590 GetCurrentRun(), aDetector->GetName()));
591
592 Long_t begin = time(0);
593
594 int status; // to be used with waitpid, on purpose an int (not Int_t)!
595 while (waitpid(pid, &status, WNOHANG) == 0)
596 {
597 Long_t expiredTime = time(0) - begin;
598
599 if (expiredTime > fConfig->GetPPTimeOut())
600 {
601 Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
602 expiredTime));
603
604 kill(pid, 9);
605
606 hasError = kTRUE;
607
608 gSystem->Sleep(1000);
609 }
610 else
611 {
612 if (expiredTime % 60 == 0)
613 Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
614 expiredTime));
615 gSystem->Sleep(1000);
616 }
617 }
618
619 AliInfo(Form("In parent process of %d - %s: Client has terminated.",
620 GetCurrentRun(), aDetector->GetName()));
621
622 if (WIFEXITED(status))
623 {
624 Int_t returnCode = WEXITSTATUS(status);
625
626 Log("SHUTTLE", Form("The return code is %d", returnCode));
627
628 if (returnCode != 0)
629 hasError = kTRUE;
630 }
631 }
632 else if (pid == 0)
633 {
634 // client
635 AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
636
637 UInt_t result = ProcessCurrentDetector();
638
639 Int_t returnCode = 0; // will be set to 1 in case of an error
640
641 if (!result)
642 {
643 returnCode = 1;
644 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
645 GetCurrentRun(), aDetector->GetName()));
646 }
647 else if (result == 2)
648 {
649 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
650 GetCurrentRun(), aDetector->GetName()));
651 } else
652 {
653 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
654 GetCurrentRun(), aDetector->GetName()));
655 }
656
657 if (result > 0)
658 {
659 // Process successful: Update time_processed field in FES logbooks!
660 if (fFESCalled[kDAQ])
661 {
662 if (UpdateDAQTable() == kFALSE)
663 returnCode = 1;
664 fFESlist[kDAQ].Clear();
665 }
666 //if(fFESCalled[kDCS]) {
667 // if (UpdateDCSTable(aDetector->GetName()) == kFALSE)
668 // returnCode = 1;
669 // fFESlist[kDCS].Clear();
670 //}
671 //if(fFESCalled[kHLT]) {
672 // if (UpdateHLTTable(aDetector->GetName()) == kFALSE)
673 // returnCode = 1;
674 // fFESlist[kHLT].Clear();
675 //}
676 }
677
678 AliInfo(Form("Client process of %d - %s is exiting now with %d.",
679 GetCurrentRun(), aDetector->GetName(), returnCode));
680
681 // the client exits here
682 gSystem->Exit(returnCode);
683
684 AliError("We should never get here!!!");
685 }
7bfb2090 686 }
5164a766 687
2bb7b766 688 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
689 GetCurrentRun()));
690
691 //check if shuttle is done for this run, if so update logbook
692 TObjArray checkEntryArray;
693 checkEntryArray.SetOwner(1);
694 TString whereClause = Form("where run=%d",GetCurrentRun());
be48e3ea 695 if (QueryShuttleLogbook(whereClause.Data(), checkEntryArray)) {
b948db8d 696
2bb7b766 697 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
698 (checkEntryArray.At(0));
699
be48e3ea 700 if (checkEntry)
701 {
702 if (checkEntry->IsDone())
703 {
704 Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
705 UpdateShuttleLogbook("shuttle_done");
706 }
707 else
708 {
709 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
710 {
711 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
712 {
713 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
714 checkEntry->GetRun(), GetDetName(iDet)));
715 fFirstUnprocessed[iDet] = kFALSE;
716 }
717 }
718 }
2bb7b766 719 }
720 }
721
722 fLogbookEntry = 0;
85a80aa9 723
a7160fe9 724 return hasError == kFALSE;
73abe331 725}
726
b948db8d 727//______________________________________________________________________________________________
2bb7b766 728UInt_t AliShuttle::ProcessCurrentDetector()
73abe331 729{
730 //
2bb7b766 731 // Makes data retrieval just for a specific detector (fCurrentDetector).
73abe331 732 // Threre should be a configuration for this detector.
73abe331 733
2bb7b766 734 AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
73abe331 735
7bfb2090 736 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
73abe331 737
7bfb2090 738 TString host(fConfig->GetDCSHost(fCurrentDetector));
5164a766 739 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
740
741 TIter iter(fConfig->GetDCSAliases(fCurrentDetector));
73abe331 742 TObjString* anAlias;
b948db8d 743 TMap aliasMap;
2bb7b766 744 aliasMap.SetOwner(1);
73abe331 745
85a80aa9 746 Bool_t aDCSError = kFALSE;
747 fGridError = kFALSE;
d477ad88 748
b948db8d 749 while ((anAlias = (TObjString*) iter.Next())) {
2bb7b766 750 TObjArray *valueSet = new TObjArray();
751 valueSet->SetOwner(1);
4f0ab988 752 // TODO Test only... I've added a flag that allows to
753 // exclude DCS archive DB query
754 if(fgkProcessDCS){
755 AliInfo("Querying DCS archive DB data...");
85a80aa9 756 aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet) == 0);
4f0ab988 757 } else {
758 AliInfo(Form("Skipping DCS processing. Port = %d",port));
85a80aa9 759 aDCSError = kFALSE;
4f0ab988 760 }
85a80aa9 761 if(!aDCSError) {
2bb7b766 762 aliasMap.Add(anAlias->Clone(), valueSet);
b948db8d 763 }else{
2bb7b766 764 Log(fCurrentDetector, Form("ProcessCurrentDetector - Error while retrieving alias %s",
765 anAlias->GetName()));
766 UpdateShuttleStatus(AliShuttleStatus::kDCSError, kTRUE);
767 aliasMap.DeleteAll();
768 return 0;
73abe331 769 }
770 }
b948db8d 771
2bb7b766 772 // DCS Archive DB processing successful. Call Preprocessor!
85a80aa9 773 UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
a7160fe9 774
85a80aa9 775 AliPreprocessor* aPreprocessor =
5164a766 776 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
b948db8d 777
2bb7b766 778 aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
2bb7b766 779 UInt_t aPPResult = aPreprocessor->Process(&aliasMap);
780
781 UInt_t returnValue = 0;
85a80aa9 782 if (aPPResult == 0) { // Preprocessor error
cb343cfd 783 UpdateShuttleStatus(AliShuttleStatus::kPPError);
2bb7b766 784 returnValue = 0;
85a80aa9 785 } else if (fGridError == kFALSE) { // process and Grid storage ok!
786 UpdateShuttleStatus(AliShuttleStatus::kDone);
2bb7b766 787 UpdateShuttleLogbook(fCurrentDetector, "DONE");
788 Log(fCurrentDetector.Data(),
789 "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
790 returnValue = 1;
85a80aa9 791 } else { // Grid storage error (process ok, but object put in local storage)
792 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
2bb7b766 793 returnValue = 2;
85a80aa9 794 }
b948db8d 795
2bb7b766 796 aliasMap.DeleteAll();
b948db8d 797
2bb7b766 798 return returnValue;
799}
800
801//______________________________________________________________________________________________
802Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
803 TObjArray& entries)
804{
805// Query DAQ's Shuttle logbook and fills detector status object.
806// Call QueryRunParameters to query DAQ logbook for run parameters.
807
808 // check connection, in case connect
be48e3ea 809 if(!Connect(3)) return kFALSE;
2bb7b766 810
811 TString sqlQuery;
812 sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause);
813
be48e3ea 814 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2bb7b766 815 if (!aResult) {
816 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
817 return kFALSE;
818 }
819
820 if(aResult->GetRowCount() == 0) {
821 if(sqlQuery.Contains("where shuttle_done=0")){
822 Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
823 delete aResult;
824 return kTRUE;
825 } else {
826 AliError("No entries in Shuttle Logbook match request");
827 delete aResult;
828 return kFALSE;
829 }
830 }
831
832 // TODO Check field count!
833 const UInt_t nCols = 24;
834 if (aResult->GetFieldCount() != (Int_t) nCols) {
835 AliError("Invalid SQL result field number!");
836 delete aResult;
837 return kFALSE;
838 }
839
840 entries.SetOwner(1);
841
842 TSQLRow* aRow;
843 while ((aRow = aResult->Next())) {
844 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
845 Int_t run = runString.Atoi();
846
eba76848 847 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
848 if (!entry)
849 continue;
2bb7b766 850
851 // loop on detectors
eba76848 852 for(UInt_t ii = 0; ii < nCols; ii++)
853 entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
2bb7b766 854
eba76848 855 entries.AddLast(entry);
2bb7b766 856 delete aRow;
857 }
858
859 if(sqlQuery.Contains("where shuttle_done=0"))
860 Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
861 entries.GetEntriesFast()));
862 delete aResult;
863 return kTRUE;
864}
865
866//______________________________________________________________________________________________
eba76848 867AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
2bb7b766 868{
eba76848 869 //
870 // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
871 //
2bb7b766 872
873 // check connection, in case connect
be48e3ea 874 if (!Connect(3))
eba76848 875 return 0;
2bb7b766 876
877 TString sqlQuery;
eba76848 878 sqlQuery.Form("select * from logbook where run=%d", run);
2bb7b766 879
be48e3ea 880 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2bb7b766 881 if (!aResult) {
882 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
eba76848 883 return 0;
2bb7b766 884 }
885
eba76848 886 if (aResult->GetRowCount() == 0) {
2bb7b766 887 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
888 delete aResult;
eba76848 889 return 0;
2bb7b766 890 }
891
eba76848 892 if (aResult->GetRowCount() > 1) {
2bb7b766 893 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
894 delete aResult;
eba76848 895 return 0;
2bb7b766 896 }
897
eba76848 898 TSQLRow* aRow = aResult->Next();
899 if (!aRow)
900 {
901 AliError(Form("Could not retrieve row for run %d. Skipping", run));
902 delete aResult;
903 return 0;
904 }
2bb7b766 905
eba76848 906 AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
2bb7b766 907
eba76848 908 for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
909 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
2bb7b766 910
eba76848 911 UInt_t startTime = entry->GetStartTime();
912 UInt_t endTime = entry->GetEndTime();
913
914 if (!startTime || !endTime || startTime > endTime) {
915 Log("SHUTTLE",
916 Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
917 run, startTime, endTime));
918 delete entry;
2bb7b766 919 delete aRow;
eba76848 920 delete aResult;
921 return 0;
2bb7b766 922 }
923
eba76848 924 delete aRow;
2bb7b766 925 delete aResult;
eba76848 926
927 return entry;
2bb7b766 928}
929
930//______________________________________________________________________________________________
931Bool_t AliShuttle::TryToStoreAgain()
932{
933 // Called in case the detector failed to store the object in Grid OCDB
934 // It tries to store the object again, if it does not find more recent and overlapping objects
935 // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage.
936
937 AliInfo("Trying to store OCDB data again...");
938 Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
939
940 AliInfo("Trying to store reference data again...");
941 Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
942
943 return resultCDB && resultRef;
944}
945
946//______________________________________________________________________________________________
947Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
948{
949 // Called by TryToStoreAgain(), performs actual storage retry
950
6ec0e06c 951 TObjArray* gridIds=0;
2bb7b766 952
953 Bool_t result = kTRUE;
954
955 const char* type = 0;
956 TString backupURI;
957 if(gridURI == fgkMainCDB) {
958 type = "OCDB";
959 backupURI = fgkLocalCDB;
960 } else if(gridURI == fgkMainRefStorage) {
961 type = "reference";
962 backupURI = fgkLocalRefStorage;
963 } else {
964 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
965 return kFALSE;
966 }
967
968 AliCDBManager* man = AliCDBManager::Instance();
969
970 AliCDBStorage *gridSto = man->GetStorage(gridURI);
971 if(!gridSto) {
972 Log(fCurrentDetector.Data(),
973 Form("TryToStoreAgain - cannot activate main %s storage", type));
974 return kFALSE;
975 }
976
977 gridIds = gridSto->GetQueryCDBList();
978
979 // get objects previously stored in local CDB
980 AliCDBStorage *backupSto = man->GetStorage(backupURI);
eba76848 981 AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
2bb7b766 982 // Local objects were stored with current run as Grid version!
983 TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
984 localEntries->SetOwner(1);
985
986 // loop on local stored objects
987 TIter localIter(localEntries);
988 AliCDBEntry *aLocEntry = 0;
989 while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
990 aLocEntry->SetOwner(1);
991 AliCDBId aLocId = aLocEntry->GetId();
992 aLocEntry->SetVersion(-1);
993 aLocEntry->SetSubVersion(-1);
994
995 // loop on Grid valid Id's
996 Bool_t store = kTRUE;
997 TIter gridIter(gridIds);
998 AliCDBId* aGridId = 0;
999 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
be48e3ea 1000 // If local object is valid up to infinity we store it only if it is
1001 // the first unprocessed run!
1002 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1003 {
1004 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1005 {
1006 Log(fCurrentDetector.Data(),
1007 ("TryToStoreAgain - This object has validity infinite but there are previous unprocessed runs!"));
1008 continue;
1009 } else {
1010 break;
1011 }
2bb7b766 1012 }
1013 if(aGridId->GetPath() != aLocId.GetPath()) continue;
1014 // skip all objects valid up to infinity
1015 if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1016 // if we get here, it means there's already some more recent object stored on Grid!
1017 store = kFALSE;
1018 break;
1019 }
1020
1021 if(!store){
1022 Log(fCurrentDetector.Data(),
1023 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1024 type, aGridId->ToString().Data()));
1025 // removing local filename...
1026 // TODO maybe it's better not to remove it, it was not copied to the Grid!
1027 TString filename;
1028 backupSto->IdToFilename(aLocId, filename);
1029 AliInfo(Form("Removing local file %s", filename.Data()));
1030 gSystem->Exec(Form("rm %s",filename.Data()));
1031 continue;
1032 }
1033
1034 // If we get here, the file can be stored!
1035 Bool_t storeOk = gridSto->Put(aLocEntry);
1036 if(storeOk){
1037 Log(fCurrentDetector.Data(),
1038 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1039 aLocId.ToString().Data(), type));
1040
1041 // removing local filename...
1042 TString filename;
1043 backupSto->IdToFilename(aLocId, filename);
1044 AliInfo(Form("Removing local file %s", filename.Data()));
1045 gSystem->Exec(Form("rm %s", filename.Data()));
1046 continue;
1047 } else {
1048 Log(fCurrentDetector.Data(),
1049 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1050 type, aLocId.ToString().Data()));
1051 result = kFALSE;
1052 }
1053 }
1054 localEntries->Clear();
1055
1056 return result;
73abe331 1057}
1058
b948db8d 1059//______________________________________________________________________________________________
73abe331 1060Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* alias,
2bb7b766 1061 TObjArray* valueSet)
73abe331 1062{
58bc3020 1063// Retrieve all "alias" data points from the DCS server
1064// host, port: TSocket connection parameters
1065// alias: name of the alias
2bb7b766 1066// valueSet: array of retrieved AliDCSValue's
58bc3020 1067
73abe331 1068 AliDCSClient client(host, port, fTimeout, fRetries);
1069 if (!client.IsConnected()) {
b948db8d 1070 return kFALSE;
73abe331 1071 }
1072
57f50b3c 1073 Int_t result = client.GetAliasValues(alias,
73abe331 1074 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1075
1076 if (result < 0) {
2bb7b766 1077 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
73abe331 1078 alias, AliDCSClient::GetErrorString(result)));
1079
1080 if (result == AliDCSClient::fgkServerError) {
2bb7b766 1081 Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
73abe331 1082 client.GetServerError().Data()));
1083 }
1084
1085 return kFALSE;
1086 }
1087
1088 return kTRUE;
1089}
b948db8d 1090
1091//______________________________________________________________________________________________
57f50b3c 1092const char* AliShuttle::GetFile(Int_t system, const char* detector,
1093 const char* id, const char* source)
b948db8d 1094{
57f50b3c 1095// Get calibration file from file exchange servers
1096// calls specific getter according to system index (kDAQ, kDCS, kHLT)
1097
1098 switch(system){
1099 case kDAQ:
1100 return GetDAQFileName(detector, id, source);
1101 break;
1102 case kDCS:
1103 return GetDCSFileName(detector, id, source);
1104 break;
1105 case kHLT:
1106 return GetHLTFileName(detector, id, source);
1107 break;
1108 default:
1109 AliError(Form("No valid system index: %d",system));
1110 }
b948db8d 1111
b948db8d 1112 return 0;
1113}
1114
b948db8d 1115//______________________________________________________________________________________________
57f50b3c 1116TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
b948db8d 1117{
57f50b3c 1118// Get sources producing the condition file Id from file exchange servers
1119// calls specific getter according to system index (kDAQ, kDCS, kHLT)
1120
1121 switch(system){
1122 case kDAQ:
1123 return GetDAQFileSources(detector, id);
1124 break;
1125 case kDCS:
1126 return GetDCSFileSources(detector, id);
1127 break;
1128 case kHLT:
1129 return GetHLTFileSources(detector, id);
1130 break;
1131 default:
1132 AliError(Form("No valid system index: %d",system));
1133 }
1134
1135 return NULL;
1136}
1137
1138//______________________________________________________________________________________________
2bb7b766 1139Bool_t AliShuttle::Connect(Int_t system)
1140{
57f50b3c 1141// Connect to MySQL Server of the system's FES logbook
2bb7b766 1142// DAQ Logbook, Shuttle Logbook and DAQ FES Logbook are on the same host
57f50b3c 1143
1144 // check connection: if already connected return
1145 if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
1146
be48e3ea 1147 TString lbHost, lbUser, lbPass;
1148
1149 if (system < 3) // FES logbook servers
1150 {
1151 lbHost = Form("mysql://%s", fConfig->GetFESlbHost(system));
1152 lbUser = fConfig->GetFESlbUser(system);
1153 lbPass = fConfig->GetFESlbPass(system);
1154 } else { // Run & Shuttle logbook servers
1155 // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1156 lbHost = Form("mysql://%s", fConfig->GetDAQlbHost());
1157 lbUser = fConfig->GetDAQlbUser();
1158 lbPass = fConfig->GetDAQlbPass();
1159 }
57f50b3c 1160
be48e3ea 1161 fServer[system] = TSQLServer::Connect(lbHost.Data(), lbUser.Data(), lbPass.Data());
57f50b3c 1162 if (!fServer[system] || !fServer[system]->IsConnected()) {
be48e3ea 1163 if(system < 3)
1164 {
eba76848 1165 AliError(Form("Can't establish connection to FES logbook for %s",
1166 AliShuttleInterface::GetSystemName(system)));
be48e3ea 1167 } else {
1168 AliError("Can't establish connection to Run logbook.");
1169 }
2bb7b766 1170 if(fServer[system]) delete fServer[system];
57f50b3c 1171 return kFALSE;
1172 }
1173
1174 // Get tables
1175 // TODO in the configuration should the table name be there too?
2bb7b766 1176 TSQLResult* aResult=0;
57f50b3c 1177 switch(system){
1178 case kDAQ:
2bb7b766 1179 aResult = fServer[kDAQ]->GetTables("REFSYSLOG");
57f50b3c 1180 break;
1181 case kDCS:
2bb7b766 1182 //aResult = fServer[kDCS]->GetTables("REFSYSLOG");
57f50b3c 1183 break;
1184 case kHLT:
2bb7b766 1185 //aResult = fServer[kHLT]->GetTables("REFSYSLOG");
57f50b3c 1186 break;
1187 default:
be48e3ea 1188 aResult = fServer[3]->GetTables("REFSYSLOG");
57f50b3c 1189 break;
1190 }
1191
2bb7b766 1192 delete aResult;
57f50b3c 1193 return kTRUE;
1194}
1195
1196//______________________________________________________________________________________________
2bb7b766 1197const char* AliShuttle::GetDAQFileName(const char* detector, const char* id, const char* source)
1198{
57f50b3c 1199// Retrieves a file from the DAQ FES.
1200// First queris the DAQ logbook_fs for the DAQ file name, using the run, detector, id and source info
1201// then calls RetrieveDAQFile(DAQfilename) for actual copy to local disk
2bb7b766 1202// run: current run being processed (given by Logbook entry fLogbookEntry)
eba76848 1203// detector: the Preprocessor name
57f50b3c 1204// id: provided as a parameter by the Preprocessor
1205// source: provided by the Preprocessor through GetFileSources function
1206
1207 // check connection, in case connect
eba76848 1208 if (!Connect(kDAQ))
1209 {
2bb7b766 1210 Log(detector, "GetDAQFileName - Couldn't connect to DAQ Logbook");
57f50b3c 1211 return 0;
1212 }
1213
1214 // Query preparation
1215 TString sqlQueryStart = "select filePath from logbook_fs where";
1216 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"",
eba76848 1217 GetCurrentRun(), detector, id, source);
57f50b3c 1218 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1219
84090f85 1220 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
57f50b3c 1221
1222 // Query execution
2bb7b766 1223 TSQLResult* aResult = 0;
1224 aResult = dynamic_cast<TSQLResult*> (fServer[kDAQ]->Query(sqlQuery));
57f50b3c 1225 if (!aResult) {
2bb7b766 1226 Log(detector, Form("GetDAQFileName - Can't execute SQL query for: id = %s, source = %s",
1227 id, source));
57f50b3c 1228 return 0;
1229 }
1230
eba76848 1231 if(aResult->GetRowCount() == 0)
1232 {
57f50b3c 1233 Log(detector,
2bb7b766 1234 Form("GetDAQFileName - No entry in FES table for: id = %s, source = %s",
1235 id, source));
57f50b3c 1236 delete aResult;
1237 return 0;
1238 }
1239
eba76848 1240 if (aResult->GetRowCount() > 1) {
57f50b3c 1241 Log(detector,
2bb7b766 1242 Form("GetDAQFileName - More than one entry in FES table for: id = %s, source = %s",
1243 id, source));
57f50b3c 1244 delete aResult;
1245 return 0;
1246 }
1247
2bb7b766 1248 TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
57f50b3c 1249
eba76848 1250 if (!aRow){
2bb7b766 1251 Log(detector, Form("GetDAQFileName - Empty set result from query: id = %s, source = %s",
1252 id, source));
57f50b3c 1253 delete aResult;
1254 return 0;
1255 }
1256
1257 TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1258
1259 delete aResult;
2bb7b766 1260 delete aRow;
57f50b3c 1261
84090f85 1262 AliDebug(2, Form("filePath = %s",filePath.Data()));
57f50b3c 1263
1264 // retrieved file is renamed to make it unique
1265 TString localFileName = Form("%s_%d_%s_%s.shuttle",
2bb7b766 1266 detector, GetCurrentRun(), id, source);
57f50b3c 1267
1268 // file retrieval from DAQ FES
1269 Bool_t result = RetrieveDAQFile(filePath.Data(), localFileName.Data());
1270 if(!result) {
2bb7b766 1271 Log(detector, Form("GetDAQFileName - Copy of file %s from DAQ FES failed", filePath.Data()));
57f50b3c 1272 return 0;
1273 } else {
2bb7b766 1274 AliInfo(Form("File %s copied from DAQ FES into %s/%s",
57f50b3c 1275 filePath.Data(), fgkShuttleTempDir, localFileName.Data()));
1276 }
1277
1278
1279 fFESCalled[kDAQ]=kTRUE;
1280 TObjString *fileParams = new TObjString(Form("%s_!?!_%s", id, source));
1281 fFESlist[kDAQ].Add(fileParams);
1282
1283 return localFileName.Data();
1284
1285}
1286
1287//______________________________________________________________________________________________
2bb7b766 1288Bool_t AliShuttle::RetrieveDAQFile(const char* daqFileName, const char* localFileName)
1289{
57f50b3c 1290
1291 // check temp directory: trying to cd to temp; if it does not exist, create it
84090f85 1292 AliDebug(2, Form("Copy file %s from DAQ FES into folder %s and rename it as %s",
57f50b3c 1293 daqFileName,fgkShuttleTempDir, localFileName));
1294
1295 void* dir = gSystem->OpenDirectory(fgkShuttleTempDir);
1296 if (dir == NULL) {
1297 if (gSystem->mkdir(fgkShuttleTempDir, kTRUE)) {
2bb7b766 1298 AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
57f50b3c 1299 return kFALSE;
1300 }
1301
1302 } else {
1303 gSystem->FreeDirectory(dir);
1304 }
1305
1306 TString baseDAQFESFolder = "DAQ";
1307 TString command = Form("scp %s@%s:%s/%s %s/%s",
1308 fConfig->GetFESUser(kDAQ),
1309 fConfig->GetFESHost(kDAQ),
1310 baseDAQFESFolder.Data(),
1311 daqFileName,
1312 fgkShuttleTempDir,
1313 localFileName);
1314
84090f85 1315 AliDebug(2, Form("%s",command.Data()));
57f50b3c 1316
1317 UInt_t nRetries = 0;
1318 UInt_t maxRetries = 3;
1319
1320 // copy!! if successful TSystem::Exec returns 0
1321 while(nRetries++ < maxRetries) {
84090f85 1322 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
57f50b3c 1323 if(gSystem->Exec(command.Data()) == 0) return kTRUE;
1324 }
1325
1326 return kFALSE;
1327
1328}
1329
1330//______________________________________________________________________________________________
2bb7b766 1331TList* AliShuttle::GetDAQFileSources(const char* detector, const char* id)
1332{
57f50b3c 1333// Retrieves a file from the DCS FES.
1334
1335 // check connection, in case connect
1336 if(!Connect(kDAQ)){
2bb7b766 1337 Log(detector, "GetDAQFileSources - Couldn't connect to DAQ Logbook");
57f50b3c 1338 return 0;
1339 }
1340
1341 // Query preparation
1342 TString sqlQueryStart = "select DAQsource from logbook_fs where";
1343 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
eba76848 1344 GetCurrentRun(), detector, id);
57f50b3c 1345 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1346
84090f85 1347 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
57f50b3c 1348
1349 // Query execution
1350 TSQLResult* aResult;
1351 aResult = fServer[kDAQ]->Query(sqlQuery);
1352 if (!aResult) {
2bb7b766 1353 Log(detector, Form("GetDAQFileSources - Can't execute SQL query for id: %s", id));
57f50b3c 1354 return 0;
1355 }
1356
1357 if (aResult->GetRowCount() == 0) {
1358 Log(detector,
2bb7b766 1359 Form("GetDAQFileSources - No entry in FES table for id: %s", id));
57f50b3c 1360 delete aResult;
1361 return 0;
1362 }
1363
1364 TSQLRow* aRow;
1365 TList *list = new TList();
1366 list->SetOwner(1);
1367
1368 while((aRow = aResult->Next())){
1369
1370 TString daqSource(aRow->GetField(0), aRow->GetFieldLength(0));
84090f85 1371 AliDebug(2, Form("daqSource = %s", daqSource.Data()));
57f50b3c 1372 list->Add(new TObjString(daqSource));
2bb7b766 1373 delete aRow;
57f50b3c 1374 }
1375 delete aResult;
1376
1377 return list;
1378
1379}
1380
1381//______________________________________________________________________________________________
2bb7b766 1382const char* AliShuttle::GetDCSFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
1383// Retrieves a file from the DCS FES.
1384
1385return "You're in DCS";
1386
1387}
1388
1389//______________________________________________________________________________________________
1390TList* AliShuttle::GetDCSFileSources(const char* /*detector*/, const char* /*id*/){
1391// Retrieves a file from the DCS FES.
1392
1393return NULL;
1394
1395}
1396
1397//______________________________________________________________________________________________
1398const char* AliShuttle::GetHLTFileName(const char* /*detector*/, const char* /*id*/, const char* /*source*/){
1399// Retrieves a file from the HLT FES.
1400
1401return "You're in HLT";
1402
1403}
1404
1405//______________________________________________________________________________________________
1406TList* AliShuttle::GetHLTFileSources(const char* /*detector*/, const char* /*id*/){
1407// Retrieves a file from the HLT FES.
1408
1409return NULL;
1410
1411}
1412
1413//______________________________________________________________________________________________
1414Bool_t AliShuttle::UpdateDAQTable()
1415{
57f50b3c 1416// Update DAQ table filling time_processed field in all rows corresponding to current run and detector
1417
1418 // check connection, in case connect
1419 if(!Connect(kDAQ)){
2bb7b766 1420 Log(fCurrentDetector, "UpdateDAQTable - Couldn't connect to DAQ Logbook");
57f50b3c 1421 return kFALSE;
1422 }
1423
1424 TTimeStamp now; // now
1425
1426 // Loop on FES list entries
1427 TIter iter(&fFESlist[kDAQ]);
1428 TObjString *aFESentry=0;
1429 while((aFESentry = dynamic_cast<TObjString*> (iter.Next()))){
1430 TString aFESentrystr = aFESentry->String();
1431 TObjArray *aFESarray = aFESentrystr.Tokenize("_!?!_");
1432 if(!aFESarray || aFESarray->GetEntries() != 2 ) {
2bb7b766 1433 Log(fCurrentDetector, Form("UpdateDAQTable - error updating FES entry. Check string: <%s>",
57f50b3c 1434 aFESentrystr.Data()));
1435 if(aFESarray) delete aFESarray;
1436 return kFALSE;
1437 }
1438 const char* fileId = ((TObjString*) aFESarray->At(0))->GetName();
1439 const char* daqSource = ((TObjString*) aFESarray->At(1))->GetName();
1440 TString whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
eba76848 1441 GetCurrentRun(), fCurrentDetector.Data(), fileId, daqSource);
57f50b3c 1442
1443 delete aFESarray;
1444
1445 TString sqlQuery = Form("update logbook_fs set time_processed=%d %s", now.GetSec(), whereClause.Data());
1446
84090f85 1447 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
57f50b3c 1448
1449 // Query execution
1450 TSQLResult* aResult;
be48e3ea 1451 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
57f50b3c 1452 if (!aResult) {
2bb7b766 1453 Log(fCurrentDetector, Form("UpdateDAQTable - Can't execute SQL query <%s>", sqlQuery.Data()));
57f50b3c 1454 return kFALSE;
1455 }
1456 delete aResult;
2bb7b766 1457 }
57f50b3c 1458
2bb7b766 1459 return kTRUE;
1460}
57f50b3c 1461
57f50b3c 1462
2bb7b766 1463//______________________________________________________________________________________________
1464Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1465{
1466// Update Shuttle logbook filling detector or shuttle_done column
1467// ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
57f50b3c 1468
2bb7b766 1469 // check connection, in case connect
be48e3ea 1470 if(!Connect(3)){
2bb7b766 1471 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1472 return kFALSE;
57f50b3c 1473 }
1474
2bb7b766 1475 TString detName(detector);
1476 TString setClause;
1477 if(detName == "shuttle_done") {
1478 setClause = "set shuttle_done=1";
1479 } else {
2bb7b766 1480 TString statusStr(status);
1481 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1482 statusStr.Contains("failed", TString::kIgnoreCase)){
eba76848 1483 setClause = Form("set %s=\"%s\"", detector, status);
2bb7b766 1484 } else {
1485 Log("SHUTTLE",
1486 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1487 status, detector));
1488 return kFALSE;
1489 }
1490 }
57f50b3c 1491
2bb7b766 1492 TString whereClause = Form("where run=%d", GetCurrentRun());
1493
1494 TString sqlQuery = Form("update logbook_shuttle %s %s",
1495 setClause.Data(), whereClause.Data());
57f50b3c 1496
2bb7b766 1497 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1498
1499 // Query execution
1500 TSQLResult* aResult;
be48e3ea 1501 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2bb7b766 1502 if (!aResult) {
1503 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1504 return kFALSE;
57f50b3c 1505 }
2bb7b766 1506 delete aResult;
57f50b3c 1507
1508 return kTRUE;
1509}
1510
1511//______________________________________________________________________________________________
2bb7b766 1512Int_t AliShuttle::GetCurrentRun() const
1513{
1514// Get current run from logbook entry
57f50b3c 1515
2bb7b766 1516 return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
57f50b3c 1517}
1518
1519//______________________________________________________________________________________________
2bb7b766 1520UInt_t AliShuttle::GetCurrentStartTime() const
1521{
1522// get current start time
57f50b3c 1523
2bb7b766 1524 return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
57f50b3c 1525}
1526
1527//______________________________________________________________________________________________
2bb7b766 1528UInt_t AliShuttle::GetCurrentEndTime() const
1529{
1530// get current end time from logbook entry
57f50b3c 1531
2bb7b766 1532 return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
57f50b3c 1533}
1534
b948db8d 1535//______________________________________________________________________________________________
1536void AliShuttle::Log(const char* detector, const char* message)
1537{
58bc3020 1538// Fill log string with a message
b948db8d 1539
84090f85 1540 void* dir = gSystem->OpenDirectory(fgkShuttleLogDir);
1541 if (dir == NULL) {
1542 if (gSystem->mkdir(fgkShuttleLogDir, kTRUE)) {
2bb7b766 1543 AliError(Form("Can't open directory <%s>", fgkShuttleTempDir));
84090f85 1544 return;
1545 }
b948db8d 1546
84090f85 1547 } else {
1548 gSystem->FreeDirectory(dir);
1549 }
b948db8d 1550
cb343cfd 1551 TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
2bb7b766 1552 if(GetCurrentRun()>=0 ) toLog += Form("run %d - ", GetCurrentRun());
1553 toLog += Form("%s", message);
1554
84090f85 1555 AliInfo(toLog.Data());
b948db8d 1556
84090f85 1557 TString fileName;
1558 fileName.Form("%s/%s.log", fgkShuttleLogDir, detector);
1559 gSystem->ExpandPathName(fileName);
1560
1561 ofstream logFile;
1562 logFile.open(fileName, ofstream::out | ofstream::app);
1563
1564 if (!logFile.is_open()) {
1565 AliError(Form("Could not open file %s", fileName.Data()));
1566 return;
1567 }
7bfb2090 1568
84090f85 1569 logFile << toLog.Data() << "\n";
b948db8d 1570
84090f85 1571 logFile.close();
b948db8d 1572}
2bb7b766 1573
2bb7b766 1574//______________________________________________________________________________________________
1575Bool_t AliShuttle::Collect(Int_t run)
1576{
eba76848 1577//
1578// Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1579// If a dedicated run is given this run is processed
1580//
1581// In operational mode, this is the Shuttle function triggered by the EOR signal.
1582//
2bb7b766 1583
eba76848 1584 if (run == -1)
1585 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1586 else
1587 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
cb343cfd 1588
1589 SetLastAction("Starting");
2bb7b766 1590
1591 TString whereClause("where shuttle_done=0");
eba76848 1592 if (run != -1)
1593 whereClause += Form(" and run=%d", run);
2bb7b766 1594
1595 TObjArray shuttleLogbookEntries;
be48e3ea 1596 if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1597 {
cb343cfd 1598 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2bb7b766 1599 return kFALSE;
1600 }
1601
be48e3ea 1602 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1603 fFirstUnprocessed[iDet] = kTRUE;
1604
1605 if (run != 1)
1606 {
1607 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1608 // flag them into fFirstUnprocessed array
1609 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1610 TObjArray tmpLogbookEntries;
1611 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1612 {
1613 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1614 return kFALSE;
1615 }
1616
1617 TIter iter(&tmpLogbookEntries);
1618 AliShuttleLogbookEntry* anEntry = 0;
1619 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
1620 {
1621 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1622 {
1623 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1624 {
1625 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1626 anEntry->GetRun(), GetDetName(iDet)));
1627 fFirstUnprocessed[iDet] = kFALSE;
1628 }
1629 }
1630
1631 }
1632
1633 }
1634
1635 if (!RetrieveConditionsData(shuttleLogbookEntries))
1636 {
cb343cfd 1637 Log("SHUTTLE", "Collect - Process of at least one run failed");
2bb7b766 1638 return kFALSE;
1639 }
1640
eba76848 1641 return kTRUE;
2bb7b766 1642}
1643
2bb7b766 1644//______________________________________________________________________________________________
1645Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1646{
1647// Retrieve conditions data for all runs that aren't processed yet
1648
1649 Bool_t hasError = kFALSE;
1650
1651 TIter iter(&dateEntries);
1652 AliShuttleLogbookEntry* anEntry;
1653
1654 while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1655 if (!Process(anEntry)){
1656 hasError = kTRUE;
1657 }
1658 }
1659
1660 return hasError == kFALSE;
1661}
cb343cfd 1662
1663//______________________________________________________________________________________________
1664ULong_t AliShuttle::GetTimeOfLastAction() const
1665{
1666 ULong_t tmp;
1667
1668 fMonitoringMutex->Lock();
be48e3ea 1669
cb343cfd 1670 tmp = fLastActionTime;
1671
1672 fMonitoringMutex->UnLock();
1673
1674 return tmp;
1675}
1676
1677//______________________________________________________________________________________________
1678const TString AliShuttle::GetLastAction() const
1679{
1680 // returns a string description of the last action
1681
1682 TString tmp;
1683
1684 fMonitoringMutex->Lock();
1685
1686 tmp = fLastAction;
1687
1688 fMonitoringMutex->UnLock();
1689
1690 return tmp;
1691}
1692
1693//______________________________________________________________________________________________
1694void AliShuttle::SetLastAction(const char* action)
1695{
1696 // updates the monitoring variables
1697
1698 fMonitoringMutex->Lock();
1699
1700 fLastAction = action;
1701 fLastActionTime = time(0);
1702
1703 fMonitoringMutex->UnLock();
1704}
eba76848 1705
1706//______________________________________________________________________________________________
1707const char* AliShuttle::GetRunParameter(const char* param)
1708{
1709// returns run parameter read from DAQ logbook
1710
1711 if(!fLogbookEntry) {
1712 AliError("No logbook entry!");
1713 return 0;
1714 }
1715
1716 return fLogbookEntry->GetRunParameter(param);
1717}