]> git.uio.no Git - u/mrichter/AliRoot.git/blame - SHUTTLE/AliShuttle.cxx
Run type field added in SHUTTLE framework. Run type is read from "run type" logbook...
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
CommitLineData
73abe331 1/**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3 * *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
6 * *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
15
16/*
17$Log$
d386d623 18Revision 1.30 2007/02/13 11:23:21 acolla
19Moved getters and setters of Shuttle's main OCDB/Reference, local
20OCDB/Reference, temp and log folders to AliShuttleInterface
21
9d733021 22Revision 1.27 2007/01/30 17:52:42 jgrosseo
23adding monalisa monitoring
24
e7f62f16 25Revision 1.26 2007/01/23 19:20:03 acolla
26Removed old ldif files, added TOF, MCH ldif files. Added some options in
27AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
28SetShuttleLogDir
29
36c99a6a 30Revision 1.25 2007/01/15 19:13:52 acolla
31Moved some AliInfo to AliDebug in SendMail function
32
fc5a4708 33Revision 1.21 2006/12/07 08:51:26 jgrosseo
34update (alberto):
35table, db names in ldap configuration
36added GRP preprocessor
37DCS data can also be retrieved by data point
38
2c15234c 39Revision 1.20 2006/11/16 16:16:48 jgrosseo
40introducing strict run ordering flag
41removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
42
be48e3ea 43Revision 1.19 2006/11/06 14:23:04 jgrosseo
44major update (Alberto)
45o) reading of run parameters from the logbook
46o) online offline naming conversion
47o) standalone DCSclient package
48
eba76848 49Revision 1.18 2006/10/20 15:22:59 jgrosseo
50o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
51o) Merging Collect, CollectAll, CollectNew function
52o) Removing implementation of empty copy constructors (declaration still there!)
53
cb343cfd 54Revision 1.17 2006/10/05 16:20:55 jgrosseo
55adapting to new CDB classes
56
6ec0e06c 57Revision 1.16 2006/10/05 15:46:26 jgrosseo
58applying to the new interface
59
481441a2 60Revision 1.15 2006/10/02 16:38:39 jgrosseo
61update (alberto):
62fixed memory leaks
63storing of objects that failed to be stored to the grid before
64interfacing of shuttle status table in daq system
65
2bb7b766 66Revision 1.14 2006/08/29 09:16:05 jgrosseo
67small update
68
85a80aa9 69Revision 1.13 2006/08/15 10:50:00 jgrosseo
70effc++ corrections (alberto)
71
4f0ab988 72Revision 1.12 2006/08/08 14:19:29 jgrosseo
73Update to shuttle classes (Alberto)
74
75- Possibility to set the full object's path in the Preprocessor's and
76Shuttle's Store functions
77- Possibility to extend the object's run validity in the same classes
78("startValidity" and "validityInfinite" parameters)
79- Implementation of the StoreReferenceData function to store reference
80data in a dedicated CDB storage.
81
84090f85 82Revision 1.11 2006/07/21 07:37:20 jgrosseo
83last run is stored after each run
84
7bfb2090 85Revision 1.10 2006/07/20 09:54:40 jgrosseo
86introducing status management: The processing per subdetector is divided into several steps,
87after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
88can keep track of the number of failures and skips further processing after a certain threshold is
89exceeded. These thresholds can be configured in LDAP.
90
5164a766 91Revision 1.9 2006/07/19 10:09:55 jgrosseo
92new configuration, accesst to DAQ FES (Alberto)
93
57f50b3c 94Revision 1.8 2006/07/11 12:44:36 jgrosseo
95adding parameters for extended validity range of data produced by preprocessor
96
17111222 97Revision 1.7 2006/07/10 14:37:09 jgrosseo
98small fix + todo comment
99
e090413b 100Revision 1.6 2006/07/10 13:01:41 jgrosseo
101enhanced storing of last sucessfully processed run (alberto)
102
a7160fe9 103Revision 1.5 2006/07/04 14:59:57 jgrosseo
104revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
105
45a493ce 106Revision 1.4 2006/06/12 09:11:16 jgrosseo
107coding conventions (Alberto)
108
58bc3020 109Revision 1.3 2006/06/06 14:26:40 jgrosseo
110o) removed files that were moved to STEER
111o) shuttle updated to follow the new interface (Alberto)
112
b948db8d 113Revision 1.2 2006/03/07 07:52:34 hristov
114New version (B.Yordanov)
115
d477ad88 116Revision 1.6 2005/11/19 17:19:14 byordano
117RetrieveDATEEntries and RetrieveConditionsData added
118
119Revision 1.5 2005/11/19 11:09:27 byordano
120AliShuttle declaration added
121
122Revision 1.4 2005/11/17 17:47:34 byordano
123TList changed to TObjArray
124
125Revision 1.3 2005/11/17 14:43:23 byordano
126import to local CVS
127
128Revision 1.1.1.1 2005/10/28 07:33:58 hristov
129Initial import as subdirectory in AliRoot
130
73abe331 131Revision 1.2 2005/09/13 08:41:15 byordano
132default startTime endTime added
133
134Revision 1.4 2005/08/30 09:13:02 byordano
135some docs added
136
137Revision 1.3 2005/08/29 21:15:47 byordano
138some docs added
139
140*/
141
//
// This class is the main manager for AliShuttle.
// It organizes the data retrieval from DCS and calls the
// interface methods of AliPreprocessor.
// For every detector in AliShuttleConfig (see AliShuttleConfig),
// data for its set of aliases is retrieved. If there is a registered
// AliPreprocessor for this detector then it will be used
// according to the schema (see AliPreprocessor).
// If there isn't a registered AliPreprocessor then the retrieved
// data is stored automatically to the underlying AliCDBStorage.
// The alias name is used as detSpec.
//
154
155#include "AliShuttle.h"
156
157#include "AliCDBManager.h"
158#include "AliCDBStorage.h"
159#include "AliCDBId.h"
84090f85 160#include "AliCDBRunRange.h"
161#include "AliCDBPath.h"
5164a766 162#include "AliCDBEntry.h"
73abe331 163#include "AliShuttleConfig.h"
eba76848 164#include "DCSClient/AliDCSClient.h"
73abe331 165#include "AliLog.h"
b948db8d 166#include "AliPreprocessor.h"
5164a766 167#include "AliShuttleStatus.h"
2bb7b766 168#include "AliShuttleLogbookEntry.h"
73abe331 169
57f50b3c 170#include <TSystem.h>
58bc3020 171#include <TObject.h>
b948db8d 172#include <TString.h>
57f50b3c 173#include <TTimeStamp.h>
73abe331 174#include <TObjString.h>
57f50b3c 175#include <TSQLServer.h>
176#include <TSQLResult.h>
177#include <TSQLRow.h>
cb343cfd 178#include <TMutex.h>
73abe331 179
e7f62f16 180#include <TMonaLisaWriter.h>
181
5164a766 182#include <fstream>
183
cb343cfd 184#include <sys/types.h>
185#include <sys/wait.h>
186
73abe331 187ClassImp(AliShuttle)
188
10a5a932 189Bool_t AliShuttle::fgkProcessDCS(kTRUE);
57f50b3c 190
b948db8d 191//______________________________________________________________________________________________
192AliShuttle::AliShuttle(const AliShuttleConfig* config,
193 UInt_t timeout, Int_t retries):
4f0ab988 194fConfig(config),
195fTimeout(timeout), fRetries(retries),
196fPreprocessorMap(),
2bb7b766 197fLogbookEntry(0),
eba76848 198fCurrentDetector(),
85a80aa9 199fStatusEntry(0),
cb343cfd 200fGridError(kFALSE),
201fMonitoringMutex(0),
eba76848 202fLastActionTime(0),
e7f62f16 203fLastAction(),
204fMonaLisa(0)
73abe331 205{
206 //
207 // config: AliShuttleConfig used
73abe331 208 // timeout: timeout used for AliDCSClient connection
209 // retries: the number of retries in case of connection error.
210 //
211
57f50b3c 212 if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
be48e3ea 213 for(int iSys=0;iSys<4;iSys++) {
57f50b3c 214 fServer[iSys]=0;
be48e3ea 215 if (iSys < 3)
2c15234c 216 fFXSlist[iSys].SetOwner(kTRUE);
57f50b3c 217 }
2bb7b766 218 fPreprocessorMap.SetOwner(kTRUE);
be48e3ea 219
220 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
221 fFirstUnprocessed[iDet] = kFALSE;
222
cb343cfd 223 fMonitoringMutex = new TMutex();
58bc3020 224}
225
b948db8d 226//______________________________________________________________________________________________
57f50b3c 227AliShuttle::~AliShuttle()
58bc3020 228{
229// destructor
230
b948db8d 231 fPreprocessorMap.DeleteAll();
be48e3ea 232 for(int iSys=0;iSys<4;iSys++)
57f50b3c 233 if(fServer[iSys]) {
234 fServer[iSys]->Close();
235 delete fServer[iSys];
eba76848 236 fServer[iSys] = 0;
57f50b3c 237 }
2bb7b766 238
239 if (fStatusEntry){
240 delete fStatusEntry;
241 fStatusEntry = 0;
242 }
cb343cfd 243
244 if (fMonitoringMutex)
245 {
246 delete fMonitoringMutex;
247 fMonitoringMutex = 0;
248 }
73abe331 249}
250
b948db8d 251//______________________________________________________________________________________________
57f50b3c 252void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
58bc3020 253{
73abe331 254 //
b948db8d 255 // Registers new AliPreprocessor.
73abe331 256 // It uses GetName() for indentificator of the pre processor.
257 // The pre processor is registered it there isn't any other
258 // with the same identificator (GetName()).
259 //
260
eba76848 261 const char* detName = preprocessor->GetName();
262 if(GetDetPos(detName) < 0)
263 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
264
265 if (fPreprocessorMap.GetValue(detName)) {
266 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
73abe331 267 return;
268 }
269
eba76848 270 fPreprocessorMap.Add(new TObjString(detName), preprocessor);
73abe331 271}
b948db8d 272//______________________________________________________________________________________________
84090f85 273UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
274 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
73abe331 275{
84090f85 276 // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
277 // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
278 // using this function. Use StoreReferenceData instead!
85a80aa9 279 // It calls WriteToCDB function which perform actual storage
b948db8d 280
85a80aa9 281 return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
282 metaData, validityStart, validityInfinite);
84090f85 283
284}
285
286//______________________________________________________________________________________________
481441a2 287UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
84090f85 288{
289 // Stores a CDB object in the storage for reference data. This objects will not be available during
290 // offline reconstrunction. Use this function for reference data only!
85a80aa9 291 // It calls WriteToCDB function which perform actual storage
292
481441a2 293 return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
84090f85 294
85a80aa9 295}
296
297//______________________________________________________________________________________________
298UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
299 const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
300 Int_t validityStart, Bool_t validityInfinite)
301{
302 // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
303 // The parameters are:
304 // 1) Uri of the main storage (Grid)
305 // 2) Uri of the backup storage (Local)
306 // 3) the object's path.
307 // 4) the object to be stored
308 // 5) the metaData to be associated with the object
309 // 6) the validity start run number w.r.t. the current run,
84090f85 310 // if the data is valid only for this run leave the default 0
85a80aa9 311 // 7) specifies if the calibration data is valid for infinity (this means until updated),
84090f85 312 // typical for calibration runs, the default is kFALSE
313 //
84090f85 314 // returns 0 if fail
85a80aa9 315 // 1 if stored in main (Grid) storage
316 // 2 if stored in backup (Local) storage
84090f85 317
85a80aa9 318 const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
2bb7b766 319
85a80aa9 320 Int_t firstRun = GetCurrentRun() - validityStart;
84090f85 321 if(firstRun < 0) {
2bb7b766 322 AliError("First valid run happens to be less than 0! Setting it to 0.");
84090f85 323 firstRun=0;
324 }
325
326 Int_t lastRun = -1;
327 if(validityInfinite) {
328 lastRun = AliCDBRunRange::Infinity();
329 } else {
330 lastRun = GetCurrentRun();
331 }
332
2bb7b766 333 AliCDBId id(path, firstRun, lastRun, -1, -1);
334
335 if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
336 TObjString runUsed = Form("%d", GetCurrentRun());
9e080f92 337 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
2bb7b766 338 }
84090f85 339
340 UInt_t result = 0;
341
85a80aa9 342 if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
2bb7b766 343 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
84090f85 344 } else {
85a80aa9 345 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
84090f85 346 ->Put(object, id, metaData);
347 }
348
349 if(!result) {
350
351 Log(fCurrentDetector,
2bb7b766 352 Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
353 cdbType, path.GetPath().Data()));
354
355 // Set Grid version to current run number, to ease retrieval later
356 id.SetVersion(GetCurrentRun());
84090f85 357
85a80aa9 358 result = AliCDBManager::Instance()->GetStorage(localUri)
84090f85 359 ->Put(object, id, metaData);
360
361 if(result) {
362 result = 2;
85a80aa9 363 fGridError = kTRUE;
84090f85 364 }else{
2bb7b766 365 Log(fCurrentDetector, "WriteToCDB - Can't store data!");
b948db8d 366 }
367 }
2bb7b766 368
b948db8d 369 return result;
370
73abe331 371}
372
b948db8d 373//______________________________________________________________________________________________
5164a766 374AliShuttleStatus* AliShuttle::ReadShuttleStatus()
375{
2bb7b766 376// Reads the AliShuttleStatus from the CDB
5164a766 377
2bb7b766 378 if (fStatusEntry){
379 delete fStatusEntry;
380 fStatusEntry = 0;
381 }
5164a766 382
10a5a932 383 fStatusEntry = AliCDBManager::Instance()->GetStorage(GetLocalCDB())
2bb7b766 384 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
5164a766 385
2bb7b766 386 if (!fStatusEntry) return 0;
387 fStatusEntry->SetOwner(1);
5164a766 388
2bb7b766 389 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
390 if (!status) {
391 AliError("Invalid object stored to CDB!");
392 return 0;
393 }
5164a766 394
2bb7b766 395 return status;
5164a766 396}
397
398//______________________________________________________________________________________________
7bfb2090 399Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
5164a766 400{
2bb7b766 401// writes the status for one subdetector
402
403 if (fStatusEntry){
404 delete fStatusEntry;
405 fStatusEntry = 0;
406 }
5164a766 407
2bb7b766 408 Int_t run = GetCurrentRun();
5164a766 409
2bb7b766 410 AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
5164a766 411
2bb7b766 412 fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
413 fStatusEntry->SetOwner(1);
5164a766 414
2bb7b766 415 UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
7bfb2090 416
2bb7b766 417 if (!result) {
418 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
419 return kFALSE;
420 }
e7f62f16 421
422 SendMLInfo();
7bfb2090 423
2bb7b766 424 return kTRUE;
5164a766 425}
426
427//______________________________________________________________________________________________
428void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
429{
430 // changes the AliShuttleStatus for the given detector and run to the given status
431
2bb7b766 432 if (!fStatusEntry){
433 AliError("UNEXPECTED: fStatusEntry empty");
434 return;
435 }
5164a766 436
2bb7b766 437 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
5164a766 438
2bb7b766 439 if (!status){
440 AliError("UNEXPECTED: status could not be read from current CDB entry");
441 return;
442 }
5164a766 443
2c15234c 444 TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
eba76848 445 fCurrentDetector.Data(),
36c99a6a 446 status->GetStatusName(),
eba76848 447 status->GetStatusName(newStatus));
cb343cfd 448 Log("SHUTTLE", actionStr);
449 SetLastAction(actionStr);
5164a766 450
2bb7b766 451 status->SetStatus(newStatus);
452 if (increaseCount) status->IncreaseCount();
5164a766 453
2bb7b766 454 AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
e7f62f16 455
456 SendMLInfo();
5164a766 457}
e7f62f16 458
459//______________________________________________________________________________________________
460void AliShuttle::SendMLInfo()
461{
462 //
463 // sends ML information about the current status of the current detector being processed
464 //
465
466 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
467
468 if (!status){
469 AliError("UNEXPECTED: status could not be read from current CDB entry");
470 return;
471 }
472
473 TMonaLisaText mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
474 TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
475
476 TList mlList;
477 mlList.Add(&mlStatus);
478 mlList.Add(&mlRetryCount);
479
480 fMonaLisa->SendParameters(&mlList);
481}
482
5164a766 483//______________________________________________________________________________________________
484Bool_t AliShuttle::ContinueProcessing()
485{
2bb7b766 486// this function reads the AliShuttleStatus information from CDB and
487// checks if the processing should be continued
488// if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
489
57c1a579 490 if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
491
492 AliPreprocessor* aPreprocessor =
493 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
494 if (!aPreprocessor)
495 {
496 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
497 return kFALSE;
498 }
499
2bb7b766 500 AliShuttleLogbookEntry::Status entryStatus =
eba76848 501 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
2bb7b766 502
503 if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
9e080f92 504 AliInfo(Form("ContinueProcessing - %s is %s",
2bb7b766 505 fCurrentDetector.Data(),
506 fLogbookEntry->GetDetectorStatusName(entryStatus)));
507 return kFALSE;
508 }
509
510 // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
be48e3ea 511
512 // check if current run is first unprocessed run for current detector
513 if (fConfig->StrictRunOrder(fCurrentDetector) &&
514 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
515 {
516 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
517 return kFALSE;
518 }
519
2bb7b766 520 AliShuttleStatus* status = ReadShuttleStatus();
521 if (!status) {
522 // first time
523 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
524 fCurrentDetector.Data()));
525 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
526 return WriteShuttleStatus(status);
527 }
528
529 // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
530 // If it happens it may mean Logbook updating failed... let's do it now!
531 if (status->GetStatus() == AliShuttleStatus::kDone ||
532 status->GetStatus() == AliShuttleStatus::kFailed){
533 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
534 fCurrentDetector.Data(),
535 status->GetStatusName(status->GetStatus())));
536 UpdateShuttleLogbook(fCurrentDetector.Data(),
537 status->GetStatusName(status->GetStatus()));
538 return kFALSE;
539 }
540
541 if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
542 Log("SHUTTLE",
543 Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
544 fCurrentDetector.Data()));
545 if(TryToStoreAgain()){
546 Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
547 UpdateShuttleStatus(AliShuttleStatus::kDone);
548 UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
549 } else {
550 Log("SHUTTLE",
551 Form("ContinueProcessing - %s: Grid storage failed again",
552 fCurrentDetector.Data()));
e7f62f16 553 // trigger ML information manually because we do not had a status change
554 SendMLInfo();
2bb7b766 555 }
556 return kFALSE;
557 }
558
559 // if we get here, there is a restart
57c1a579 560 Bool_t cont = kFALSE;
2bb7b766 561
562 // abort conditions
cb343cfd 563 if (status->GetCount() >= fConfig->GetMaxRetries()) {
57c1a579 564 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
565 "Updating Shuttle Logbook", fCurrentDetector.Data(),
2bb7b766 566 status->GetCount(), status->GetStatusName()));
567 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
e7f62f16 568 UpdateShuttleStatus(AliShuttleStatus::kFailed);
57c1a579 569 } else {
570 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
571 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
572 status->GetStatusName(), status->GetCount()));
573 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
574 cont = kTRUE;
2bb7b766 575 }
576
57c1a579 577 // Send mail to detector expert!
578 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
579 if (!SendMail())
580 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
581 fCurrentDetector.Data()));
2bb7b766 582
57c1a579 583 return cont;
5164a766 584}
585
586//______________________________________________________________________________________________
2bb7b766 587Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
58bc3020 588{
73abe331 589 //
b948db8d 590 // Makes data retrieval for all detectors in the configuration.
2bb7b766 591 // entry: Shuttle logbook entry, contains run paramenters and status of detectors
592 // (Unprocessed, Inactive, Failed or Done).
d477ad88 593 // Returns kFALSE in case of error occured and kTRUE otherwise
73abe331 594 //
595
2bb7b766 596 if(!entry) return kFALSE;
597
598 fLogbookEntry = entry;
599
e7f62f16 600 if (fLogbookEntry->IsDone())
601 {
2bb7b766 602 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
603 UpdateShuttleLogbook("shuttle_done");
604 fLogbookEntry = 0;
605 return kTRUE;
606 }
607
e7f62f16 608 // create ML instance that monitors this run
609 fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
610 // disable monitoring of other parameters that come e.g. from TFile
611 gMonitoringWriter = 0;
2bb7b766 612
613 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
614 GetCurrentRun()));
615
441b0e9c 616 // Set run type from run type logbook into current fLogbookEntry
617 SetRunType();
e7f62f16 618
619 // Send the information to ML
620 TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
621
622 TList mlList;
623 mlList.Add(&mlStatus);
624
625 fMonaLisa->SendParameters(&mlList);
626
eba76848 627 fLogbookEntry->Print("all");
57f50b3c 628
629 // Initialization
d477ad88 630 Bool_t hasError = kFALSE;
5164a766 631
2bb7b766 632 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
633 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
634 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
635 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
d477ad88 636
57f50b3c 637 // Loop on detectors in the configuration
b948db8d 638 TIter iter(fConfig->GetDetectors());
2bb7b766 639 TObjString* aDetector = 0;
b948db8d 640
be48e3ea 641 while ((aDetector = (TObjString*) iter.Next()))
642 {
7bfb2090 643 fCurrentDetector = aDetector->String();
5164a766 644
9e080f92 645 if (ContinueProcessing() == kFALSE) continue;
646
2bb7b766 647 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START ******",
648 GetCurrentRun(), aDetector->GetName()));
649
9d733021 650 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
651
e7f62f16 652 Log(fCurrentDetector.Data(), "Starting processing");
85a80aa9 653
be48e3ea 654 Int_t pid = fork();
655
656 if (pid < 0)
657 {
658 Log("SHUTTLE", "ERROR: Forking failed");
659 }
660 else if (pid > 0)
661 {
662 // parent
663 AliInfo(Form("In parent process of %d - %s: Starting monitoring",
664 GetCurrentRun(), aDetector->GetName()));
665
666 Long_t begin = time(0);
667
668 int status; // to be used with waitpid, on purpose an int (not Int_t)!
669 while (waitpid(pid, &status, WNOHANG) == 0)
670 {
671 Long_t expiredTime = time(0) - begin;
672
673 if (expiredTime > fConfig->GetPPTimeOut())
674 {
675 Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
676 expiredTime));
677
678 kill(pid, 9);
679
680 hasError = kTRUE;
681
682 gSystem->Sleep(1000);
683 }
684 else
685 {
686 if (expiredTime % 60 == 0)
687 Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
688 expiredTime));
689 gSystem->Sleep(1000);
690 }
691 }
692
693 AliInfo(Form("In parent process of %d - %s: Client has terminated.",
694 GetCurrentRun(), aDetector->GetName()));
695
696 if (WIFEXITED(status))
697 {
698 Int_t returnCode = WEXITSTATUS(status);
699
700 Log("SHUTTLE", Form("The return code is %d", returnCode));
701
702 if (returnCode != 0)
703 hasError = kTRUE;
704 }
705 }
706 else if (pid == 0)
707 {
708 // client
709 AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
710
711 UInt_t result = ProcessCurrentDetector();
712
713 Int_t returnCode = 0; // will be set to 1 in case of an error
714
715 if (!result)
716 {
717 returnCode = 1;
718 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
719 GetCurrentRun(), aDetector->GetName()));
720 }
721 else if (result == 2)
722 {
723 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
724 GetCurrentRun(), aDetector->GetName()));
725 } else
726 {
727 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
728 GetCurrentRun(), aDetector->GetName()));
729 }
730
731 if (result > 0)
732 {
2c15234c 733 // Process successful: Update time_processed field in FXS logbooks!
9d733021 734 if (UpdateTable() == kFALSE) returnCode = 1;
be48e3ea 735 }
736
4b95672b 737 for (UInt_t iSys=0; iSys<3; iSys++)
738 {
739 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
740 }
741
be48e3ea 742 AliInfo(Form("Client process of %d - %s is exiting now with %d.",
743 GetCurrentRun(), aDetector->GetName(), returnCode));
744
745 // the client exits here
746 gSystem->Exit(returnCode);
747
748 AliError("We should never get here!!!");
749 }
7bfb2090 750 }
5164a766 751
2bb7b766 752 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
753 GetCurrentRun()));
754
755 //check if shuttle is done for this run, if so update logbook
756 TObjArray checkEntryArray;
757 checkEntryArray.SetOwner(1);
9e080f92 758 TString whereClause = Form("where run=%d", GetCurrentRun());
759 if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
760 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
761 GetCurrentRun()));
762 return hasError == kFALSE;
763 }
b948db8d 764
9e080f92 765 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
766 (checkEntryArray.At(0));
2bb7b766 767
9e080f92 768 if (checkEntry)
769 {
770 if (checkEntry->IsDone())
be48e3ea 771 {
9e080f92 772 Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
773 UpdateShuttleLogbook("shuttle_done");
774 }
775 else
776 {
777 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
be48e3ea 778 {
9e080f92 779 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
be48e3ea 780 {
9e080f92 781 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
782 checkEntry->GetRun(), GetDetName(iDet)));
783 fFirstUnprocessed[iDet] = kFALSE;
be48e3ea 784 }
785 }
2bb7b766 786 }
787 }
788
e7f62f16 789 // remove ML instance
790 delete fMonaLisa;
791 fMonaLisa = 0;
792
2bb7b766 793 fLogbookEntry = 0;
85a80aa9 794
a7160fe9 795 return hasError == kFALSE;
73abe331 796}
797
b948db8d 798//______________________________________________________________________________________________
2bb7b766 799UInt_t AliShuttle::ProcessCurrentDetector()
73abe331 800{
801 //
2bb7b766 802 // Makes data retrieval just for a specific detector (fCurrentDetector).
73abe331 803 // Threre should be a configuration for this detector.
73abe331 804
2bb7b766 805 AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
73abe331 806
7bfb2090 807 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
73abe331 808
2c15234c 809 TMap dcsMap;
810 dcsMap.SetOwner(1);
73abe331 811
85a80aa9 812 Bool_t aDCSError = kFALSE;
813 fGridError = kFALSE;
d477ad88 814
2c15234c 815 // TODO Test only... I've added a flag that allows to
816 // exclude DCS archive DB query
817 if (!fgkProcessDCS)
818 {
819 AliInfo("Skipping DCS processing!");
820 aDCSError = kFALSE;
821 } else {
822 TString host(fConfig->GetDCSHost(fCurrentDetector));
823 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
824
825 // Retrieval of Aliases
826 TObjString* anAlias = 0;
36c99a6a 827 Int_t iAlias = 1;
828 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
2c15234c 829 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
830 while ((anAlias = (TObjString*) iterAliases.Next()))
831 {
832 TObjArray *valueSet = new TObjArray();
833 valueSet->SetOwner(1);
834
36c99a6a 835 if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
836 AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
837 anAlias->GetName(), iAlias++, nTotAliases));
2c15234c 838 aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
839
840 if(!aDCSError)
841 {
842 dcsMap.Add(anAlias->Clone(), valueSet);
843 } else {
844 Log(fCurrentDetector,
845 Form("ProcessCurrentDetector - Error while retrieving alias %s",
846 anAlias->GetName()));
847 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
848 dcsMap.DeleteAll();
849 return 0;
850 }
4f0ab988 851 }
2c15234c 852
853 // Retrieval of Data Points
854 TObjString* aDP = 0;
36c99a6a 855 Int_t iDP = 0;
856 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
2c15234c 857 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
858 while ((aDP = (TObjString*) iterDP.Next()))
859 {
860 TObjArray *valueSet = new TObjArray();
861 valueSet->SetOwner(1);
36c99a6a 862 if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
863 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
864 aDP->GetName(), iDP++, nTotDPs));
2c15234c 865 aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
866
867 if(!aDCSError)
868 {
869 dcsMap.Add(aDP->Clone(), valueSet);
870 } else {
871 Log(fCurrentDetector,
872 Form("ProcessCurrentDetector - Error while retrieving data point %s",
873 aDP->GetName()));
874 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
875 dcsMap.DeleteAll();
876 return 0;
877 }
73abe331 878 }
879 }
b948db8d 880
2bb7b766 881 // DCS Archive DB processing successful. Call Preprocessor!
85a80aa9 882 UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
a7160fe9 883
85a80aa9 884 AliPreprocessor* aPreprocessor =
5164a766 885 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
b948db8d 886
2bb7b766 887 aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
2c15234c 888 UInt_t aPPResult = aPreprocessor->Process(&dcsMap);
2bb7b766 889
890 UInt_t returnValue = 0;
85a80aa9 891 if (aPPResult == 0) { // Preprocessor error
cb343cfd 892 UpdateShuttleStatus(AliShuttleStatus::kPPError);
2bb7b766 893 returnValue = 0;
85a80aa9 894 } else if (fGridError == kFALSE) { // process and Grid storage ok!
895 UpdateShuttleStatus(AliShuttleStatus::kDone);
2bb7b766 896 UpdateShuttleLogbook(fCurrentDetector, "DONE");
897 Log(fCurrentDetector.Data(),
898 "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
899 returnValue = 1;
85a80aa9 900 } else { // Grid storage error (process ok, but object put in local storage)
901 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
2bb7b766 902 returnValue = 2;
85a80aa9 903 }
b948db8d 904
2c15234c 905 dcsMap.DeleteAll();
b948db8d 906
2bb7b766 907 return returnValue;
908}
909
910//______________________________________________________________________________________________
911Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
912 TObjArray& entries)
913{
914// Query DAQ's Shuttle logbook and fills detector status object.
915// Call QueryRunParameters to query DAQ logbook for run parameters.
916
fc5a4708 917 entries.SetOwner(1);
918
2bb7b766 919 // check connection, in case connect
be48e3ea 920 if(!Connect(3)) return kFALSE;
2bb7b766 921
922 TString sqlQuery;
441b0e9c 923 sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
2bb7b766 924
be48e3ea 925 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2bb7b766 926 if (!aResult) {
927 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
928 return kFALSE;
929 }
930
fc5a4708 931 AliDebug(2,Form("Query = %s", sqlQuery.Data()));
932
2bb7b766 933 if(aResult->GetRowCount() == 0) {
9e080f92 934// if(sqlQuery.EndsWith("where shuttle_done=0 order by run")){
935// Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
936// delete aResult;
937// return kTRUE;
938// } else {
939 AliInfo("No entries in Shuttle Logbook match request");
2bb7b766 940 delete aResult;
941 return kTRUE;
9e080f92 942// }
2bb7b766 943 }
944
945 // TODO Check field count!
fc5a4708 946 const UInt_t nCols = 22;
2bb7b766 947 if (aResult->GetFieldCount() != (Int_t) nCols) {
948 AliError("Invalid SQL result field number!");
949 delete aResult;
950 return kFALSE;
951 }
952
2bb7b766 953 TSQLRow* aRow;
954 while ((aRow = aResult->Next())) {
955 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
956 Int_t run = runString.Atoi();
957
eba76848 958 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
959 if (!entry)
960 continue;
2bb7b766 961
962 // loop on detectors
eba76848 963 for(UInt_t ii = 0; ii < nCols; ii++)
964 entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
2bb7b766 965
eba76848 966 entries.AddLast(entry);
2bb7b766 967 delete aRow;
968 }
969
9e080f92 970// if(sqlQuery.EndsWith("where shuttle_done=0 order by run"))
971// Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
972// entries.GetEntriesFast()));
2bb7b766 973 delete aResult;
974 return kTRUE;
975}
976
977//______________________________________________________________________________________________
eba76848 978AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
2bb7b766 979{
eba76848 980 //
981 // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
982 //
2bb7b766 983
984 // check connection, in case connect
be48e3ea 985 if (!Connect(3))
eba76848 986 return 0;
2bb7b766 987
988 TString sqlQuery;
2c15234c 989 sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
2bb7b766 990
be48e3ea 991 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2bb7b766 992 if (!aResult) {
993 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
eba76848 994 return 0;
2bb7b766 995 }
996
eba76848 997 if (aResult->GetRowCount() == 0) {
2bb7b766 998 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
999 delete aResult;
eba76848 1000 return 0;
2bb7b766 1001 }
1002
eba76848 1003 if (aResult->GetRowCount() > 1) {
2bb7b766 1004 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1005 delete aResult;
eba76848 1006 return 0;
2bb7b766 1007 }
1008
eba76848 1009 TSQLRow* aRow = aResult->Next();
1010 if (!aRow)
1011 {
1012 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1013 delete aResult;
1014 return 0;
1015 }
2bb7b766 1016
eba76848 1017 AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
2bb7b766 1018
eba76848 1019 for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1020 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
2bb7b766 1021
eba76848 1022 UInt_t startTime = entry->GetStartTime();
1023 UInt_t endTime = entry->GetEndTime();
1024
1025 if (!startTime || !endTime || startTime > endTime) {
1026 Log("SHUTTLE",
1027 Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1028 run, startTime, endTime));
1029 delete entry;
2bb7b766 1030 delete aRow;
eba76848 1031 delete aResult;
1032 return 0;
2bb7b766 1033 }
1034
eba76848 1035 delete aRow;
2bb7b766 1036 delete aResult;
eba76848 1037
1038 return entry;
2bb7b766 1039}
1040
1041//______________________________________________________________________________________________
1042Bool_t AliShuttle::TryToStoreAgain()
1043{
1044 // Called in case the detector failed to store the object in Grid OCDB
1045 // It tries to store the object again, if it does not find more recent and overlapping objects
1046 // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage.
1047
1048 AliInfo("Trying to store OCDB data again...");
1049 Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
1050
1051 AliInfo("Trying to store reference data again...");
1052 Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
1053
1054 return resultCDB && resultRef;
1055}
1056
1057//______________________________________________________________________________________________
1058Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
1059{
1060 // Called by TryToStoreAgain(), performs actual storage retry
1061
6ec0e06c 1062 TObjArray* gridIds=0;
2bb7b766 1063
1064 Bool_t result = kTRUE;
1065
1066 const char* type = 0;
1067 TString backupURI;
1068 if(gridURI == fgkMainCDB) {
1069 type = "OCDB";
1070 backupURI = fgkLocalCDB;
1071 } else if(gridURI == fgkMainRefStorage) {
1072 type = "reference";
1073 backupURI = fgkLocalRefStorage;
1074 } else {
1075 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
1076 return kFALSE;
1077 }
1078
1079 AliCDBManager* man = AliCDBManager::Instance();
1080
1081 AliCDBStorage *gridSto = man->GetStorage(gridURI);
1082 if(!gridSto) {
1083 Log(fCurrentDetector.Data(),
1084 Form("TryToStoreAgain - cannot activate main %s storage", type));
1085 return kFALSE;
1086 }
1087
1088 gridIds = gridSto->GetQueryCDBList();
1089
1090 // get objects previously stored in local CDB
1091 AliCDBStorage *backupSto = man->GetStorage(backupURI);
eba76848 1092 AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
2bb7b766 1093 // Local objects were stored with current run as Grid version!
1094 TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
1095 localEntries->SetOwner(1);
1096
1097 // loop on local stored objects
1098 TIter localIter(localEntries);
1099 AliCDBEntry *aLocEntry = 0;
1100 while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
1101 aLocEntry->SetOwner(1);
1102 AliCDBId aLocId = aLocEntry->GetId();
1103 aLocEntry->SetVersion(-1);
1104 aLocEntry->SetSubVersion(-1);
1105
1106 // loop on Grid valid Id's
1107 Bool_t store = kTRUE;
1108 TIter gridIter(gridIds);
1109 AliCDBId* aGridId = 0;
1110 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
be48e3ea 1111 // If local object is valid up to infinity we store it only if it is
1112 // the first unprocessed run!
1113 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1114 {
1115 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1116 {
1117 Log(fCurrentDetector.Data(),
2c15234c 1118 ("TryToStoreAgain - This object has validity infinite but "
1119 "there are previous unprocessed runs!"));
be48e3ea 1120 continue;
1121 } else {
1122 break;
1123 }
2bb7b766 1124 }
1125 if(aGridId->GetPath() != aLocId.GetPath()) continue;
1126 // skip all objects valid up to infinity
1127 if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1128 // if we get here, it means there's already some more recent object stored on Grid!
1129 store = kFALSE;
1130 break;
1131 }
1132
1133 if(!store){
1134 Log(fCurrentDetector.Data(),
1135 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1136 type, aGridId->ToString().Data()));
1137 // removing local filename...
1138 // TODO maybe it's better not to remove it, it was not copied to the Grid!
1139 TString filename;
1140 backupSto->IdToFilename(aLocId, filename);
1141 AliInfo(Form("Removing local file %s", filename.Data()));
1142 gSystem->Exec(Form("rm %s",filename.Data()));
1143 continue;
1144 }
1145
1146 // If we get here, the file can be stored!
1147 Bool_t storeOk = gridSto->Put(aLocEntry);
1148 if(storeOk){
1149 Log(fCurrentDetector.Data(),
1150 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1151 aLocId.ToString().Data(), type));
1152
1153 // removing local filename...
1154 TString filename;
1155 backupSto->IdToFilename(aLocId, filename);
1156 AliInfo(Form("Removing local file %s", filename.Data()));
1157 gSystem->Exec(Form("rm %s", filename.Data()));
1158 continue;
1159 } else {
1160 Log(fCurrentDetector.Data(),
1161 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1162 type, aLocId.ToString().Data()));
1163 result = kFALSE;
1164 }
1165 }
1166 localEntries->Clear();
1167
1168 return result;
73abe331 1169}
1170
b948db8d 1171//______________________________________________________________________________________________
2c15234c 1172Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1173 TObjArray* valueSet, DCSType type)
73abe331 1174{
2c15234c 1175// Retrieve all "entry" data points from the DCS server
58bc3020 1176// host, port: TSocket connection parameters
2c15234c 1177// entry: name of the alias or data point
2bb7b766 1178// valueSet: array of retrieved AliDCSValue's
2c15234c 1179// type: kAlias or kDP
58bc3020 1180
73abe331 1181 AliDCSClient client(host, port, fTimeout, fRetries);
2c15234c 1182 if (!client.IsConnected())
1183 {
b948db8d 1184 return kFALSE;
73abe331 1185 }
1186
2c15234c 1187 Int_t result=0;
73abe331 1188
2c15234c 1189 if (type == kAlias)
1190 {
1191 result = client.GetAliasValues(entry,
1192 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1193 } else
1194 if (type == kDP)
1195 {
1196 result = client.GetDPValues(entry,
1197 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1198 }
1199
1200 if (result < 0)
1201 {
2bb7b766 1202 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
2c15234c 1203 entry, AliDCSClient::GetErrorString(result)));
73abe331 1204
2c15234c 1205 if (result == AliDCSClient::fgkServerError)
1206 {
2bb7b766 1207 Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
73abe331 1208 client.GetServerError().Data()));
1209 }
1210
1211 return kFALSE;
1212 }
1213
1214 return kTRUE;
1215}
b948db8d 1216
1217//______________________________________________________________________________________________
57f50b3c 1218const char* AliShuttle::GetFile(Int_t system, const char* detector,
1219 const char* id, const char* source)
b948db8d 1220{
57f50b3c 1221// Get calibration file from file exchange servers
9d733021 1222// First queris the FXS database for the file name, using the run, detector, id and source info
1223// then calls RetrieveFile(filename) for actual copy to local disk
2bb7b766 1224// run: current run being processed (given by Logbook entry fLogbookEntry)
eba76848 1225// detector: the Preprocessor name
57f50b3c 1226// id: provided as a parameter by the Preprocessor
1227// source: provided by the Preprocessor through GetFileSources function
1228
1229 // check connection, in case connect
9d733021 1230 if (!Connect(system))
eba76848 1231 {
9d733021 1232 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
57f50b3c 1233 return 0;
1234 }
1235
1236 // Query preparation
9d733021 1237 TString sourceName(source);
d386d623 1238 Int_t nFields = 3;
1239 TString sqlQueryStart = Form("select filePath,size,fileChecksum from %s where",
1240 fConfig->GetFXSdbTable(system));
1241 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1242 GetCurrentRun(), detector, id);
1243
9d733021 1244 if (system == kDAQ)
1245 {
d386d623 1246 whereClause += Form(" and DAQsource=\"%s\"", source);
57f50b3c 1247 }
9d733021 1248 else if (system == kDCS)
eba76848 1249 {
9d733021 1250 sourceName="none";
57f50b3c 1251 }
9d733021 1252 else if (system == kHLT)
9e080f92 1253 {
d386d623 1254 whereClause += Form(" and DDLnumbers=\"%s\"", source);
9d733021 1255 nFields = 3;
9e080f92 1256 }
1257
9e080f92 1258 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1259
1260 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1261
1262 // Query execution
1263 TSQLResult* aResult = 0;
9d733021 1264 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
9e080f92 1265 if (!aResult) {
9d733021 1266 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1267 GetSystemName(system), id, sourceName.Data()));
9e080f92 1268 return 0;
1269 }
1270
1271 if(aResult->GetRowCount() == 0)
1272 {
1273 Log(detector,
9d733021 1274 Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1275 GetSystemName(system), id, sourceName.Data()));
9e080f92 1276 delete aResult;
1277 return 0;
1278 }
2bb7b766 1279
9e080f92 1280 if (aResult->GetRowCount() > 1) {
1281 Log(detector,
9d733021 1282 Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1283 GetSystemName(system), id, sourceName.Data()));
9e080f92 1284 delete aResult;
1285 return 0;
1286 }
1287
9d733021 1288 if (aResult->GetFieldCount() != nFields) {
9e080f92 1289 Log(detector,
9d733021 1290 Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1291 GetSystemName(system), id, sourceName.Data()));
9e080f92 1292 delete aResult;
1293 return 0;
1294 }
1295
1296 TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1297
1298 if (!aRow){
9d733021 1299 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1300 GetSystemName(system), id, sourceName.Data()));
9e080f92 1301 delete aResult;
1302 return 0;
1303 }
1304
1305 TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1306 TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
d386d623 1307 TString fileChecksum(aRow->GetField(2), aRow->GetFieldLength(2));
9e080f92 1308
1309 delete aResult;
1310 delete aRow;
1311
d386d623 1312 AliDebug(2, Form("filePath = %s; size = %s, fileChecksum = %s",
1313 filePath.Data(), fileSize.Data(), fileChecksum.Data()));
9e080f92 1314
9e080f92 1315 // retrieved file is renamed to make it unique
9d733021 1316 TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1317 GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1318
9e080f92 1319
9d733021 1320 // file retrieval from FXS
4b95672b 1321 UInt_t nRetries = 0;
1322 UInt_t maxRetries = 3;
1323 Bool_t result = kFALSE;
1324
1325 // copy!! if successful TSystem::Exec returns 0
1326 while(nRetries++ < maxRetries) {
1327 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1328 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1329 if(!result)
1330 {
1331 Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
9d733021 1332 filePath.Data(), GetSystemName(system)));
4b95672b 1333 continue;
1334 } else {
1335 AliInfo(Form("File %s copied from %s FXS into %s/%s",
1336 filePath.Data(), GetSystemName(system),
1337 GetShuttleTempDir(), localFileName.Data()));
1338 }
9e080f92 1339
d386d623 1340 if (fileChecksum.Length()>0)
4b95672b 1341 {
1342 // compare md5sum of local file with the one stored in the FXS DB
1343 Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
d386d623 1344 GetShuttleTempDir(), localFileName.Data(), fileChecksum.Data()));
9e080f92 1345
4b95672b 1346 if (md5Comp != 0)
1347 {
1348 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1349 filePath.Data()));
1350 result = kFALSE;
1351 continue;
1352 }
d386d623 1353 } else {
1354 Log(fCurrentDetector, Form("GetFile - md5sum of file %s not set in %s database, skipping comparison",
1355 filePath.Data(), GetSystemName(system)));
9d733021 1356 }
4b95672b 1357 if (result) break;
9e080f92 1358 }
1359
4b95672b 1360 if(!result) return 0;
1361
9d733021 1362 fFXSCalled[system]=kTRUE;
1363 TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1364 fFXSlist[system].Add(fileParams);
9e080f92 1365
1366 static TString fullLocalFileName;
36c99a6a 1367 fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1368
9e080f92 1369 AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1370
1371 return fullLocalFileName.Data();
2bb7b766 1372
1373}
1374
1375//______________________________________________________________________________________________
9d733021 1376Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
9e080f92 1377{
9d733021 1378// Copies file from FXS to local Shuttle machine
2bb7b766 1379
9e080f92 1380 // check temp directory: trying to cd to temp; if it does not exist, create it
9d733021 1381 AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
1382 GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
9e080f92 1383
36c99a6a 1384 void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
9e080f92 1385 if (dir == NULL) {
36c99a6a 1386 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1387 AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
9e080f92 1388 return kFALSE;
1389 }
1390
1391 } else {
1392 gSystem->FreeDirectory(dir);
1393 }
1394
9d733021 1395 TString baseFXSFolder;
1396 if (system == kDAQ)
1397 {
1398 baseFXSFolder = "FES/";
1399 }
1400 else if (system == kDCS)
1401 {
1402 baseFXSFolder = "";
1403 }
1404 else if (system == kHLT)
1405 {
1406 baseFXSFolder = "~/";
1407 }
1408
1409
1410 TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
1411 fConfig->GetFXSPort(system),
1412 fConfig->GetFXSUser(system),
1413 fConfig->GetFXSHost(system),
1414 baseFXSFolder.Data(),
1415 fxsFileName,
36c99a6a 1416 GetShuttleTempDir(),
9e080f92 1417 localFileName);
1418
1419 AliDebug(2, Form("%s",command.Data()));
1420
4b95672b 1421 Bool_t result = (gSystem->Exec(command.Data()) == 0);
9e080f92 1422
4b95672b 1423 return result;
9e080f92 1424}
1425
1426//______________________________________________________________________________________________
9d733021 1427TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1428{
1429// Get sources producing the condition file Id from file exchange servers
1430
1431 if (system == kDCS)
1432 {
1433 AliError("DCS system has only one source of data!");
1434 return NULL;
1435
1436 }
9e080f92 1437
1438 // check connection, in case connect
9d733021 1439 if (!Connect(system))
1440 {
1441 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1442 return NULL;
9e080f92 1443 }
1444
9d733021 1445 TString sourceName = 0;
1446 if (system == kDAQ)
1447 {
1448 sourceName = "DAQsource";
1449 } else if (system == kHLT)
1450 {
1451 sourceName = "DDLnumbers";
1452 }
1453
d386d623 1454 TString sqlQueryStart = Form("select %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(system));
9e080f92 1455 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1456 GetCurrentRun(), detector, id);
1457 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1458
1459 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1460
1461 // Query execution
1462 TSQLResult* aResult;
9d733021 1463 aResult = fServer[system]->Query(sqlQuery);
9e080f92 1464 if (!aResult) {
9d733021 1465 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
1466 GetSystemName(system), id));
9e080f92 1467 return 0;
1468 }
1469
9d733021 1470 if (aResult->GetRowCount() == 0)
1471 {
9e080f92 1472 Log(detector,
9d733021 1473 Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
9e080f92 1474 delete aResult;
1475 return 0;
1476 }
1477
1478 TSQLRow* aRow;
1479 TList *list = new TList();
1480 list->SetOwner(1);
1481
9d733021 1482 while ((aRow = aResult->Next()))
1483 {
9e080f92 1484
9d733021 1485 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
1486 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
1487 list->Add(new TObjString(source));
9e080f92 1488 delete aRow;
1489 }
9d733021 1490
9e080f92 1491 delete aResult;
1492
1493 return list;
2bb7b766 1494}
1495
1496//______________________________________________________________________________________________
9d733021 1497Bool_t AliShuttle::Connect(Int_t system)
2bb7b766 1498{
9d733021 1499// Connect to MySQL Server of the system's FXS MySQL databases
1500// DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
57f50b3c 1501
9d733021 1502 // check connection: if already connected return
1503 if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
57f50b3c 1504
9d733021 1505 TString dbHost, dbUser, dbPass, dbName;
57f50b3c 1506
9d733021 1507 if (system < 3) // FXS db servers
1508 {
1509 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1510 dbUser = fConfig->GetFXSdbUser(system);
1511 dbPass = fConfig->GetFXSdbPass(system);
1512 dbName = fConfig->GetFXSdbName(system);
1513 } else { // Run & Shuttle logbook servers
1514 // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1515 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1516 dbUser = fConfig->GetDAQlbUser();
1517 dbPass = fConfig->GetDAQlbPass();
1518 dbName = fConfig->GetDAQlbDB();
1519 }
57f50b3c 1520
9d733021 1521 fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1522 if (!fServer[system] || !fServer[system]->IsConnected()) {
1523 if(system < 3)
1524 {
1525 AliError(Form("Can't establish connection to FXS database for %s",
1526 AliShuttleInterface::GetSystemName(system)));
1527 } else {
1528 AliError("Can't establish connection to Run logbook.");
57f50b3c 1529 }
9d733021 1530 if(fServer[system]) delete fServer[system];
1531 return kFALSE;
2bb7b766 1532 }
57f50b3c 1533
9d733021 1534 // Get tables
1535 TSQLResult* aResult=0;
1536 switch(system){
1537 case kDAQ:
1538 aResult = fServer[kDAQ]->GetTables(dbName.Data());
1539 break;
1540 case kDCS:
1541 aResult = fServer[kDCS]->GetTables(dbName.Data());
1542 break;
1543 case kHLT:
1544 aResult = fServer[kHLT]->GetTables(dbName.Data());
1545 break;
1546 default:
1547 aResult = fServer[3]->GetTables(dbName.Data());
1548 break;
1549 }
1550
1551 delete aResult;
2bb7b766 1552 return kTRUE;
1553}
57f50b3c 1554
9e080f92 1555//______________________________________________________________________________________________
9d733021 1556Bool_t AliShuttle::UpdateTable()
9e080f92 1557{
9d733021 1558// Update FXS table filling time_processed field in all rows corresponding to current run and detector
9e080f92 1559
9d733021 1560 Bool_t result = kTRUE;
9e080f92 1561
9d733021 1562 for (UInt_t system=0; system<3; system++)
1563 {
1564 if(!fFXSCalled[system]) continue;
9e080f92 1565
9d733021 1566 // check connection, in case connect
1567 if (!Connect(system))
1568 {
1569 Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
1570 result = kFALSE;
1571 continue;
9e080f92 1572 }
9e080f92 1573
9d733021 1574 TTimeStamp now; // now
1575
1576 // Loop on FXS list entries
1577 TIter iter(&fFXSlist[system]);
1578 TObjString *aFXSentry=0;
1579 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
1580 {
1581 TString aFXSentrystr = aFXSentry->String();
1582 TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1583 if (!aFXSarray || aFXSarray->GetEntries() != 2 )
1584 {
1585 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
1586 GetSystemName(system), aFXSentrystr.Data()));
1587 if(aFXSarray) delete aFXSarray;
1588 result = kFALSE;
1589 continue;
1590 }
1591 const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1592 const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
1593
1594 TString whereClause;
1595 if (system == kDAQ)
1596 {
1597 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1598 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1599 }
1600 else if (system == kDCS)
1601 {
1602 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
1603 GetCurrentRun(), fCurrentDetector.Data(), fileId);
1604 }
1605 else if (system == kHLT)
1606 {
1607 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1608 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1609 }
1610
1611 delete aFXSarray;
9e080f92 1612
9d733021 1613 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1614 now.GetSec(), whereClause.Data());
9e080f92 1615
9d733021 1616 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
9e080f92 1617
9d733021 1618 // Query execution
1619 TSQLResult* aResult;
1620 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1621 if (!aResult)
1622 {
1623 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
1624 GetSystemName(system), sqlQuery.Data()));
1625 result = kFALSE;
1626 continue;
1627 }
1628 delete aResult;
9e080f92 1629 }
9e080f92 1630 }
1631
9d733021 1632 return result;
9e080f92 1633}
57f50b3c 1634
2bb7b766 1635//______________________________________________________________________________________________
1636Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1637{
e7f62f16 1638 //
1639 // Update Shuttle logbook filling detector or shuttle_done column
1640 // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1641 //
57f50b3c 1642
2bb7b766 1643 // check connection, in case connect
be48e3ea 1644 if(!Connect(3)){
2bb7b766 1645 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1646 return kFALSE;
57f50b3c 1647 }
1648
2bb7b766 1649 TString detName(detector);
1650 TString setClause;
e7f62f16 1651 if(detName == "shuttle_done")
1652 {
2bb7b766 1653 setClause = "set shuttle_done=1";
e7f62f16 1654
1655 // Send the information to ML
1656 TMonaLisaText mlStatus("SHUTTLE_status", "Done");
1657
1658 TList mlList;
1659 mlList.Add(&mlStatus);
1660
1661 fMonaLisa->SendParameters(&mlList);
2bb7b766 1662 } else {
2bb7b766 1663 TString statusStr(status);
1664 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1665 statusStr.Contains("failed", TString::kIgnoreCase)){
eba76848 1666 setClause = Form("set %s=\"%s\"", detector, status);
2bb7b766 1667 } else {
1668 Log("SHUTTLE",
1669 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1670 status, detector));
1671 return kFALSE;
1672 }
1673 }
57f50b3c 1674
2bb7b766 1675 TString whereClause = Form("where run=%d", GetCurrentRun());
1676
441b0e9c 1677 TString sqlQuery = Form("update %s %s %s",
1678 fConfig->GetShuttlelbTable(), setClause.Data(), whereClause.Data());
57f50b3c 1679
2bb7b766 1680 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1681
1682 // Query execution
1683 TSQLResult* aResult;
be48e3ea 1684 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2bb7b766 1685 if (!aResult) {
1686 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1687 return kFALSE;
57f50b3c 1688 }
2bb7b766 1689 delete aResult;
57f50b3c 1690
1691 return kTRUE;
1692}
1693
1694//______________________________________________________________________________________________
2bb7b766 1695Int_t AliShuttle::GetCurrentRun() const
1696{
1697// Get current run from logbook entry
57f50b3c 1698
2bb7b766 1699 return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
57f50b3c 1700}
1701
1702//______________________________________________________________________________________________
2bb7b766 1703UInt_t AliShuttle::GetCurrentStartTime() const
1704{
1705// get current start time
57f50b3c 1706
2bb7b766 1707 return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
57f50b3c 1708}
1709
1710//______________________________________________________________________________________________
2bb7b766 1711UInt_t AliShuttle::GetCurrentEndTime() const
1712{
1713// get current end time from logbook entry
57f50b3c 1714
2bb7b766 1715 return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
57f50b3c 1716}
1717
b948db8d 1718//______________________________________________________________________________________________
1719void AliShuttle::Log(const char* detector, const char* message)
1720{
58bc3020 1721// Fill log string with a message
b948db8d 1722
36c99a6a 1723 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
84090f85 1724 if (dir == NULL) {
36c99a6a 1725 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
1726 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
84090f85 1727 return;
1728 }
b948db8d 1729
84090f85 1730 } else {
1731 gSystem->FreeDirectory(dir);
1732 }
b948db8d 1733
cb343cfd 1734 TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
e7f62f16 1735 if (GetCurrentRun() >= 0)
1736 toLog += Form("run %d - ", GetCurrentRun());
2bb7b766 1737 toLog += Form("%s", message);
1738
84090f85 1739 AliInfo(toLog.Data());
b948db8d 1740
84090f85 1741 TString fileName;
e7f62f16 1742 if (GetCurrentRun() >= 0)
1743 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
1744 else
1745 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
1746
84090f85 1747 gSystem->ExpandPathName(fileName);
1748
1749 ofstream logFile;
1750 logFile.open(fileName, ofstream::out | ofstream::app);
1751
1752 if (!logFile.is_open()) {
1753 AliError(Form("Could not open file %s", fileName.Data()));
1754 return;
1755 }
7bfb2090 1756
84090f85 1757 logFile << toLog.Data() << "\n";
b948db8d 1758
84090f85 1759 logFile.close();
b948db8d 1760}
2bb7b766 1761
2bb7b766 1762//______________________________________________________________________________________________
1763Bool_t AliShuttle::Collect(Int_t run)
1764{
eba76848 1765//
1766// Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1767// If a dedicated run is given this run is processed
1768//
1769// In operational mode, this is the Shuttle function triggered by the EOR signal.
1770//
2bb7b766 1771
eba76848 1772 if (run == -1)
1773 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1774 else
1775 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
cb343cfd 1776
1777 SetLastAction("Starting");
2bb7b766 1778
1779 TString whereClause("where shuttle_done=0");
eba76848 1780 if (run != -1)
1781 whereClause += Form(" and run=%d", run);
2bb7b766 1782
1783 TObjArray shuttleLogbookEntries;
be48e3ea 1784 if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1785 {
cb343cfd 1786 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2bb7b766 1787 return kFALSE;
1788 }
1789
9e080f92 1790 if (shuttleLogbookEntries.GetEntries() == 0)
1791 {
1792 if (run == -1)
1793 Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
1794 else
1795 Log("SHUTTLE", Form("Collect - Run %d is already DONE "
1796 "or it does not exist in Shuttle logbook", run));
1797 return kTRUE;
1798 }
1799
be48e3ea 1800 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1801 fFirstUnprocessed[iDet] = kTRUE;
1802
fc5a4708 1803 if (run != -1)
be48e3ea 1804 {
1805 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1806 // flag them into fFirstUnprocessed array
1807 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1808 TObjArray tmpLogbookEntries;
1809 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1810 {
1811 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1812 return kFALSE;
1813 }
1814
1815 TIter iter(&tmpLogbookEntries);
1816 AliShuttleLogbookEntry* anEntry = 0;
1817 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
1818 {
1819 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1820 {
1821 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1822 {
1823 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1824 anEntry->GetRun(), GetDetName(iDet)));
1825 fFirstUnprocessed[iDet] = kFALSE;
1826 }
1827 }
1828
1829 }
1830
1831 }
1832
1833 if (!RetrieveConditionsData(shuttleLogbookEntries))
1834 {
cb343cfd 1835 Log("SHUTTLE", "Collect - Process of at least one run failed");
2bb7b766 1836 return kFALSE;
1837 }
1838
36c99a6a 1839 Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
eba76848 1840 return kTRUE;
2bb7b766 1841}
1842
2bb7b766 1843//______________________________________________________________________________________________
1844Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1845{
1846// Retrieve conditions data for all runs that aren't processed yet
1847
1848 Bool_t hasError = kFALSE;
1849
1850 TIter iter(&dateEntries);
1851 AliShuttleLogbookEntry* anEntry;
1852
1853 while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1854 if (!Process(anEntry)){
1855 hasError = kTRUE;
1856 }
4b95672b 1857
1858 // clean SHUTTLE temp directory
1859 TString command = Form("rm -f %s/*.shuttle", GetShuttleTempDir());
1860 gSystem->Exec(command.Data());
2bb7b766 1861 }
1862
1863 return hasError == kFALSE;
1864}
cb343cfd 1865
1866//______________________________________________________________________________________________
1867ULong_t AliShuttle::GetTimeOfLastAction() const
1868{
1869 ULong_t tmp;
36c99a6a 1870
cb343cfd 1871 fMonitoringMutex->Lock();
be48e3ea 1872
cb343cfd 1873 tmp = fLastActionTime;
36c99a6a 1874
cb343cfd 1875 fMonitoringMutex->UnLock();
36c99a6a 1876
cb343cfd 1877 return tmp;
1878}
1879
1880//______________________________________________________________________________________________
1881const TString AliShuttle::GetLastAction() const
1882{
1883 // returns a string description of the last action
1884
1885 TString tmp;
36c99a6a 1886
cb343cfd 1887 fMonitoringMutex->Lock();
1888
1889 tmp = fLastAction;
1890
1891 fMonitoringMutex->UnLock();
1892
36c99a6a 1893 return tmp;
cb343cfd 1894}
1895
1896//______________________________________________________________________________________________
1897void AliShuttle::SetLastAction(const char* action)
1898{
1899 // updates the monitoring variables
36c99a6a 1900
cb343cfd 1901 fMonitoringMutex->Lock();
36c99a6a 1902
cb343cfd 1903 fLastAction = action;
1904 fLastActionTime = time(0);
1905
1906 fMonitoringMutex->UnLock();
1907}
eba76848 1908
1909//______________________________________________________________________________________________
1910const char* AliShuttle::GetRunParameter(const char* param)
1911{
1912// returns run parameter read from DAQ logbook
1913
1914 if(!fLogbookEntry) {
1915 AliError("No logbook entry!");
1916 return 0;
1917 }
1918
1919 return fLogbookEntry->GetRunParameter(param);
1920}
57c1a579 1921
d386d623 1922//______________________________________________________________________________________________
1923AliCDBEntry* AliShuttle::GetFromOCDB(const AliCDBPath& path)
1924{
1925// returns obiect from OCDB valid for current run
1926
1927 AliCDBStorage *sto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
1928 if (!sto)
1929 {
1930 Log("SHUTTLE", "GetFromOCDB - Cannot activate main OCDB for query!");
1931 return 0;
1932 }
1933
1934 return dynamic_cast<AliCDBEntry*> (sto->Get(path, GetCurrentRun()));
1935}
1936
57c1a579 1937//______________________________________________________________________________________________
1938Bool_t AliShuttle::SendMail()
1939{
1940// sends a mail to the subdetector expert in case of preprocessor error
1941
36c99a6a 1942 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
57c1a579 1943 if (dir == NULL)
1944 {
36c99a6a 1945 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
57c1a579 1946 {
36c99a6a 1947 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
57c1a579 1948 return kFALSE;
1949 }
1950
1951 } else {
1952 gSystem->FreeDirectory(dir);
1953 }
1954
1955 TString bodyFileName;
36c99a6a 1956 bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
57c1a579 1957 gSystem->ExpandPathName(bodyFileName);
1958
1959 ofstream mailBody;
1960 mailBody.open(bodyFileName, ofstream::out);
1961
1962 if (!mailBody.is_open())
1963 {
1964 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
1965 return kFALSE;
1966 }
1967
1968 TString to="";
1969 TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
1970 TObjString *anExpert=0;
1971 while ((anExpert = (TObjString*) iterExperts.Next()))
1972 {
1973 to += Form("%s,", anExpert->GetName());
1974 }
1975 to.Remove(to.Length()-1);
909732f7 1976 AliDebug(2, Form("to: %s",to.Data()));
57c1a579 1977
36c99a6a 1978 // TODO this will be removed...
1979 if (to.Contains("not_yet_set")) {
1980 AliInfo("List of detector responsibles not yet set!");
1981 return kFALSE;
1982 }
1983
57c1a579 1984 TString cc="alberto.colla@cern.ch";
1985
1986 TString subject = Form("%s Shuttle preprocessor error in run %d !",
1987 fCurrentDetector.Data(), GetCurrentRun());
909732f7 1988 AliDebug(2, Form("subject: %s", subject.Data()));
57c1a579 1989
1990 TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
1991 body += Form("SHUTTLE just detected that your preprocessor "
36c99a6a 1992 "exited with ERROR state in run %d!!\n\n", GetCurrentRun());
57c1a579 1993 body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
1994 body += Form("The last 10 lines of %s log file are following:\n\n");
1995
909732f7 1996 AliDebug(2, Form("Body begin: %s", body.Data()));
57c1a579 1997
1998 mailBody << body.Data();
1999 mailBody.close();
2000 mailBody.open(bodyFileName, ofstream::out | ofstream::app);
2001
9d733021 2002 TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
57c1a579 2003 TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
2004 if (gSystem->Exec(tailCommand.Data()))
2005 {
2006 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
2007 }
2008
2009 TString endBody = Form("------------------------------------------------------\n\n");
36c99a6a 2010 endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2011 endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
57c1a579 2012 endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
2013
909732f7 2014 AliDebug(2, Form("Body end: %s", endBody.Data()));
57c1a579 2015
2016 mailBody << endBody.Data();
2017
2018 mailBody.close();
2019
2020 // send mail!
2021 TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2022 subject.Data(),
2023 cc.Data(),
2024 to.Data(),
2025 bodyFileName.Data());
909732f7 2026 AliDebug(2, Form("mail command: %s", mailCommand.Data()));
57c1a579 2027
2028 Bool_t result = gSystem->Exec(mailCommand.Data());
2029
2030 return result == 0;
2031}
d386d623 2032
441b0e9c 2033//______________________________________________________________________________________________
2034void AliShuttle::SetRunType()
2035{
2036// Gets run type from logbook and fills current Shuttle logbook entry
2037
2038 // check connection, in case connect
2039 if(!Connect(3)){
2040 Log("SHUTTLE", "GetRunType - Couldn't connect to DAQ Logbook.");
2041 return;
2042 }
2043
2044 TString sqlQuery = Form("select detector,run_type from %s where run_number=%d",
2045 fConfig->GetRunTypelbTable(), GetCurrentRun());
2046
2047 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
2048
2049 // Query execution
2050 TSQLResult* aResult;
2051 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2052 if (!aResult) {
2053 Log("SHUTTLE", Form("GetRunType - Can't execute query <%s>", sqlQuery.Data()));
2054 return;
2055 }
2056
2057 TSQLRow* aRow;
2058 while ((aRow = aResult->Next())) {
2059 TString det(aRow->GetField(0), aRow->GetFieldLength(0));
2060 TString runType(aRow->GetField(1), aRow->GetFieldLength(1));
2061
2062 fLogbookEntry->SetRunType(det, runType);
2063 delete aRow;
2064 }
2065
2066 delete aResult;
2067
2068 return;
2069
2070}
2071
2072//______________________________________________________________________________________________
2073const char* AliShuttle::GetRunType(const char* detCode)
2074{
2075// returns run type read from "run type" logbook
2076
2077 if(!fLogbookEntry) {
2078 AliError("No logbook entry!");
2079 return 0;
2080 }
2081
2082 return fLogbookEntry->GetRunType(detCode);
2083}
2084
d386d623 2085//______________________________________________________________________________________________
2086void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2087{
2088// sets Shuttle temp directory
2089
2090 fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2091}
2092
2093//______________________________________________________________________________________________
2094void AliShuttle::SetShuttleLogDir(const char* logDir)
2095{
2096// sets Shuttle log directory
2097
2098 fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2099}