Check md5sum of retrieved FXS file inside "retry loop"; clean temp folder after
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
CommitLineData
73abe331 1/**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3 * *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
6 * *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
15
16/*
17$Log$
9d733021 18Revision 1.27 2007/01/30 17:52:42 jgrosseo
19adding monalisa monitoring
20
e7f62f16 21Revision 1.26 2007/01/23 19:20:03 acolla
22Removed old ldif files, added TOF, MCH ldif files. Added some options in
23AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and
24SetShuttleLogDir
25
36c99a6a 26Revision 1.25 2007/01/15 19:13:52 acolla
27Moved some AliInfo to AliDebug in SendMail function
28
fc5a4708 29Revision 1.21 2006/12/07 08:51:26 jgrosseo
30update (alberto):
31table, db names in ldap configuration
32added GRP preprocessor
33DCS data can also be retrieved by data point
34
2c15234c 35Revision 1.20 2006/11/16 16:16:48 jgrosseo
36introducing strict run ordering flag
37removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
38
be48e3ea 39Revision 1.19 2006/11/06 14:23:04 jgrosseo
40major update (Alberto)
41o) reading of run parameters from the logbook
42o) online offline naming conversion
43o) standalone DCSclient package
44
eba76848 45Revision 1.18 2006/10/20 15:22:59 jgrosseo
46o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
47o) Merging Collect, CollectAll, CollectNew function
48o) Removing implementation of empty copy constructors (declaration still there!)
49
cb343cfd 50Revision 1.17 2006/10/05 16:20:55 jgrosseo
51adapting to new CDB classes
52
6ec0e06c 53Revision 1.16 2006/10/05 15:46:26 jgrosseo
54applying to the new interface
55
481441a2 56Revision 1.15 2006/10/02 16:38:39 jgrosseo
57update (alberto):
58fixed memory leaks
59storing of objects that failed to be stored to the grid before
60interfacing of shuttle status table in daq system
61
2bb7b766 62Revision 1.14 2006/08/29 09:16:05 jgrosseo
63small update
64
85a80aa9 65Revision 1.13 2006/08/15 10:50:00 jgrosseo
66effc++ corrections (alberto)
67
4f0ab988 68Revision 1.12 2006/08/08 14:19:29 jgrosseo
69Update to shuttle classes (Alberto)
70
71- Possibility to set the full object's path in the Preprocessor's and
72Shuttle's Store functions
73- Possibility to extend the object's run validity in the same classes
74("startValidity" and "validityInfinite" parameters)
75- Implementation of the StoreReferenceData function to store reference
76data in a dedicated CDB storage.
77
84090f85 78Revision 1.11 2006/07/21 07:37:20 jgrosseo
79last run is stored after each run
80
7bfb2090 81Revision 1.10 2006/07/20 09:54:40 jgrosseo
82introducing status management: The processing per subdetector is divided into several steps,
83after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
84can keep track of the number of failures and skips further processing after a certain threshold is
85exceeded. These thresholds can be configured in LDAP.
86
5164a766 87Revision 1.9 2006/07/19 10:09:55 jgrosseo
88new configuration, accesst to DAQ FES (Alberto)
89
57f50b3c 90Revision 1.8 2006/07/11 12:44:36 jgrosseo
91adding parameters for extended validity range of data produced by preprocessor
92
17111222 93Revision 1.7 2006/07/10 14:37:09 jgrosseo
94small fix + todo comment
95
e090413b 96Revision 1.6 2006/07/10 13:01:41 jgrosseo
97enhanced storing of last sucessfully processed run (alberto)
98
a7160fe9 99Revision 1.5 2006/07/04 14:59:57 jgrosseo
100revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
101
45a493ce 102Revision 1.4 2006/06/12 09:11:16 jgrosseo
103coding conventions (Alberto)
104
58bc3020 105Revision 1.3 2006/06/06 14:26:40 jgrosseo
106o) removed files that were moved to STEER
107o) shuttle updated to follow the new interface (Alberto)
108
b948db8d 109Revision 1.2 2006/03/07 07:52:34 hristov
110New version (B.Yordanov)
111
d477ad88 112Revision 1.6 2005/11/19 17:19:14 byordano
113RetrieveDATEEntries and RetrieveConditionsData added
114
115Revision 1.5 2005/11/19 11:09:27 byordano
116AliShuttle declaration added
117
118Revision 1.4 2005/11/17 17:47:34 byordano
119TList changed to TObjArray
120
121Revision 1.3 2005/11/17 14:43:23 byordano
122import to local CVS
123
124Revision 1.1.1.1 2005/10/28 07:33:58 hristov
125Initial import as subdirectory in AliRoot
126
73abe331 127Revision 1.2 2005/09/13 08:41:15 byordano
128default startTime endTime added
129
130Revision 1.4 2005/08/30 09:13:02 byordano
131some docs added
132
133Revision 1.3 2005/08/29 21:15:47 byordano
134some docs added
135
136*/
137
138//
139// This class is the main manager for AliShuttle.
140// It organizes the data retrieval from DCS and calls the
b948db8d 141// interface methods of AliPreprocessor.
73abe331 142// For every detector in the configuration (see AliShuttleConfig),
143// data for its set of aliases is retrieved. If there is a registered
b948db8d 144// AliPreprocessor for this detector then it will be used
145// according to the schema (see AliPreprocessor).
146// If there isn't a registered AliPreprocessor then the retrieved
73abe331 147// data is stored automatically to the underlying AliCDBStorage.
148// The alias name is used for detSpec.
149//
150
151#include "AliShuttle.h"
152
153#include "AliCDBManager.h"
154#include "AliCDBStorage.h"
155#include "AliCDBId.h"
84090f85 156#include "AliCDBRunRange.h"
157#include "AliCDBPath.h"
5164a766 158#include "AliCDBEntry.h"
73abe331 159#include "AliShuttleConfig.h"
eba76848 160#include "DCSClient/AliDCSClient.h"
73abe331 161#include "AliLog.h"
b948db8d 162#include "AliPreprocessor.h"
5164a766 163#include "AliShuttleStatus.h"
2bb7b766 164#include "AliShuttleLogbookEntry.h"
73abe331 165
57f50b3c 166#include <TSystem.h>
58bc3020 167#include <TObject.h>
b948db8d 168#include <TString.h>
57f50b3c 169#include <TTimeStamp.h>
73abe331 170#include <TObjString.h>
57f50b3c 171#include <TSQLServer.h>
172#include <TSQLResult.h>
173#include <TSQLRow.h>
cb343cfd 174#include <TMutex.h>
73abe331 175
e7f62f16 176#include <TMonaLisaWriter.h>
177
5164a766 178#include <fstream>
179
cb343cfd 180#include <sys/types.h>
181#include <sys/wait.h>
182
73abe331 183ClassImp(AliShuttle)
184
2bb7b766 185TString AliShuttle::fgkMainCDB("alien://folder=ShuttleCDB");
84090f85 186TString AliShuttle::fgkLocalCDB("local://LocalShuttleCDB");
2bb7b766 187TString AliShuttle::fgkMainRefStorage("alien://folder=ShuttleReference");
84090f85 188TString AliShuttle::fgkLocalRefStorage("local://LocalReferenceStorage");
189
4f0ab988 190Bool_t AliShuttle::fgkProcessDCS(kTRUE);
191
36c99a6a 192TString AliShuttle::fgkShuttleTempDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/temp");
193TString AliShuttle::fgkShuttleLogDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/log");
57f50b3c 194
b948db8d 195//______________________________________________________________________________________________
196AliShuttle::AliShuttle(const AliShuttleConfig* config,
197 UInt_t timeout, Int_t retries):
4f0ab988 198fConfig(config),
199fTimeout(timeout), fRetries(retries),
200fPreprocessorMap(),
2bb7b766 201fLogbookEntry(0),
eba76848 202fCurrentDetector(),
85a80aa9 203fStatusEntry(0),
cb343cfd 204fGridError(kFALSE),
205fMonitoringMutex(0),
eba76848 206fLastActionTime(0),
e7f62f16 207fLastAction(),
208fMonaLisa(0)
73abe331 209{
210 //
211 // config: AliShuttleConfig used
73abe331 212 // timeout: timeout used for AliDCSClient connection
213 // retries: the number of retries in case of connection error.
214 //
215
57f50b3c 216 if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
be48e3ea 217 for(int iSys=0;iSys<4;iSys++) {
57f50b3c 218 fServer[iSys]=0;
be48e3ea 219 if (iSys < 3)
2c15234c 220 fFXSlist[iSys].SetOwner(kTRUE);
57f50b3c 221 }
2bb7b766 222 fPreprocessorMap.SetOwner(kTRUE);
be48e3ea 223
224 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
225 fFirstUnprocessed[iDet] = kFALSE;
226
cb343cfd 227 fMonitoringMutex = new TMutex();
58bc3020 228}
229
b948db8d 230//______________________________________________________________________________________________
57f50b3c 231AliShuttle::~AliShuttle()
58bc3020 232{
233// destructor
234
b948db8d 235 fPreprocessorMap.DeleteAll();
be48e3ea 236 for(int iSys=0;iSys<4;iSys++)
57f50b3c 237 if(fServer[iSys]) {
238 fServer[iSys]->Close();
239 delete fServer[iSys];
eba76848 240 fServer[iSys] = 0;
57f50b3c 241 }
2bb7b766 242
243 if (fStatusEntry){
244 delete fStatusEntry;
245 fStatusEntry = 0;
246 }
cb343cfd 247
248 if (fMonitoringMutex)
249 {
250 delete fMonitoringMutex;
251 fMonitoringMutex = 0;
252 }
73abe331 253}
254
b948db8d 255//______________________________________________________________________________________________
57f50b3c 256void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor)
58bc3020 257{
73abe331 258 //
b948db8d 259 // Registers new AliPreprocessor.
73abe331 260 // It uses GetName() for indentificator of the pre processor.
261 // The pre processor is registered it there isn't any other
262 // with the same identificator (GetName()).
263 //
264
eba76848 265 const char* detName = preprocessor->GetName();
266 if(GetDetPos(detName) < 0)
267 AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName));
268
269 if (fPreprocessorMap.GetValue(detName)) {
270 AliWarning(Form("AliPreprocessor %s is already registered!", detName));
73abe331 271 return;
272 }
273
eba76848 274 fPreprocessorMap.Add(new TObjString(detName), preprocessor);
73abe331 275}
b948db8d 276//______________________________________________________________________________________________
84090f85 277UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object,
278 AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite)
73abe331 279{
84090f85 280 // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for
281 // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored
282 // using this function. Use StoreReferenceData instead!
85a80aa9 283 // It calls WriteToCDB function which perform actual storage
b948db8d 284
85a80aa9 285 return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object,
286 metaData, validityStart, validityInfinite);
84090f85 287
288}
289
290//______________________________________________________________________________________________
481441a2 291UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData)
84090f85 292{
293 // Stores a CDB object in the storage for reference data. This objects will not be available during
294 // offline reconstrunction. Use this function for reference data only!
85a80aa9 295 // It calls WriteToCDB function which perform actual storage
296
481441a2 297 return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData);
84090f85 298
85a80aa9 299}
300
301//______________________________________________________________________________________________
302UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri,
303 const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
304 Int_t validityStart, Bool_t validityInfinite)
305{
306 // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions.
307 // The parameters are:
308 // 1) Uri of the main storage (Grid)
309 // 2) Uri of the backup storage (Local)
310 // 3) the object's path.
311 // 4) the object to be stored
312 // 5) the metaData to be associated with the object
313 // 6) the validity start run number w.r.t. the current run,
84090f85 314 // if the data is valid only for this run leave the default 0
85a80aa9 315 // 7) specifies if the calibration data is valid for infinity (this means until updated),
84090f85 316 // typical for calibration runs, the default is kFALSE
317 //
84090f85 318 // returns 0 if fail
85a80aa9 319 // 1 if stored in main (Grid) storage
320 // 2 if stored in backup (Local) storage
84090f85 321
85a80aa9 322 const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference";
2bb7b766 323
85a80aa9 324 Int_t firstRun = GetCurrentRun() - validityStart;
84090f85 325 if(firstRun < 0) {
2bb7b766 326 AliError("First valid run happens to be less than 0! Setting it to 0.");
84090f85 327 firstRun=0;
328 }
329
330 Int_t lastRun = -1;
331 if(validityInfinite) {
332 lastRun = AliCDBRunRange::Infinity();
333 } else {
334 lastRun = GetCurrentRun();
335 }
336
2bb7b766 337 AliCDBId id(path, firstRun, lastRun, -1, -1);
338
339 if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){
340 TObjString runUsed = Form("%d", GetCurrentRun());
9e080f92 341 metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone());
2bb7b766 342 }
84090f85 343
344 UInt_t result = 0;
345
85a80aa9 346 if (!(AliCDBManager::Instance()->GetStorage(mainUri))) {
2bb7b766 347 AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType));
84090f85 348 } else {
85a80aa9 349 result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri)
84090f85 350 ->Put(object, id, metaData);
351 }
352
353 if(!result) {
354
355 Log(fCurrentDetector,
2bb7b766 356 Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage",
357 cdbType, path.GetPath().Data()));
358
359 // Set Grid version to current run number, to ease retrieval later
360 id.SetVersion(GetCurrentRun());
84090f85 361
85a80aa9 362 result = AliCDBManager::Instance()->GetStorage(localUri)
84090f85 363 ->Put(object, id, metaData);
364
365 if(result) {
366 result = 2;
85a80aa9 367 fGridError = kTRUE;
84090f85 368 }else{
2bb7b766 369 Log(fCurrentDetector, "WriteToCDB - Can't store data!");
b948db8d 370 }
371 }
2bb7b766 372
b948db8d 373 return result;
374
73abe331 375}
376
b948db8d 377//______________________________________________________________________________________________
5164a766 378AliShuttleStatus* AliShuttle::ReadShuttleStatus()
379{
2bb7b766 380// Reads the AliShuttleStatus from the CDB
5164a766 381
2bb7b766 382 if (fStatusEntry){
383 delete fStatusEntry;
384 fStatusEntry = 0;
385 }
5164a766 386
2bb7b766 387 fStatusEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalCDB())
388 ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun());
5164a766 389
2bb7b766 390 if (!fStatusEntry) return 0;
391 fStatusEntry->SetOwner(1);
5164a766 392
2bb7b766 393 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
394 if (!status) {
395 AliError("Invalid object stored to CDB!");
396 return 0;
397 }
5164a766 398
2bb7b766 399 return status;
5164a766 400}
401
402//______________________________________________________________________________________________
7bfb2090 403Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status)
5164a766 404{
2bb7b766 405// writes the status for one subdetector
406
407 if (fStatusEntry){
408 delete fStatusEntry;
409 fStatusEntry = 0;
410 }
5164a766 411
2bb7b766 412 Int_t run = GetCurrentRun();
5164a766 413
2bb7b766 414 AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run);
5164a766 415
2bb7b766 416 fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData);
417 fStatusEntry->SetOwner(1);
5164a766 418
2bb7b766 419 UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
7bfb2090 420
2bb7b766 421 if (!result) {
422 AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run));
423 return kFALSE;
424 }
e7f62f16 425
426 SendMLInfo();
7bfb2090 427
2bb7b766 428 return kTRUE;
5164a766 429}
430
431//______________________________________________________________________________________________
432void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount)
433{
434 // changes the AliShuttleStatus for the given detector and run to the given status
435
2bb7b766 436 if (!fStatusEntry){
437 AliError("UNEXPECTED: fStatusEntry empty");
438 return;
439 }
5164a766 440
2bb7b766 441 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
5164a766 442
2bb7b766 443 if (!status){
444 AliError("UNEXPECTED: status could not be read from current CDB entry");
445 return;
446 }
5164a766 447
2c15234c 448 TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s",
eba76848 449 fCurrentDetector.Data(),
36c99a6a 450 status->GetStatusName(),
eba76848 451 status->GetStatusName(newStatus));
cb343cfd 452 Log("SHUTTLE", actionStr);
453 SetLastAction(actionStr);
5164a766 454
2bb7b766 455 status->SetStatus(newStatus);
456 if (increaseCount) status->IncreaseCount();
5164a766 457
2bb7b766 458 AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry);
e7f62f16 459
460 SendMLInfo();
5164a766 461}
e7f62f16 462
463//______________________________________________________________________________________________
464void AliShuttle::SendMLInfo()
465{
466 //
467 // sends ML information about the current status of the current detector being processed
468 //
469
470 AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject());
471
472 if (!status){
473 AliError("UNEXPECTED: status could not be read from current CDB entry");
474 return;
475 }
476
477 TMonaLisaText mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName());
478 TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount());
479
480 TList mlList;
481 mlList.Add(&mlStatus);
482 mlList.Add(&mlRetryCount);
483
484 fMonaLisa->SendParameters(&mlList);
485}
486
5164a766 487//______________________________________________________________________________________________
488Bool_t AliShuttle::ContinueProcessing()
489{
2bb7b766 490// this function reads the AliShuttleStatus information from CDB and
491// checks if the processing should be continued
492// if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
493
57c1a579 494 if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
495
496 AliPreprocessor* aPreprocessor =
497 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
498 if (!aPreprocessor)
499 {
500 AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
501 return kFALSE;
502 }
503
2bb7b766 504 AliShuttleLogbookEntry::Status entryStatus =
eba76848 505 fLogbookEntry->GetDetectorStatus(fCurrentDetector);
2bb7b766 506
507 if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) {
9e080f92 508 AliInfo(Form("ContinueProcessing - %s is %s",
2bb7b766 509 fCurrentDetector.Data(),
510 fLogbookEntry->GetDetectorStatusName(entryStatus)));
511 return kFALSE;
512 }
513
514 // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state
be48e3ea 515
516 // check if current run is first unprocessed run for current detector
517 if (fConfig->StrictRunOrder(fCurrentDetector) &&
518 !fFirstUnprocessed[GetDetPos(fCurrentDetector)])
519 {
520 Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!"));
521 return kFALSE;
522 }
523
2bb7b766 524 AliShuttleStatus* status = ReadShuttleStatus();
525 if (!status) {
526 // first time
527 Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time",
528 fCurrentDetector.Data()));
529 status = new AliShuttleStatus(AliShuttleStatus::kStarted);
530 return WriteShuttleStatus(status);
531 }
532
533 // The following two cases shouldn't happen if Shuttle Logbook was correctly updated.
534 // If it happens it may mean Logbook updating failed... let's do it now!
535 if (status->GetStatus() == AliShuttleStatus::kDone ||
536 status->GetStatus() == AliShuttleStatus::kFailed){
537 Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook",
538 fCurrentDetector.Data(),
539 status->GetStatusName(status->GetStatus())));
540 UpdateShuttleLogbook(fCurrentDetector.Data(),
541 status->GetStatusName(status->GetStatus()));
542 return kFALSE;
543 }
544
545 if (status->GetStatus() == AliShuttleStatus::kStoreFailed) {
546 Log("SHUTTLE",
547 Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now",
548 fCurrentDetector.Data()));
549 if(TryToStoreAgain()){
550 Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB");
551 UpdateShuttleStatus(AliShuttleStatus::kDone);
552 UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
553 } else {
554 Log("SHUTTLE",
555 Form("ContinueProcessing - %s: Grid storage failed again",
556 fCurrentDetector.Data()));
e7f62f16 557 // trigger ML information manually because we do not had a status change
558 SendMLInfo();
2bb7b766 559 }
560 return kFALSE;
561 }
562
563 // if we get here, there is a restart
57c1a579 564 Bool_t cont = kFALSE;
2bb7b766 565
566 // abort conditions
cb343cfd 567 if (status->GetCount() >= fConfig->GetMaxRetries()) {
57c1a579 568 Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
569 "Updating Shuttle Logbook", fCurrentDetector.Data(),
2bb7b766 570 status->GetCount(), status->GetStatusName()));
571 UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
e7f62f16 572 UpdateShuttleStatus(AliShuttleStatus::kFailed);
57c1a579 573 } else {
574 Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
575 "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
576 status->GetStatusName(), status->GetCount()));
577 UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
578 cont = kTRUE;
2bb7b766 579 }
580
57c1a579 581 // Send mail to detector expert!
582 AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
583 if (!SendMail())
584 Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
585 fCurrentDetector.Data()));
2bb7b766 586
57c1a579 587 return cont;
5164a766 588}
589
590//______________________________________________________________________________________________
2bb7b766 591Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
58bc3020 592{
73abe331 593 //
b948db8d 594 // Makes data retrieval for all detectors in the configuration.
2bb7b766 595 // entry: Shuttle logbook entry, contains run paramenters and status of detectors
596 // (Unprocessed, Inactive, Failed or Done).
d477ad88 597 // Returns kFALSE in case of error occured and kTRUE otherwise
73abe331 598 //
599
2bb7b766 600 if(!entry) return kFALSE;
601
602 fLogbookEntry = entry;
603
e7f62f16 604 if (fLogbookEntry->IsDone())
605 {
2bb7b766 606 Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook");
607 UpdateShuttleLogbook("shuttle_done");
608 fLogbookEntry = 0;
609 return kTRUE;
610 }
611
e7f62f16 612 // create ML instance that monitors this run
613 fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
614 // disable monitoring of other parameters that come e.g. from TFile
615 gMonitoringWriter = 0;
2bb7b766 616
617 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n",
618 GetCurrentRun()));
619
e7f62f16 620
621 // Send the information to ML
622 TMonaLisaText mlStatus("SHUTTLE_status", "Processing");
623
624 TList mlList;
625 mlList.Add(&mlStatus);
626
627 fMonaLisa->SendParameters(&mlList);
628
eba76848 629 fLogbookEntry->Print("all");
57f50b3c 630
631 // Initialization
d477ad88 632 Bool_t hasError = kFALSE;
5164a766 633
2bb7b766 634 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
635 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
636 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
637 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
d477ad88 638
57f50b3c 639 // Loop on detectors in the configuration
b948db8d 640 TIter iter(fConfig->GetDetectors());
2bb7b766 641 TObjString* aDetector = 0;
b948db8d 642
be48e3ea 643 while ((aDetector = (TObjString*) iter.Next()))
644 {
7bfb2090 645 fCurrentDetector = aDetector->String();
5164a766 646
9e080f92 647 if (ContinueProcessing() == kFALSE) continue;
648
2bb7b766 649 AliInfo(Form("\n\n \t\t\t****** run %d - %s: START ******",
650 GetCurrentRun(), aDetector->GetName()));
651
9d733021 652 for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE;
653
e7f62f16 654 Log(fCurrentDetector.Data(), "Starting processing");
85a80aa9 655
be48e3ea 656 Int_t pid = fork();
657
658 if (pid < 0)
659 {
660 Log("SHUTTLE", "ERROR: Forking failed");
661 }
662 else if (pid > 0)
663 {
664 // parent
665 AliInfo(Form("In parent process of %d - %s: Starting monitoring",
666 GetCurrentRun(), aDetector->GetName()));
667
668 Long_t begin = time(0);
669
670 int status; // to be used with waitpid, on purpose an int (not Int_t)!
671 while (waitpid(pid, &status, WNOHANG) == 0)
672 {
673 Long_t expiredTime = time(0) - begin;
674
675 if (expiredTime > fConfig->GetPPTimeOut())
676 {
677 Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...",
678 expiredTime));
679
680 kill(pid, 9);
681
682 hasError = kTRUE;
683
684 gSystem->Sleep(1000);
685 }
686 else
687 {
688 if (expiredTime % 60 == 0)
689 Log("SHUTTLE", Form("Checked process. Run time: %d seconds.",
690 expiredTime));
691 gSystem->Sleep(1000);
692 }
693 }
694
695 AliInfo(Form("In parent process of %d - %s: Client has terminated.",
696 GetCurrentRun(), aDetector->GetName()));
697
698 if (WIFEXITED(status))
699 {
700 Int_t returnCode = WEXITSTATUS(status);
701
702 Log("SHUTTLE", Form("The return code is %d", returnCode));
703
704 if (returnCode != 0)
705 hasError = kTRUE;
706 }
707 }
708 else if (pid == 0)
709 {
710 // client
711 AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName()));
712
713 UInt_t result = ProcessCurrentDetector();
714
715 Int_t returnCode = 0; // will be set to 1 in case of an error
716
717 if (!result)
718 {
719 returnCode = 1;
720 AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n",
721 GetCurrentRun(), aDetector->GetName()));
722 }
723 else if (result == 2)
724 {
725 AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n",
726 GetCurrentRun(), aDetector->GetName()));
727 } else
728 {
729 AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n",
730 GetCurrentRun(), aDetector->GetName()));
731 }
732
733 if (result > 0)
734 {
2c15234c 735 // Process successful: Update time_processed field in FXS logbooks!
9d733021 736 if (UpdateTable() == kFALSE) returnCode = 1;
be48e3ea 737 }
738
4b95672b 739 for (UInt_t iSys=0; iSys<3; iSys++)
740 {
741 if (fFXSCalled[iSys]) fFXSlist[iSys].Clear();
742 }
743
be48e3ea 744 AliInfo(Form("Client process of %d - %s is exiting now with %d.",
745 GetCurrentRun(), aDetector->GetName(), returnCode));
746
747 // the client exits here
748 gSystem->Exit(returnCode);
749
750 AliError("We should never get here!!!");
751 }
7bfb2090 752 }
5164a766 753
2bb7b766 754 AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n",
755 GetCurrentRun()));
756
757 //check if shuttle is done for this run, if so update logbook
758 TObjArray checkEntryArray;
759 checkEntryArray.SetOwner(1);
9e080f92 760 TString whereClause = Form("where run=%d", GetCurrentRun());
761 if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
762 Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
763 GetCurrentRun()));
764 return hasError == kFALSE;
765 }
b948db8d 766
9e080f92 767 AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*>
768 (checkEntryArray.At(0));
2bb7b766 769
9e080f92 770 if (checkEntry)
771 {
772 if (checkEntry->IsDone())
be48e3ea 773 {
9e080f92 774 Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook");
775 UpdateShuttleLogbook("shuttle_done");
776 }
777 else
778 {
779 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
be48e3ea 780 {
9e080f92 781 if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
be48e3ea 782 {
9e080f92 783 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
784 checkEntry->GetRun(), GetDetName(iDet)));
785 fFirstUnprocessed[iDet] = kFALSE;
be48e3ea 786 }
787 }
2bb7b766 788 }
789 }
790
e7f62f16 791 // remove ML instance
792 delete fMonaLisa;
793 fMonaLisa = 0;
794
2bb7b766 795 fLogbookEntry = 0;
85a80aa9 796
a7160fe9 797 return hasError == kFALSE;
73abe331 798}
799
b948db8d 800//______________________________________________________________________________________________
2bb7b766 801UInt_t AliShuttle::ProcessCurrentDetector()
73abe331 802{
803 //
2bb7b766 804 // Makes data retrieval just for a specific detector (fCurrentDetector).
73abe331 805 // Threre should be a configuration for this detector.
73abe331 806
2bb7b766 807 AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun()));
73abe331 808
7bfb2090 809 UpdateShuttleStatus(AliShuttleStatus::kDCSStarted);
73abe331 810
2c15234c 811 TMap dcsMap;
812 dcsMap.SetOwner(1);
73abe331 813
85a80aa9 814 Bool_t aDCSError = kFALSE;
815 fGridError = kFALSE;
d477ad88 816
2c15234c 817 // TODO Test only... I've added a flag that allows to
818 // exclude DCS archive DB query
819 if (!fgkProcessDCS)
820 {
821 AliInfo("Skipping DCS processing!");
822 aDCSError = kFALSE;
823 } else {
824 TString host(fConfig->GetDCSHost(fCurrentDetector));
825 Int_t port = fConfig->GetDCSPort(fCurrentDetector);
826
827 // Retrieval of Aliases
828 TObjString* anAlias = 0;
36c99a6a 829 Int_t iAlias = 1;
830 Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries();
2c15234c 831 TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector));
832 while ((anAlias = (TObjString*) iterAliases.Next()))
833 {
834 TObjArray *valueSet = new TObjArray();
835 valueSet->SetOwner(1);
836
36c99a6a 837 if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases)
838 AliInfo(Form("Querying DCS archive: alias %s (%d of %d)",
839 anAlias->GetName(), iAlias++, nTotAliases));
2c15234c 840 aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0);
841
842 if(!aDCSError)
843 {
844 dcsMap.Add(anAlias->Clone(), valueSet);
845 } else {
846 Log(fCurrentDetector,
847 Form("ProcessCurrentDetector - Error while retrieving alias %s",
848 anAlias->GetName()));
849 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
850 dcsMap.DeleteAll();
851 return 0;
852 }
4f0ab988 853 }
2c15234c 854
855 // Retrieval of Data Points
856 TObjString* aDP = 0;
36c99a6a 857 Int_t iDP = 0;
858 Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries();
2c15234c 859 TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector));
860 while ((aDP = (TObjString*) iterDP.Next()))
861 {
862 TObjArray *valueSet = new TObjArray();
863 valueSet->SetOwner(1);
36c99a6a 864 if (((iDP-1) % 500) == 0 || iDP == nTotDPs)
865 AliInfo(Form("Querying DCS archive: DP %s (%d of %d)",
866 aDP->GetName(), iDP++, nTotDPs));
2c15234c 867 aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0);
868
869 if(!aDCSError)
870 {
871 dcsMap.Add(aDP->Clone(), valueSet);
872 } else {
873 Log(fCurrentDetector,
874 Form("ProcessCurrentDetector - Error while retrieving data point %s",
875 aDP->GetName()));
876 UpdateShuttleStatus(AliShuttleStatus::kDCSError);
877 dcsMap.DeleteAll();
878 return 0;
879 }
73abe331 880 }
881 }
b948db8d 882
2bb7b766 883 // DCS Archive DB processing successful. Call Preprocessor!
85a80aa9 884 UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
a7160fe9 885
85a80aa9 886 AliPreprocessor* aPreprocessor =
5164a766 887 dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
b948db8d 888
2bb7b766 889 aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime());
2c15234c 890 UInt_t aPPResult = aPreprocessor->Process(&dcsMap);
2bb7b766 891
892 UInt_t returnValue = 0;
85a80aa9 893 if (aPPResult == 0) { // Preprocessor error
cb343cfd 894 UpdateShuttleStatus(AliShuttleStatus::kPPError);
2bb7b766 895 returnValue = 0;
85a80aa9 896 } else if (fGridError == kFALSE) { // process and Grid storage ok!
897 UpdateShuttleStatus(AliShuttleStatus::kDone);
2bb7b766 898 UpdateShuttleLogbook(fCurrentDetector, "DONE");
899 Log(fCurrentDetector.Data(),
900 "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully");
901 returnValue = 1;
85a80aa9 902 } else { // Grid storage error (process ok, but object put in local storage)
903 UpdateShuttleStatus(AliShuttleStatus::kStoreFailed);
2bb7b766 904 returnValue = 2;
85a80aa9 905 }
b948db8d 906
2c15234c 907 dcsMap.DeleteAll();
b948db8d 908
2bb7b766 909 return returnValue;
910}
911
912//______________________________________________________________________________________________
913Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
914 TObjArray& entries)
915{
916// Query DAQ's Shuttle logbook and fills detector status object.
917// Call QueryRunParameters to query DAQ logbook for run parameters.
918
fc5a4708 919 entries.SetOwner(1);
920
2bb7b766 921 // check connection, in case connect
be48e3ea 922 if(!Connect(3)) return kFALSE;
2bb7b766 923
924 TString sqlQuery;
925 sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause);
926
be48e3ea 927 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2bb7b766 928 if (!aResult) {
929 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
930 return kFALSE;
931 }
932
fc5a4708 933 AliDebug(2,Form("Query = %s", sqlQuery.Data()));
934
2bb7b766 935 if(aResult->GetRowCount() == 0) {
9e080f92 936// if(sqlQuery.EndsWith("where shuttle_done=0 order by run")){
937// Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE");
938// delete aResult;
939// return kTRUE;
940// } else {
941 AliInfo("No entries in Shuttle Logbook match request");
2bb7b766 942 delete aResult;
943 return kTRUE;
9e080f92 944// }
2bb7b766 945 }
946
947 // TODO Check field count!
fc5a4708 948 const UInt_t nCols = 22;
2bb7b766 949 if (aResult->GetFieldCount() != (Int_t) nCols) {
950 AliError("Invalid SQL result field number!");
951 delete aResult;
952 return kFALSE;
953 }
954
2bb7b766 955 TSQLRow* aRow;
956 while ((aRow = aResult->Next())) {
957 TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
958 Int_t run = runString.Atoi();
959
eba76848 960 AliShuttleLogbookEntry *entry = QueryRunParameters(run);
961 if (!entry)
962 continue;
2bb7b766 963
964 // loop on detectors
eba76848 965 for(UInt_t ii = 0; ii < nCols; ii++)
966 entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii));
2bb7b766 967
eba76848 968 entries.AddLast(entry);
2bb7b766 969 delete aRow;
970 }
971
9e080f92 972// if(sqlQuery.EndsWith("where shuttle_done=0 order by run"))
973// Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook",
974// entries.GetEntriesFast()));
2bb7b766 975 delete aResult;
976 return kTRUE;
977}
978
979//______________________________________________________________________________________________
eba76848 980AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
2bb7b766 981{
eba76848 982 //
983 // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object
984 //
2bb7b766 985
986 // check connection, in case connect
be48e3ea 987 if (!Connect(3))
eba76848 988 return 0;
2bb7b766 989
990 TString sqlQuery;
2c15234c 991 sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run);
2bb7b766 992
be48e3ea 993 TSQLResult* aResult = fServer[3]->Query(sqlQuery);
2bb7b766 994 if (!aResult) {
995 AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
eba76848 996 return 0;
2bb7b766 997 }
998
eba76848 999 if (aResult->GetRowCount() == 0) {
2bb7b766 1000 Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run));
1001 delete aResult;
eba76848 1002 return 0;
2bb7b766 1003 }
1004
eba76848 1005 if (aResult->GetRowCount() > 1) {
2bb7b766 1006 AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run));
1007 delete aResult;
eba76848 1008 return 0;
2bb7b766 1009 }
1010
eba76848 1011 TSQLRow* aRow = aResult->Next();
1012 if (!aRow)
1013 {
1014 AliError(Form("Could not retrieve row for run %d. Skipping", run));
1015 delete aResult;
1016 return 0;
1017 }
2bb7b766 1018
eba76848 1019 AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run);
2bb7b766 1020
eba76848 1021 for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++)
1022 entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii));
2bb7b766 1023
eba76848 1024 UInt_t startTime = entry->GetStartTime();
1025 UInt_t endTime = entry->GetEndTime();
1026
1027 if (!startTime || !endTime || startTime > endTime) {
1028 Log("SHUTTLE",
1029 Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
1030 run, startTime, endTime));
1031 delete entry;
2bb7b766 1032 delete aRow;
eba76848 1033 delete aResult;
1034 return 0;
2bb7b766 1035 }
1036
eba76848 1037 delete aRow;
2bb7b766 1038 delete aResult;
eba76848 1039
1040 return entry;
2bb7b766 1041}
1042
1043//______________________________________________________________________________________________
1044Bool_t AliShuttle::TryToStoreAgain()
1045{
1046 // Called in case the detector failed to store the object in Grid OCDB
1047 // It tries to store the object again, if it does not find more recent and overlapping objects
1048 // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage.
1049
1050 AliInfo("Trying to store OCDB data again...");
1051 Bool_t resultCDB = TryToStoreAgain(fgkMainCDB);
1052
1053 AliInfo("Trying to store reference data again...");
1054 Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage);
1055
1056 return resultCDB && resultRef;
1057}
1058
1059//______________________________________________________________________________________________
1060Bool_t AliShuttle::TryToStoreAgain(TString& gridURI)
1061{
1062 // Called by TryToStoreAgain(), performs actual storage retry
1063
6ec0e06c 1064 TObjArray* gridIds=0;
2bb7b766 1065
1066 Bool_t result = kTRUE;
1067
1068 const char* type = 0;
1069 TString backupURI;
1070 if(gridURI == fgkMainCDB) {
1071 type = "OCDB";
1072 backupURI = fgkLocalCDB;
1073 } else if(gridURI == fgkMainRefStorage) {
1074 type = "reference";
1075 backupURI = fgkLocalRefStorage;
1076 } else {
1077 AliError(Form("Invalid storage URI: %s", gridURI.Data()));
1078 return kFALSE;
1079 }
1080
1081 AliCDBManager* man = AliCDBManager::Instance();
1082
1083 AliCDBStorage *gridSto = man->GetStorage(gridURI);
1084 if(!gridSto) {
1085 Log(fCurrentDetector.Data(),
1086 Form("TryToStoreAgain - cannot activate main %s storage", type));
1087 return kFALSE;
1088 }
1089
1090 gridIds = gridSto->GetQueryCDBList();
1091
1092 // get objects previously stored in local CDB
1093 AliCDBStorage *backupSto = man->GetStorage(backupURI);
eba76848 1094 AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*");
2bb7b766 1095 // Local objects were stored with current run as Grid version!
1096 TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun());
1097 localEntries->SetOwner(1);
1098
1099 // loop on local stored objects
1100 TIter localIter(localEntries);
1101 AliCDBEntry *aLocEntry = 0;
1102 while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){
1103 aLocEntry->SetOwner(1);
1104 AliCDBId aLocId = aLocEntry->GetId();
1105 aLocEntry->SetVersion(-1);
1106 aLocEntry->SetSubVersion(-1);
1107
1108 // loop on Grid valid Id's
1109 Bool_t store = kTRUE;
1110 TIter gridIter(gridIds);
1111 AliCDBId* aGridId = 0;
1112 while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){
be48e3ea 1113 // If local object is valid up to infinity we store it only if it is
1114 // the first unprocessed run!
1115 if (aLocId.GetLastRun() == AliCDBRunRange::Infinity())
1116 {
1117 if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)])
1118 {
1119 Log(fCurrentDetector.Data(),
2c15234c 1120 ("TryToStoreAgain - This object has validity infinite but "
1121 "there are previous unprocessed runs!"));
be48e3ea 1122 continue;
1123 } else {
1124 break;
1125 }
2bb7b766 1126 }
1127 if(aGridId->GetPath() != aLocId.GetPath()) continue;
1128 // skip all objects valid up to infinity
1129 if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue;
1130 // if we get here, it means there's already some more recent object stored on Grid!
1131 store = kFALSE;
1132 break;
1133 }
1134
1135 if(!store){
1136 Log(fCurrentDetector.Data(),
1137 Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>",
1138 type, aGridId->ToString().Data()));
1139 // removing local filename...
1140 // TODO maybe it's better not to remove it, it was not copied to the Grid!
1141 TString filename;
1142 backupSto->IdToFilename(aLocId, filename);
1143 AliInfo(Form("Removing local file %s", filename.Data()));
1144 gSystem->Exec(Form("rm %s",filename.Data()));
1145 continue;
1146 }
1147
1148 // If we get here, the file can be stored!
1149 Bool_t storeOk = gridSto->Put(aLocEntry);
1150 if(storeOk){
1151 Log(fCurrentDetector.Data(),
1152 Form("TryToStoreAgain - Object <%s> successfully put into %s storage",
1153 aLocId.ToString().Data(), type));
1154
1155 // removing local filename...
1156 TString filename;
1157 backupSto->IdToFilename(aLocId, filename);
1158 AliInfo(Form("Removing local file %s", filename.Data()));
1159 gSystem->Exec(Form("rm %s", filename.Data()));
1160 continue;
1161 } else {
1162 Log(fCurrentDetector.Data(),
1163 Form("TryToStoreAgain - Grid %s storage of object <%s> failed again",
1164 type, aLocId.ToString().Data()));
1165 result = kFALSE;
1166 }
1167 }
1168 localEntries->Clear();
1169
1170 return result;
73abe331 1171}
1172
b948db8d 1173//______________________________________________________________________________________________
2c15234c 1174Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry,
1175 TObjArray* valueSet, DCSType type)
73abe331 1176{
2c15234c 1177// Retrieve all "entry" data points from the DCS server
58bc3020 1178// host, port: TSocket connection parameters
2c15234c 1179// entry: name of the alias or data point
2bb7b766 1180// valueSet: array of retrieved AliDCSValue's
2c15234c 1181// type: kAlias or kDP
58bc3020 1182
73abe331 1183 AliDCSClient client(host, port, fTimeout, fRetries);
2c15234c 1184 if (!client.IsConnected())
1185 {
b948db8d 1186 return kFALSE;
73abe331 1187 }
1188
2c15234c 1189 Int_t result=0;
73abe331 1190
2c15234c 1191 if (type == kAlias)
1192 {
1193 result = client.GetAliasValues(entry,
1194 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1195 } else
1196 if (type == kDP)
1197 {
1198 result = client.GetDPValues(entry,
1199 GetCurrentStartTime(), GetCurrentEndTime(), valueSet);
1200 }
1201
1202 if (result < 0)
1203 {
2bb7b766 1204 Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s",
2c15234c 1205 entry, AliDCSClient::GetErrorString(result)));
73abe331 1206
2c15234c 1207 if (result == AliDCSClient::fgkServerError)
1208 {
2bb7b766 1209 Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s",
73abe331 1210 client.GetServerError().Data()));
1211 }
1212
1213 return kFALSE;
1214 }
1215
1216 return kTRUE;
1217}
b948db8d 1218
1219//______________________________________________________________________________________________
57f50b3c 1220const char* AliShuttle::GetFile(Int_t system, const char* detector,
1221 const char* id, const char* source)
b948db8d 1222{
57f50b3c 1223// Get calibration file from file exchange servers
9d733021 1224// First queris the FXS database for the file name, using the run, detector, id and source info
1225// then calls RetrieveFile(filename) for actual copy to local disk
2bb7b766 1226// run: current run being processed (given by Logbook entry fLogbookEntry)
eba76848 1227// detector: the Preprocessor name
57f50b3c 1228// id: provided as a parameter by the Preprocessor
1229// source: provided by the Preprocessor through GetFileSources function
1230
1231 // check connection, in case connect
9d733021 1232 if (!Connect(system))
eba76848 1233 {
9d733021 1234 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
57f50b3c 1235 return 0;
1236 }
1237
1238 // Query preparation
9d733021 1239 TString sqlQueryStart;
1240 TString whereClause;
1241 TString sourceName(source);
1242 Int_t nFields = 0;
1243 if (system == kDAQ)
1244 {
1245 sqlQueryStart = Form("select filePath,size from %s where", fConfig->GetFXSdbTable(system));
1246 whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"",
eba76848 1247 GetCurrentRun(), detector, id, source);
9d733021 1248 nFields = 2;
57f50b3c 1249
57f50b3c 1250 }
9d733021 1251 else if (system == kDCS)
eba76848 1252 {
9d733021 1253 sqlQueryStart = Form("select filePath,size from %s where", fConfig->GetFXSdbTable(system));
1254 whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
eba76848 1255 GetCurrentRun(), detector, id);
9d733021 1256 nFields = 2;
1257 sourceName="none";
57f50b3c 1258 }
9d733021 1259 else if (system == kHLT)
9e080f92 1260 {
9d733021 1261 sqlQueryStart = Form("select filePath,fileSize,fileChecksum from %s where",
1262 fConfig->GetFXSdbTable(system));
1263 whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\"",
1264 GetCurrentRun(), detector, id, source);
1265 nFields = 3;
9e080f92 1266 }
1267
9e080f92 1268 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1269
1270 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1271
1272 // Query execution
1273 TSQLResult* aResult = 0;
9d733021 1274 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
9e080f92 1275 if (!aResult) {
9d733021 1276 Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s",
1277 GetSystemName(system), id, sourceName.Data()));
9e080f92 1278 return 0;
1279 }
1280
1281 if(aResult->GetRowCount() == 0)
1282 {
1283 Log(detector,
9d733021 1284 Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s",
1285 GetSystemName(system), id, sourceName.Data()));
9e080f92 1286 delete aResult;
1287 return 0;
1288 }
2bb7b766 1289
9e080f92 1290 if (aResult->GetRowCount() > 1) {
1291 Log(detector,
9d733021 1292 Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s",
1293 GetSystemName(system), id, sourceName.Data()));
9e080f92 1294 delete aResult;
1295 return 0;
1296 }
1297
9d733021 1298 if (aResult->GetFieldCount() != nFields) {
9e080f92 1299 Log(detector,
9d733021 1300 Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s",
1301 GetSystemName(system), id, sourceName.Data()));
9e080f92 1302 delete aResult;
1303 return 0;
1304 }
1305
1306 TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next());
1307
1308 if (!aRow){
9d733021 1309 Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s",
1310 GetSystemName(system), id, sourceName.Data()));
9e080f92 1311 delete aResult;
1312 return 0;
1313 }
1314
1315 TString filePath(aRow->GetField(0), aRow->GetFieldLength(0));
1316 TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1));
9d733021 1317 TString fileMd5Sum;
1318 if(system == kHLT) fileMd5Sum = aRow->GetField(2);
9e080f92 1319
1320 delete aResult;
1321 delete aRow;
1322
1323 AliDebug(2, Form("filePath = %s",filePath.Data()));
1324
9e080f92 1325 // retrieved file is renamed to make it unique
9d733021 1326 TString localFileName = Form("%s_%s_%d_%s_%s.shuttle",
1327 GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data());
1328
9e080f92 1329
9d733021 1330 // file retrieval from FXS
4b95672b 1331 UInt_t nRetries = 0;
1332 UInt_t maxRetries = 3;
1333 Bool_t result = kFALSE;
1334
1335 // copy!! if successful TSystem::Exec returns 0
1336 while(nRetries++ < maxRetries) {
1337 AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries));
1338 result = RetrieveFile(system, filePath.Data(), localFileName.Data());
1339 if(!result)
1340 {
1341 Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed",
9d733021 1342 filePath.Data(), GetSystemName(system)));
4b95672b 1343 continue;
1344 } else {
1345 AliInfo(Form("File %s copied from %s FXS into %s/%s",
1346 filePath.Data(), GetSystemName(system),
1347 GetShuttleTempDir(), localFileName.Data()));
1348 }
9e080f92 1349
4b95672b 1350 if (system == kHLT)
1351 {
1352 // compare md5sum of local file with the one stored in the FXS DB
1353 Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null",
36c99a6a 1354 GetShuttleTempDir(), localFileName.Data(), fileMd5Sum.Data()));
9e080f92 1355
4b95672b 1356 if (md5Comp != 0)
1357 {
1358 Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!",
1359 filePath.Data()));
1360 result = kFALSE;
1361 continue;
1362 }
9d733021 1363 }
4b95672b 1364 if (result) break;
9e080f92 1365 }
1366
4b95672b 1367 if(!result) return 0;
1368
9d733021 1369 fFXSCalled[system]=kTRUE;
1370 TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
1371 fFXSlist[system].Add(fileParams);
9e080f92 1372
1373 static TString fullLocalFileName;
36c99a6a 1374 fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data());
1375
9e080f92 1376 AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data()));
1377
1378 return fullLocalFileName.Data();
2bb7b766 1379
1380}
1381
1382//______________________________________________________________________________________________
9d733021 1383Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName)
9e080f92 1384{
9d733021 1385// Copies file from FXS to local Shuttle machine
2bb7b766 1386
9e080f92 1387 // check temp directory: trying to cd to temp; if it does not exist, create it
9d733021 1388 AliDebug(2, Form("Copy file %s from %s FXS into %s/%s",
1389 GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName));
9e080f92 1390
36c99a6a 1391 void* dir = gSystem->OpenDirectory(GetShuttleTempDir());
9e080f92 1392 if (dir == NULL) {
36c99a6a 1393 if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) {
1394 AliError(Form("Can't open directory <%s>", GetShuttleTempDir()));
9e080f92 1395 return kFALSE;
1396 }
1397
1398 } else {
1399 gSystem->FreeDirectory(dir);
1400 }
1401
9d733021 1402 TString baseFXSFolder;
1403 if (system == kDAQ)
1404 {
1405 baseFXSFolder = "FES/";
1406 }
1407 else if (system == kDCS)
1408 {
1409 baseFXSFolder = "";
1410 }
1411 else if (system == kHLT)
1412 {
1413 baseFXSFolder = "~/";
1414 }
1415
1416
1417 TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s",
1418 fConfig->GetFXSPort(system),
1419 fConfig->GetFXSUser(system),
1420 fConfig->GetFXSHost(system),
1421 baseFXSFolder.Data(),
1422 fxsFileName,
36c99a6a 1423 GetShuttleTempDir(),
9e080f92 1424 localFileName);
1425
1426 AliDebug(2, Form("%s",command.Data()));
1427
4b95672b 1428 Bool_t result = (gSystem->Exec(command.Data()) == 0);
9e080f92 1429
4b95672b 1430 return result;
9e080f92 1431}
1432
1433//______________________________________________________________________________________________
9d733021 1434TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id)
1435{
1436// Get sources producing the condition file Id from file exchange servers
1437
1438 if (system == kDCS)
1439 {
1440 AliError("DCS system has only one source of data!");
1441 return NULL;
1442
1443 }
9e080f92 1444
1445 // check connection, in case connect
9d733021 1446 if (!Connect(system))
1447 {
1448 Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system)));
1449 return NULL;
9e080f92 1450 }
1451
9d733021 1452 TString sourceName = 0;
1453 if (system == kDAQ)
1454 {
1455 sourceName = "DAQsource";
1456 } else if (system == kHLT)
1457 {
1458 sourceName = "DDLnumbers";
1459 }
1460
1461 TString sqlQueryStart = Form("select %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(kDAQ));
9e080f92 1462 TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"",
1463 GetCurrentRun(), detector, id);
1464 TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data());
1465
1466 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1467
1468 // Query execution
1469 TSQLResult* aResult;
9d733021 1470 aResult = fServer[system]->Query(sqlQuery);
9e080f92 1471 if (!aResult) {
9d733021 1472 Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s",
1473 GetSystemName(system), id));
9e080f92 1474 return 0;
1475 }
1476
9d733021 1477 if (aResult->GetRowCount() == 0)
1478 {
9e080f92 1479 Log(detector,
9d733021 1480 Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id));
9e080f92 1481 delete aResult;
1482 return 0;
1483 }
1484
1485 TSQLRow* aRow;
1486 TList *list = new TList();
1487 list->SetOwner(1);
1488
9d733021 1489 while ((aRow = aResult->Next()))
1490 {
9e080f92 1491
9d733021 1492 TString source(aRow->GetField(0), aRow->GetFieldLength(0));
1493 AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data()));
1494 list->Add(new TObjString(source));
9e080f92 1495 delete aRow;
1496 }
9d733021 1497
9e080f92 1498 delete aResult;
1499
1500 return list;
2bb7b766 1501}
1502
1503//______________________________________________________________________________________________
9d733021 1504Bool_t AliShuttle::Connect(Int_t system)
2bb7b766 1505{
9d733021 1506// Connect to MySQL Server of the system's FXS MySQL databases
1507// DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host
57f50b3c 1508
9d733021 1509 // check connection: if already connected return
1510 if(fServer[system] && fServer[system]->IsConnected()) return kTRUE;
57f50b3c 1511
9d733021 1512 TString dbHost, dbUser, dbPass, dbName;
57f50b3c 1513
9d733021 1514 if (system < 3) // FXS db servers
1515 {
1516 dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system));
1517 dbUser = fConfig->GetFXSdbUser(system);
1518 dbPass = fConfig->GetFXSdbPass(system);
1519 dbName = fConfig->GetFXSdbName(system);
1520 } else { // Run & Shuttle logbook servers
1521 // TODO Will the Shuttle logbook server be the same as the Run logbook server ???
1522 dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort());
1523 dbUser = fConfig->GetDAQlbUser();
1524 dbPass = fConfig->GetDAQlbPass();
1525 dbName = fConfig->GetDAQlbDB();
1526 }
57f50b3c 1527
9d733021 1528 fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data());
1529 if (!fServer[system] || !fServer[system]->IsConnected()) {
1530 if(system < 3)
1531 {
1532 AliError(Form("Can't establish connection to FXS database for %s",
1533 AliShuttleInterface::GetSystemName(system)));
1534 } else {
1535 AliError("Can't establish connection to Run logbook.");
57f50b3c 1536 }
9d733021 1537 if(fServer[system]) delete fServer[system];
1538 return kFALSE;
2bb7b766 1539 }
57f50b3c 1540
9d733021 1541 // Get tables
1542 TSQLResult* aResult=0;
1543 switch(system){
1544 case kDAQ:
1545 aResult = fServer[kDAQ]->GetTables(dbName.Data());
1546 break;
1547 case kDCS:
1548 aResult = fServer[kDCS]->GetTables(dbName.Data());
1549 break;
1550 case kHLT:
1551 aResult = fServer[kHLT]->GetTables(dbName.Data());
1552 break;
1553 default:
1554 aResult = fServer[3]->GetTables(dbName.Data());
1555 break;
1556 }
1557
1558 delete aResult;
2bb7b766 1559 return kTRUE;
1560}
57f50b3c 1561
9e080f92 1562//______________________________________________________________________________________________
9d733021 1563Bool_t AliShuttle::UpdateTable()
9e080f92 1564{
9d733021 1565// Update FXS table filling time_processed field in all rows corresponding to current run and detector
9e080f92 1566
9d733021 1567 Bool_t result = kTRUE;
9e080f92 1568
9d733021 1569 for (UInt_t system=0; system<3; system++)
1570 {
1571 if(!fFXSCalled[system]) continue;
9e080f92 1572
9d733021 1573 // check connection, in case connect
1574 if (!Connect(system))
1575 {
1576 Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system)));
1577 result = kFALSE;
1578 continue;
9e080f92 1579 }
9e080f92 1580
9d733021 1581 TTimeStamp now; // now
1582
1583 // Loop on FXS list entries
1584 TIter iter(&fFXSlist[system]);
1585 TObjString *aFXSentry=0;
1586 while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next())))
1587 {
1588 TString aFXSentrystr = aFXSentry->String();
1589 TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#");
1590 if (!aFXSarray || aFXSarray->GetEntries() != 2 )
1591 {
1592 Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>",
1593 GetSystemName(system), aFXSentrystr.Data()));
1594 if(aFXSarray) delete aFXSarray;
1595 result = kFALSE;
1596 continue;
1597 }
1598 const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName();
1599 const char* source = ((TObjString*) aFXSarray->At(1))->GetName();
1600
1601 TString whereClause;
1602 if (system == kDAQ)
1603 {
1604 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";",
1605 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1606 }
1607 else if (system == kDCS)
1608 {
1609 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";",
1610 GetCurrentRun(), fCurrentDetector.Data(), fileId);
1611 }
1612 else if (system == kHLT)
1613 {
1614 whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";",
1615 GetCurrentRun(), fCurrentDetector.Data(), fileId, source);
1616 }
1617
1618 delete aFXSarray;
9e080f92 1619
9d733021 1620 TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system),
1621 now.GetSec(), whereClause.Data());
9e080f92 1622
9d733021 1623 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
9e080f92 1624
9d733021 1625 // Query execution
1626 TSQLResult* aResult;
1627 aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery));
1628 if (!aResult)
1629 {
1630 Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>",
1631 GetSystemName(system), sqlQuery.Data()));
1632 result = kFALSE;
1633 continue;
1634 }
1635 delete aResult;
9e080f92 1636 }
9e080f92 1637 }
1638
9d733021 1639 return result;
9e080f92 1640}
57f50b3c 1641
2bb7b766 1642//______________________________________________________________________________________________
1643Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status)
1644{
e7f62f16 1645 //
1646 // Update Shuttle logbook filling detector or shuttle_done column
1647 // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done")
1648 //
57f50b3c 1649
2bb7b766 1650 // check connection, in case connect
be48e3ea 1651 if(!Connect(3)){
2bb7b766 1652 Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook.");
1653 return kFALSE;
57f50b3c 1654 }
1655
2bb7b766 1656 TString detName(detector);
1657 TString setClause;
e7f62f16 1658 if(detName == "shuttle_done")
1659 {
2bb7b766 1660 setClause = "set shuttle_done=1";
e7f62f16 1661
1662 // Send the information to ML
1663 TMonaLisaText mlStatus("SHUTTLE_status", "Done");
1664
1665 TList mlList;
1666 mlList.Add(&mlStatus);
1667
1668 fMonaLisa->SendParameters(&mlList);
2bb7b766 1669 } else {
2bb7b766 1670 TString statusStr(status);
1671 if(statusStr.Contains("done", TString::kIgnoreCase) ||
1672 statusStr.Contains("failed", TString::kIgnoreCase)){
eba76848 1673 setClause = Form("set %s=\"%s\"", detector, status);
2bb7b766 1674 } else {
1675 Log("SHUTTLE",
1676 Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s",
1677 status, detector));
1678 return kFALSE;
1679 }
1680 }
57f50b3c 1681
2bb7b766 1682 TString whereClause = Form("where run=%d", GetCurrentRun());
1683
1684 TString sqlQuery = Form("update logbook_shuttle %s %s",
1685 setClause.Data(), whereClause.Data());
57f50b3c 1686
2bb7b766 1687 AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data()));
1688
1689 // Query execution
1690 TSQLResult* aResult;
be48e3ea 1691 aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery));
2bb7b766 1692 if (!aResult) {
1693 Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data()));
1694 return kFALSE;
57f50b3c 1695 }
2bb7b766 1696 delete aResult;
57f50b3c 1697
1698 return kTRUE;
1699}
1700
1701//______________________________________________________________________________________________
2bb7b766 1702Int_t AliShuttle::GetCurrentRun() const
1703{
1704// Get current run from logbook entry
57f50b3c 1705
2bb7b766 1706 return fLogbookEntry ? fLogbookEntry->GetRun() : -1;
57f50b3c 1707}
1708
1709//______________________________________________________________________________________________
2bb7b766 1710UInt_t AliShuttle::GetCurrentStartTime() const
1711{
1712// get current start time
57f50b3c 1713
2bb7b766 1714 return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0;
57f50b3c 1715}
1716
1717//______________________________________________________________________________________________
2bb7b766 1718UInt_t AliShuttle::GetCurrentEndTime() const
1719{
1720// get current end time from logbook entry
57f50b3c 1721
2bb7b766 1722 return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
57f50b3c 1723}
1724
b948db8d 1725//______________________________________________________________________________________________
1726void AliShuttle::Log(const char* detector, const char* message)
1727{
58bc3020 1728// Fill log string with a message
b948db8d 1729
36c99a6a 1730 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
84090f85 1731 if (dir == NULL) {
36c99a6a 1732 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
1733 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
84090f85 1734 return;
1735 }
b948db8d 1736
84090f85 1737 } else {
1738 gSystem->FreeDirectory(dir);
1739 }
b948db8d 1740
cb343cfd 1741 TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector);
e7f62f16 1742 if (GetCurrentRun() >= 0)
1743 toLog += Form("run %d - ", GetCurrentRun());
2bb7b766 1744 toLog += Form("%s", message);
1745
84090f85 1746 AliInfo(toLog.Data());
b948db8d 1747
84090f85 1748 TString fileName;
e7f62f16 1749 if (GetCurrentRun() >= 0)
1750 fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
1751 else
1752 fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
1753
84090f85 1754 gSystem->ExpandPathName(fileName);
1755
1756 ofstream logFile;
1757 logFile.open(fileName, ofstream::out | ofstream::app);
1758
1759 if (!logFile.is_open()) {
1760 AliError(Form("Could not open file %s", fileName.Data()));
1761 return;
1762 }
7bfb2090 1763
84090f85 1764 logFile << toLog.Data() << "\n";
b948db8d 1765
84090f85 1766 logFile.close();
b948db8d 1767}
2bb7b766 1768
2bb7b766 1769//______________________________________________________________________________________________
1770Bool_t AliShuttle::Collect(Int_t run)
1771{
eba76848 1772//
1773// Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default)
1774// If a dedicated run is given this run is processed
1775//
1776// In operational mode, this is the Shuttle function triggered by the EOR signal.
1777//
2bb7b766 1778
eba76848 1779 if (run == -1)
1780 Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs");
1781 else
1782 Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run));
cb343cfd 1783
1784 SetLastAction("Starting");
2bb7b766 1785
1786 TString whereClause("where shuttle_done=0");
eba76848 1787 if (run != -1)
1788 whereClause += Form(" and run=%d", run);
2bb7b766 1789
1790 TObjArray shuttleLogbookEntries;
be48e3ea 1791 if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries))
1792 {
cb343cfd 1793 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
2bb7b766 1794 return kFALSE;
1795 }
1796
9e080f92 1797 if (shuttleLogbookEntries.GetEntries() == 0)
1798 {
1799 if (run == -1)
1800 Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook");
1801 else
1802 Log("SHUTTLE", Form("Collect - Run %d is already DONE "
1803 "or it does not exist in Shuttle logbook", run));
1804 return kTRUE;
1805 }
1806
be48e3ea 1807 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1808 fFirstUnprocessed[iDet] = kTRUE;
1809
fc5a4708 1810 if (run != -1)
be48e3ea 1811 {
1812 // query Shuttle logbook for earlier runs, check if some detectors are unprocessed,
1813 // flag them into fFirstUnprocessed array
1814 TString whereClause(Form("where shuttle_done=0 and run < %d", run));
1815 TObjArray tmpLogbookEntries;
1816 if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries))
1817 {
1818 Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook");
1819 return kFALSE;
1820 }
1821
1822 TIter iter(&tmpLogbookEntries);
1823 AliShuttleLogbookEntry* anEntry = 0;
1824 while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next())))
1825 {
1826 for (UInt_t iDet=0; iDet<NDetectors(); iDet++)
1827 {
1828 if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed)
1829 {
1830 AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"",
1831 anEntry->GetRun(), GetDetName(iDet)));
1832 fFirstUnprocessed[iDet] = kFALSE;
1833 }
1834 }
1835
1836 }
1837
1838 }
1839
1840 if (!RetrieveConditionsData(shuttleLogbookEntries))
1841 {
cb343cfd 1842 Log("SHUTTLE", "Collect - Process of at least one run failed");
2bb7b766 1843 return kFALSE;
1844 }
1845
36c99a6a 1846 Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
eba76848 1847 return kTRUE;
2bb7b766 1848}
1849
2bb7b766 1850//______________________________________________________________________________________________
1851Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries)
1852{
1853// Retrieve conditions data for all runs that aren't processed yet
1854
1855 Bool_t hasError = kFALSE;
1856
1857 TIter iter(&dateEntries);
1858 AliShuttleLogbookEntry* anEntry;
1859
1860 while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){
1861 if (!Process(anEntry)){
1862 hasError = kTRUE;
1863 }
4b95672b 1864
1865 // clean SHUTTLE temp directory
1866 TString command = Form("rm -f %s/*.shuttle", GetShuttleTempDir());
1867 gSystem->Exec(command.Data());
2bb7b766 1868 }
1869
1870 return hasError == kFALSE;
1871}
cb343cfd 1872
1873//______________________________________________________________________________________________
1874ULong_t AliShuttle::GetTimeOfLastAction() const
1875{
1876 ULong_t tmp;
36c99a6a 1877
cb343cfd 1878 fMonitoringMutex->Lock();
be48e3ea 1879
cb343cfd 1880 tmp = fLastActionTime;
36c99a6a 1881
cb343cfd 1882 fMonitoringMutex->UnLock();
36c99a6a 1883
cb343cfd 1884 return tmp;
1885}
1886
1887//______________________________________________________________________________________________
1888const TString AliShuttle::GetLastAction() const
1889{
1890 // returns a string description of the last action
1891
1892 TString tmp;
36c99a6a 1893
cb343cfd 1894 fMonitoringMutex->Lock();
1895
1896 tmp = fLastAction;
1897
1898 fMonitoringMutex->UnLock();
1899
36c99a6a 1900 return tmp;
cb343cfd 1901}
1902
1903//______________________________________________________________________________________________
1904void AliShuttle::SetLastAction(const char* action)
1905{
1906 // updates the monitoring variables
36c99a6a 1907
cb343cfd 1908 fMonitoringMutex->Lock();
36c99a6a 1909
cb343cfd 1910 fLastAction = action;
1911 fLastActionTime = time(0);
1912
1913 fMonitoringMutex->UnLock();
1914}
eba76848 1915
1916//______________________________________________________________________________________________
1917const char* AliShuttle::GetRunParameter(const char* param)
1918{
1919// returns run parameter read from DAQ logbook
1920
1921 if(!fLogbookEntry) {
1922 AliError("No logbook entry!");
1923 return 0;
1924 }
1925
1926 return fLogbookEntry->GetRunParameter(param);
1927}
57c1a579 1928
1929//______________________________________________________________________________________________
1930Bool_t AliShuttle::SendMail()
1931{
1932// sends a mail to the subdetector expert in case of preprocessor error
1933
36c99a6a 1934 void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
57c1a579 1935 if (dir == NULL)
1936 {
36c99a6a 1937 if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
57c1a579 1938 {
36c99a6a 1939 AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
57c1a579 1940 return kFALSE;
1941 }
1942
1943 } else {
1944 gSystem->FreeDirectory(dir);
1945 }
1946
1947 TString bodyFileName;
36c99a6a 1948 bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
57c1a579 1949 gSystem->ExpandPathName(bodyFileName);
1950
1951 ofstream mailBody;
1952 mailBody.open(bodyFileName, ofstream::out);
1953
1954 if (!mailBody.is_open())
1955 {
1956 AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
1957 return kFALSE;
1958 }
1959
1960 TString to="";
1961 TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
1962 TObjString *anExpert=0;
1963 while ((anExpert = (TObjString*) iterExperts.Next()))
1964 {
1965 to += Form("%s,", anExpert->GetName());
1966 }
1967 to.Remove(to.Length()-1);
909732f7 1968 AliDebug(2, Form("to: %s",to.Data()));
57c1a579 1969
36c99a6a 1970 // TODO this will be removed...
1971 if (to.Contains("not_yet_set")) {
1972 AliInfo("List of detector responsibles not yet set!");
1973 return kFALSE;
1974 }
1975
57c1a579 1976 TString cc="alberto.colla@cern.ch";
1977
1978 TString subject = Form("%s Shuttle preprocessor error in run %d !",
1979 fCurrentDetector.Data(), GetCurrentRun());
909732f7 1980 AliDebug(2, Form("subject: %s", subject.Data()));
57c1a579 1981
1982 TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
1983 body += Form("SHUTTLE just detected that your preprocessor "
36c99a6a 1984 "exited with ERROR state in run %d!!\n\n", GetCurrentRun());
57c1a579 1985 body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
1986 body += Form("The last 10 lines of %s log file are following:\n\n");
1987
909732f7 1988 AliDebug(2, Form("Body begin: %s", body.Data()));
57c1a579 1989
1990 mailBody << body.Data();
1991 mailBody.close();
1992 mailBody.open(bodyFileName, ofstream::out | ofstream::app);
1993
9d733021 1994 TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
57c1a579 1995 TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
1996 if (gSystem->Exec(tailCommand.Data()))
1997 {
1998 mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
1999 }
2000
2001 TString endBody = Form("------------------------------------------------------\n\n");
36c99a6a 2002 endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
2003 endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
57c1a579 2004 endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
2005
909732f7 2006 AliDebug(2, Form("Body end: %s", endBody.Data()));
57c1a579 2007
2008 mailBody << endBody.Data();
2009
2010 mailBody.close();
2011
2012 // send mail!
2013 TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
2014 subject.Data(),
2015 cc.Data(),
2016 to.Data(),
2017 bodyFileName.Data());
909732f7 2018 AliDebug(2, Form("mail command: %s", mailCommand.Data()));
57c1a579 2019
2020 Bool_t result = gSystem->Exec(mailCommand.Data());
2021
2022 return result == 0;
2023}
36c99a6a 2024
2025//______________________________________________________________________________________________
2026void AliShuttle::SetShuttleTempDir(const char* tmpDir)
2027{
2028// sets Shuttle temp directory
2029
2030 fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir);
2031}
2032
2033//______________________________________________________________________________________________
2034void AliShuttle::SetShuttleLogDir(const char* logDir)
2035{
2036// sets Shuttle log directory
2037
2038 fgkShuttleLogDir = gSystem->ExpandPathName(logDir);
2039}