]>
Commit | Line | Data |
---|---|---|
73abe331 | 1 | /************************************************************************** |
2 | * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. * | |
3 | * * | |
4 | * Author: The ALICE Off-line Project. * | |
5 | * Contributors are mentioned in the code where appropriate. * | |
6 | * * | |
7 | * Permission to use, copy, modify and distribute this software and its * | |
8 | * documentation strictly for non-commercial purposes is hereby granted * | |
9 | * without fee, provided that the above copyright notice appears in all * | |
10 | * copies and that both the copyright notice and this permission notice * | |
11 | * appear in the supporting documentation. The authors make no claims * | |
12 | * about the suitability of this software for any purpose. It is * | |
13 | * provided "as is" without express or implied warranty. * | |
14 | **************************************************************************/ | |
15 | ||
16 | /* | |
17 | $Log$ | |
9d733021 | 18 | Revision 1.27 2007/01/30 17:52:42 jgrosseo |
19 | adding monalisa monitoring | |
20 | ||
e7f62f16 | 21 | Revision 1.26 2007/01/23 19:20:03 acolla |
22 | Removed old ldif files, added TOF, MCH ldif files. Added some options in | |
23 | AliShuttleConfig::Print. Added in Ali Shuttle: SetShuttleTempDir and | |
24 | SetShuttleLogDir | |
25 | ||
36c99a6a | 26 | Revision 1.25 2007/01/15 19:13:52 acolla |
27 | Moved some AliInfo to AliDebug in SendMail function | |
28 | ||
fc5a4708 | 29 | Revision 1.21 2006/12/07 08:51:26 jgrosseo |
30 | update (alberto): | |
31 | table, db names in ldap configuration | |
32 | added GRP preprocessor | |
33 | DCS data can also be retrieved by data point | |
34 | ||
2c15234c | 35 | Revision 1.20 2006/11/16 16:16:48 jgrosseo |
36 | introducing strict run ordering flag | |
37 | removed giving preprocessor name to preprocessor, they have to know their name themselves ;-) | |
38 | ||
be48e3ea | 39 | Revision 1.19 2006/11/06 14:23:04 jgrosseo |
40 | major update (Alberto) | |
41 | o) reading of run parameters from the logbook | |
42 | o) online offline naming conversion | |
43 | o) standalone DCSclient package | |
44 | ||
eba76848 | 45 | Revision 1.18 2006/10/20 15:22:59 jgrosseo |
46 | o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child | |
47 | o) Merging Collect, CollectAll, CollectNew function | |
48 | o) Removing implementation of empty copy constructors (declaration still there!) | |
49 | ||
cb343cfd | 50 | Revision 1.17 2006/10/05 16:20:55 jgrosseo |
51 | adapting to new CDB classes | |
52 | ||
6ec0e06c | 53 | Revision 1.16 2006/10/05 15:46:26 jgrosseo |
54 | applying to the new interface | |
55 | ||
481441a2 | 56 | Revision 1.15 2006/10/02 16:38:39 jgrosseo |
57 | update (alberto): | |
58 | fixed memory leaks | |
59 | storing of objects that failed to be stored to the grid before | |
60 | interfacing of shuttle status table in daq system | |
61 | ||
2bb7b766 | 62 | Revision 1.14 2006/08/29 09:16:05 jgrosseo |
63 | small update | |
64 | ||
85a80aa9 | 65 | Revision 1.13 2006/08/15 10:50:00 jgrosseo |
66 | effc++ corrections (alberto) | |
67 | ||
4f0ab988 | 68 | Revision 1.12 2006/08/08 14:19:29 jgrosseo |
69 | Update to shuttle classes (Alberto) | |
70 | ||
71 | - Possibility to set the full object's path in the Preprocessor's and | |
72 | Shuttle's Store functions | |
73 | - Possibility to extend the object's run validity in the same classes | |
74 | ("startValidity" and "validityInfinite" parameters) | |
75 | - Implementation of the StoreReferenceData function to store reference | |
76 | data in a dedicated CDB storage. | |
77 | ||
84090f85 | 78 | Revision 1.11 2006/07/21 07:37:20 jgrosseo |
79 | last run is stored after each run | |
80 | ||
7bfb2090 | 81 | Revision 1.10 2006/07/20 09:54:40 jgrosseo |
82 | introducing status management: The processing per subdetector is divided into several steps, | |
83 | after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle | |
84 | can keep track of the number of failures and skips further processing after a certain threshold is | |
85 | exceeded. These thresholds can be configured in LDAP. | |
86 | ||
5164a766 | 87 | Revision 1.9 2006/07/19 10:09:55 jgrosseo |
88 | new configuration, accesst to DAQ FES (Alberto) | |
89 | ||
57f50b3c | 90 | Revision 1.8 2006/07/11 12:44:36 jgrosseo |
91 | adding parameters for extended validity range of data produced by preprocessor | |
92 | ||
17111222 | 93 | Revision 1.7 2006/07/10 14:37:09 jgrosseo |
94 | small fix + todo comment | |
95 | ||
e090413b | 96 | Revision 1.6 2006/07/10 13:01:41 jgrosseo |
97 | enhanced storing of last sucessfully processed run (alberto) | |
98 | ||
a7160fe9 | 99 | Revision 1.5 2006/07/04 14:59:57 jgrosseo |
100 | revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2 | |
101 | ||
45a493ce | 102 | Revision 1.4 2006/06/12 09:11:16 jgrosseo |
103 | coding conventions (Alberto) | |
104 | ||
58bc3020 | 105 | Revision 1.3 2006/06/06 14:26:40 jgrosseo |
106 | o) removed files that were moved to STEER | |
107 | o) shuttle updated to follow the new interface (Alberto) | |
108 | ||
b948db8d | 109 | Revision 1.2 2006/03/07 07:52:34 hristov |
110 | New version (B.Yordanov) | |
111 | ||
d477ad88 | 112 | Revision 1.6 2005/11/19 17:19:14 byordano |
113 | RetrieveDATEEntries and RetrieveConditionsData added | |
114 | ||
115 | Revision 1.5 2005/11/19 11:09:27 byordano | |
116 | AliShuttle declaration added | |
117 | ||
118 | Revision 1.4 2005/11/17 17:47:34 byordano | |
119 | TList changed to TObjArray | |
120 | ||
121 | Revision 1.3 2005/11/17 14:43:23 byordano | |
122 | import to local CVS | |
123 | ||
124 | Revision 1.1.1.1 2005/10/28 07:33:58 hristov | |
125 | Initial import as subdirectory in AliRoot | |
126 | ||
73abe331 | 127 | Revision 1.2 2005/09/13 08:41:15 byordano |
128 | default startTime endTime added | |
129 | ||
130 | Revision 1.4 2005/08/30 09:13:02 byordano | |
131 | some docs added | |
132 | ||
133 | Revision 1.3 2005/08/29 21:15:47 byordano | |
134 | some docs added | |
135 | ||
136 | */ | |
137 | ||
138 | // | |
139 | // This class is the main manager for AliShuttle. | |
140 | // It organizes the data retrieval from DCS and calls the | |
b948db8d | 141 | // interface methods of AliPreprocessor. |
73abe331 | 142 | // For every detector in the configuration (see AliShuttleConfig), |
143 | // data for its set of aliases is retrieved. If there is a registered | |
b948db8d | 144 | // AliPreprocessor for this detector then it will be used |
145 | // according to the schema (see AliPreprocessor). | |
146 | // If there isn't a registered AliPreprocessor then the retrieved | |
73abe331 | 147 | // data is stored automatically to the underlying AliCDBStorage. |
148 | // The alias name is used for detSpec. | |
149 | // | |
150 | ||
151 | #include "AliShuttle.h" | |
152 | ||
153 | #include "AliCDBManager.h" | |
154 | #include "AliCDBStorage.h" | |
155 | #include "AliCDBId.h" | |
84090f85 | 156 | #include "AliCDBRunRange.h" |
157 | #include "AliCDBPath.h" | |
5164a766 | 158 | #include "AliCDBEntry.h" |
73abe331 | 159 | #include "AliShuttleConfig.h" |
eba76848 | 160 | #include "DCSClient/AliDCSClient.h" |
73abe331 | 161 | #include "AliLog.h" |
b948db8d | 162 | #include "AliPreprocessor.h" |
5164a766 | 163 | #include "AliShuttleStatus.h" |
2bb7b766 | 164 | #include "AliShuttleLogbookEntry.h" |
73abe331 | 165 | |
57f50b3c | 166 | #include <TSystem.h> |
58bc3020 | 167 | #include <TObject.h> |
b948db8d | 168 | #include <TString.h> |
57f50b3c | 169 | #include <TTimeStamp.h> |
73abe331 | 170 | #include <TObjString.h> |
57f50b3c | 171 | #include <TSQLServer.h> |
172 | #include <TSQLResult.h> | |
173 | #include <TSQLRow.h> | |
cb343cfd | 174 | #include <TMutex.h> |
73abe331 | 175 | |
e7f62f16 | 176 | #include <TMonaLisaWriter.h> |
177 | ||
5164a766 | 178 | #include <fstream> |
179 | ||
cb343cfd | 180 | #include <sys/types.h> |
181 | #include <sys/wait.h> | |
182 | ||
73abe331 | 183 | ClassImp(AliShuttle) |
184 | ||
2bb7b766 | 185 | TString AliShuttle::fgkMainCDB("alien://folder=ShuttleCDB"); |
84090f85 | 186 | TString AliShuttle::fgkLocalCDB("local://LocalShuttleCDB"); |
2bb7b766 | 187 | TString AliShuttle::fgkMainRefStorage("alien://folder=ShuttleReference"); |
84090f85 | 188 | TString AliShuttle::fgkLocalRefStorage("local://LocalReferenceStorage"); |
189 | ||
4f0ab988 | 190 | Bool_t AliShuttle::fgkProcessDCS(kTRUE); |
191 | ||
36c99a6a | 192 | TString AliShuttle::fgkShuttleTempDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/temp"); |
193 | TString AliShuttle::fgkShuttleLogDir = gSystem->ExpandPathName("$ALICE_ROOT/SHUTTLE/log"); | |
57f50b3c | 194 | |
b948db8d | 195 | //______________________________________________________________________________________________ |
196 | AliShuttle::AliShuttle(const AliShuttleConfig* config, | |
197 | UInt_t timeout, Int_t retries): | |
4f0ab988 | 198 | fConfig(config), |
199 | fTimeout(timeout), fRetries(retries), | |
200 | fPreprocessorMap(), | |
2bb7b766 | 201 | fLogbookEntry(0), |
eba76848 | 202 | fCurrentDetector(), |
85a80aa9 | 203 | fStatusEntry(0), |
cb343cfd | 204 | fGridError(kFALSE), |
205 | fMonitoringMutex(0), | |
eba76848 | 206 | fLastActionTime(0), |
e7f62f16 | 207 | fLastAction(), |
208 | fMonaLisa(0) | |
73abe331 | 209 | { |
210 | // | |
211 | // config: AliShuttleConfig used | |
73abe331 | 212 | // timeout: timeout used for AliDCSClient connection |
213 | // retries: the number of retries in case of connection error. | |
214 | // | |
215 | ||
57f50b3c | 216 | if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********"); |
be48e3ea | 217 | for(int iSys=0;iSys<4;iSys++) { |
57f50b3c | 218 | fServer[iSys]=0; |
be48e3ea | 219 | if (iSys < 3) |
2c15234c | 220 | fFXSlist[iSys].SetOwner(kTRUE); |
57f50b3c | 221 | } |
2bb7b766 | 222 | fPreprocessorMap.SetOwner(kTRUE); |
be48e3ea | 223 | |
224 | for (UInt_t iDet=0; iDet<NDetectors(); iDet++) | |
225 | fFirstUnprocessed[iDet] = kFALSE; | |
226 | ||
cb343cfd | 227 | fMonitoringMutex = new TMutex(); |
58bc3020 | 228 | } |
229 | ||
b948db8d | 230 | //______________________________________________________________________________________________ |
57f50b3c | 231 | AliShuttle::~AliShuttle() |
58bc3020 | 232 | { |
233 | // destructor | |
234 | ||
b948db8d | 235 | fPreprocessorMap.DeleteAll(); |
be48e3ea | 236 | for(int iSys=0;iSys<4;iSys++) |
57f50b3c | 237 | if(fServer[iSys]) { |
238 | fServer[iSys]->Close(); | |
239 | delete fServer[iSys]; | |
eba76848 | 240 | fServer[iSys] = 0; |
57f50b3c | 241 | } |
2bb7b766 | 242 | |
243 | if (fStatusEntry){ | |
244 | delete fStatusEntry; | |
245 | fStatusEntry = 0; | |
246 | } | |
cb343cfd | 247 | |
248 | if (fMonitoringMutex) | |
249 | { | |
250 | delete fMonitoringMutex; | |
251 | fMonitoringMutex = 0; | |
252 | } | |
73abe331 | 253 | } |
254 | ||
b948db8d | 255 | //______________________________________________________________________________________________ |
57f50b3c | 256 | void AliShuttle::RegisterPreprocessor(AliPreprocessor* preprocessor) |
58bc3020 | 257 | { |
73abe331 | 258 | // |
b948db8d | 259 | // Registers new AliPreprocessor. |
73abe331 | 260 | // It uses GetName() for indentificator of the pre processor. |
261 | // The pre processor is registered it there isn't any other | |
262 | // with the same identificator (GetName()). | |
263 | // | |
264 | ||
eba76848 | 265 | const char* detName = preprocessor->GetName(); |
266 | if(GetDetPos(detName) < 0) | |
267 | AliFatal(Form("********** !!!!! Invalid detector name: %s !!!!! **********", detName)); | |
268 | ||
269 | if (fPreprocessorMap.GetValue(detName)) { | |
270 | AliWarning(Form("AliPreprocessor %s is already registered!", detName)); | |
73abe331 | 271 | return; |
272 | } | |
273 | ||
eba76848 | 274 | fPreprocessorMap.Add(new TObjString(detName), preprocessor); |
73abe331 | 275 | } |
b948db8d | 276 | //______________________________________________________________________________________________ |
84090f85 | 277 | UInt_t AliShuttle::Store(const AliCDBPath& path, TObject* object, |
278 | AliCDBMetaData* metaData, Int_t validityStart, Bool_t validityInfinite) | |
73abe331 | 279 | { |
84090f85 | 280 | // Stores a CDB object in the storage for offline reconstruction. Objects that are not needed for |
281 | // offline reconstruction, but should be stored anyway (e.g. for debugging) should NOT be stored | |
282 | // using this function. Use StoreReferenceData instead! | |
85a80aa9 | 283 | // It calls WriteToCDB function which perform actual storage |
b948db8d | 284 | |
85a80aa9 | 285 | return WriteToCDB(fgkMainCDB, fgkLocalCDB, path, object, |
286 | metaData, validityStart, validityInfinite); | |
84090f85 | 287 | |
288 | } | |
289 | ||
290 | //______________________________________________________________________________________________ | |
481441a2 | 291 | UInt_t AliShuttle::StoreReferenceData(const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData) |
84090f85 | 292 | { |
293 | // Stores a CDB object in the storage for reference data. This objects will not be available during | |
294 | // offline reconstrunction. Use this function for reference data only! | |
85a80aa9 | 295 | // It calls WriteToCDB function which perform actual storage |
296 | ||
481441a2 | 297 | return WriteToCDB(fgkMainRefStorage, fgkLocalRefStorage, path, object, metaData); |
84090f85 | 298 | |
85a80aa9 | 299 | } |
300 | ||
301 | //______________________________________________________________________________________________ | |
302 | UInt_t AliShuttle::WriteToCDB(const char* mainUri, const char* localUri, | |
303 | const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData, | |
304 | Int_t validityStart, Bool_t validityInfinite) | |
305 | { | |
306 | // write object into the CDB. Parameters are passed by Store and StoreReferenceData functions. | |
307 | // The parameters are: | |
308 | // 1) Uri of the main storage (Grid) | |
309 | // 2) Uri of the backup storage (Local) | |
310 | // 3) the object's path. | |
311 | // 4) the object to be stored | |
312 | // 5) the metaData to be associated with the object | |
313 | // 6) the validity start run number w.r.t. the current run, | |
84090f85 | 314 | // if the data is valid only for this run leave the default 0 |
85a80aa9 | 315 | // 7) specifies if the calibration data is valid for infinity (this means until updated), |
84090f85 | 316 | // typical for calibration runs, the default is kFALSE |
317 | // | |
84090f85 | 318 | // returns 0 if fail |
85a80aa9 | 319 | // 1 if stored in main (Grid) storage |
320 | // 2 if stored in backup (Local) storage | |
84090f85 | 321 | |
85a80aa9 | 322 | const char* cdbType = (mainUri == fgkMainCDB) ? "CDB" : "Reference"; |
2bb7b766 | 323 | |
85a80aa9 | 324 | Int_t firstRun = GetCurrentRun() - validityStart; |
84090f85 | 325 | if(firstRun < 0) { |
2bb7b766 | 326 | AliError("First valid run happens to be less than 0! Setting it to 0."); |
84090f85 | 327 | firstRun=0; |
328 | } | |
329 | ||
330 | Int_t lastRun = -1; | |
331 | if(validityInfinite) { | |
332 | lastRun = AliCDBRunRange::Infinity(); | |
333 | } else { | |
334 | lastRun = GetCurrentRun(); | |
335 | } | |
336 | ||
2bb7b766 | 337 | AliCDBId id(path, firstRun, lastRun, -1, -1); |
338 | ||
339 | if(! dynamic_cast<TObjString*> (metaData->GetProperty("RunUsed(TObjString)"))){ | |
340 | TObjString runUsed = Form("%d", GetCurrentRun()); | |
9e080f92 | 341 | metaData->SetProperty("RunUsed(TObjString)", runUsed.Clone()); |
2bb7b766 | 342 | } |
84090f85 | 343 | |
344 | UInt_t result = 0; | |
345 | ||
85a80aa9 | 346 | if (!(AliCDBManager::Instance()->GetStorage(mainUri))) { |
2bb7b766 | 347 | AliError(Form("WriteToCDB - Cannot activate main %s storage", cdbType)); |
84090f85 | 348 | } else { |
85a80aa9 | 349 | result = (UInt_t) AliCDBManager::Instance()->GetStorage(mainUri) |
84090f85 | 350 | ->Put(object, id, metaData); |
351 | } | |
352 | ||
353 | if(!result) { | |
354 | ||
355 | Log(fCurrentDetector, | |
2bb7b766 | 356 | Form("WriteToCDB - Problem with main %s storage. Putting <%s> into backup storage", |
357 | cdbType, path.GetPath().Data())); | |
358 | ||
359 | // Set Grid version to current run number, to ease retrieval later | |
360 | id.SetVersion(GetCurrentRun()); | |
84090f85 | 361 | |
85a80aa9 | 362 | result = AliCDBManager::Instance()->GetStorage(localUri) |
84090f85 | 363 | ->Put(object, id, metaData); |
364 | ||
365 | if(result) { | |
366 | result = 2; | |
85a80aa9 | 367 | fGridError = kTRUE; |
84090f85 | 368 | }else{ |
2bb7b766 | 369 | Log(fCurrentDetector, "WriteToCDB - Can't store data!"); |
b948db8d | 370 | } |
371 | } | |
2bb7b766 | 372 | |
b948db8d | 373 | return result; |
374 | ||
73abe331 | 375 | } |
376 | ||
b948db8d | 377 | //______________________________________________________________________________________________ |
5164a766 | 378 | AliShuttleStatus* AliShuttle::ReadShuttleStatus() |
379 | { | |
2bb7b766 | 380 | // Reads the AliShuttleStatus from the CDB |
5164a766 | 381 | |
2bb7b766 | 382 | if (fStatusEntry){ |
383 | delete fStatusEntry; | |
384 | fStatusEntry = 0; | |
385 | } | |
5164a766 | 386 | |
2bb7b766 | 387 | fStatusEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalCDB()) |
388 | ->Get(Form("/SHUTTLE/STATUS/%s", fCurrentDetector.Data()), GetCurrentRun()); | |
5164a766 | 389 | |
2bb7b766 | 390 | if (!fStatusEntry) return 0; |
391 | fStatusEntry->SetOwner(1); | |
5164a766 | 392 | |
2bb7b766 | 393 | AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject()); |
394 | if (!status) { | |
395 | AliError("Invalid object stored to CDB!"); | |
396 | return 0; | |
397 | } | |
5164a766 | 398 | |
2bb7b766 | 399 | return status; |
5164a766 | 400 | } |
401 | ||
402 | //______________________________________________________________________________________________ | |
7bfb2090 | 403 | Bool_t AliShuttle::WriteShuttleStatus(AliShuttleStatus* status) |
5164a766 | 404 | { |
2bb7b766 | 405 | // writes the status for one subdetector |
406 | ||
407 | if (fStatusEntry){ | |
408 | delete fStatusEntry; | |
409 | fStatusEntry = 0; | |
410 | } | |
5164a766 | 411 | |
2bb7b766 | 412 | Int_t run = GetCurrentRun(); |
5164a766 | 413 | |
2bb7b766 | 414 | AliCDBId id(AliCDBPath("SHUTTLE", "STATUS", fCurrentDetector), run, run); |
5164a766 | 415 | |
2bb7b766 | 416 | fStatusEntry = new AliCDBEntry(status, id, new AliCDBMetaData); |
417 | fStatusEntry->SetOwner(1); | |
5164a766 | 418 | |
2bb7b766 | 419 | UInt_t result = AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry); |
7bfb2090 | 420 | |
2bb7b766 | 421 | if (!result) { |
422 | AliError(Form("WriteShuttleStatus for %s, run %d failed", fCurrentDetector.Data(), run)); | |
423 | return kFALSE; | |
424 | } | |
e7f62f16 | 425 | |
426 | SendMLInfo(); | |
7bfb2090 | 427 | |
2bb7b766 | 428 | return kTRUE; |
5164a766 | 429 | } |
430 | ||
431 | //______________________________________________________________________________________________ | |
432 | void AliShuttle::UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount) | |
433 | { | |
434 | // changes the AliShuttleStatus for the given detector and run to the given status | |
435 | ||
2bb7b766 | 436 | if (!fStatusEntry){ |
437 | AliError("UNEXPECTED: fStatusEntry empty"); | |
438 | return; | |
439 | } | |
5164a766 | 440 | |
2bb7b766 | 441 | AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject()); |
5164a766 | 442 | |
2bb7b766 | 443 | if (!status){ |
444 | AliError("UNEXPECTED: status could not be read from current CDB entry"); | |
445 | return; | |
446 | } | |
5164a766 | 447 | |
2c15234c | 448 | TString actionStr = Form("UpdateShuttleStatus - %s: Changing state from %s to %s", |
eba76848 | 449 | fCurrentDetector.Data(), |
36c99a6a | 450 | status->GetStatusName(), |
eba76848 | 451 | status->GetStatusName(newStatus)); |
cb343cfd | 452 | Log("SHUTTLE", actionStr); |
453 | SetLastAction(actionStr); | |
5164a766 | 454 | |
2bb7b766 | 455 | status->SetStatus(newStatus); |
456 | if (increaseCount) status->IncreaseCount(); | |
5164a766 | 457 | |
2bb7b766 | 458 | AliCDBManager::Instance()->GetStorage(fgkLocalCDB)->Put(fStatusEntry); |
e7f62f16 | 459 | |
460 | SendMLInfo(); | |
5164a766 | 461 | } |
e7f62f16 | 462 | |
463 | //______________________________________________________________________________________________ | |
464 | void AliShuttle::SendMLInfo() | |
465 | { | |
466 | // | |
467 | // sends ML information about the current status of the current detector being processed | |
468 | // | |
469 | ||
470 | AliShuttleStatus* status = dynamic_cast<AliShuttleStatus*> (fStatusEntry->GetObject()); | |
471 | ||
472 | if (!status){ | |
473 | AliError("UNEXPECTED: status could not be read from current CDB entry"); | |
474 | return; | |
475 | } | |
476 | ||
477 | TMonaLisaText mlStatus(Form("%s_status", fCurrentDetector.Data()), status->GetStatusName()); | |
478 | TMonaLisaValue mlRetryCount(Form("%s_count", fCurrentDetector.Data()), status->GetCount()); | |
479 | ||
480 | TList mlList; | |
481 | mlList.Add(&mlStatus); | |
482 | mlList.Add(&mlRetryCount); | |
483 | ||
484 | fMonaLisa->SendParameters(&mlList); | |
485 | } | |
486 | ||
5164a766 | 487 | //______________________________________________________________________________________________ |
488 | Bool_t AliShuttle::ContinueProcessing() | |
489 | { | |
2bb7b766 | 490 | // this function reads the AliShuttleStatus information from CDB and |
491 | // checks if the processing should be continued | |
492 | // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus | |
493 | ||
57c1a579 | 494 | if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE; |
495 | ||
496 | AliPreprocessor* aPreprocessor = | |
497 | dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector)); | |
498 | if (!aPreprocessor) | |
499 | { | |
500 | AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data())); | |
501 | return kFALSE; | |
502 | } | |
503 | ||
2bb7b766 | 504 | AliShuttleLogbookEntry::Status entryStatus = |
eba76848 | 505 | fLogbookEntry->GetDetectorStatus(fCurrentDetector); |
2bb7b766 | 506 | |
507 | if(entryStatus != AliShuttleLogbookEntry::kUnprocessed) { | |
9e080f92 | 508 | AliInfo(Form("ContinueProcessing - %s is %s", |
2bb7b766 | 509 | fCurrentDetector.Data(), |
510 | fLogbookEntry->GetDetectorStatusName(entryStatus))); | |
511 | return kFALSE; | |
512 | } | |
513 | ||
514 | // if we get here, according to Shuttle logbook subdetector is in UNPROCESSED state | |
be48e3ea | 515 | |
516 | // check if current run is first unprocessed run for current detector | |
517 | if (fConfig->StrictRunOrder(fCurrentDetector) && | |
518 | !fFirstUnprocessed[GetDetPos(fCurrentDetector)]) | |
519 | { | |
520 | Log("SHUTTLE", Form("ContinueProcessing - %s requires strict run ordering but this is not the first unprocessed run!")); | |
521 | return kFALSE; | |
522 | } | |
523 | ||
2bb7b766 | 524 | AliShuttleStatus* status = ReadShuttleStatus(); |
525 | if (!status) { | |
526 | // first time | |
527 | Log("SHUTTLE", Form("ContinueProcessing - %s: Processing first time", | |
528 | fCurrentDetector.Data())); | |
529 | status = new AliShuttleStatus(AliShuttleStatus::kStarted); | |
530 | return WriteShuttleStatus(status); | |
531 | } | |
532 | ||
533 | // The following two cases shouldn't happen if Shuttle Logbook was correctly updated. | |
534 | // If it happens it may mean Logbook updating failed... let's do it now! | |
535 | if (status->GetStatus() == AliShuttleStatus::kDone || | |
536 | status->GetStatus() == AliShuttleStatus::kFailed){ | |
537 | Log("SHUTTLE", Form("ContinueProcessing - %s is already %s. Updating Shuttle Logbook", | |
538 | fCurrentDetector.Data(), | |
539 | status->GetStatusName(status->GetStatus()))); | |
540 | UpdateShuttleLogbook(fCurrentDetector.Data(), | |
541 | status->GetStatusName(status->GetStatus())); | |
542 | return kFALSE; | |
543 | } | |
544 | ||
545 | if (status->GetStatus() == AliShuttleStatus::kStoreFailed) { | |
546 | Log("SHUTTLE", | |
547 | Form("ContinueProcessing - %s: Grid storage of one or more objects failed. Trying again now", | |
548 | fCurrentDetector.Data())); | |
549 | if(TryToStoreAgain()){ | |
550 | Log(fCurrentDetector.Data(), "ContinueProcessing - All objects successfully stored into OCDB"); | |
551 | UpdateShuttleStatus(AliShuttleStatus::kDone); | |
552 | UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE"); | |
553 | } else { | |
554 | Log("SHUTTLE", | |
555 | Form("ContinueProcessing - %s: Grid storage failed again", | |
556 | fCurrentDetector.Data())); | |
e7f62f16 | 557 | // trigger ML information manually because we do not had a status change |
558 | SendMLInfo(); | |
2bb7b766 | 559 | } |
560 | return kFALSE; | |
561 | } | |
562 | ||
563 | // if we get here, there is a restart | |
57c1a579 | 564 | Bool_t cont = kFALSE; |
2bb7b766 | 565 | |
566 | // abort conditions | |
cb343cfd | 567 | if (status->GetCount() >= fConfig->GetMaxRetries()) { |
57c1a579 | 568 | Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - " |
569 | "Updating Shuttle Logbook", fCurrentDetector.Data(), | |
2bb7b766 | 570 | status->GetCount(), status->GetStatusName())); |
571 | UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED"); | |
e7f62f16 | 572 | UpdateShuttleStatus(AliShuttleStatus::kFailed); |
57c1a579 | 573 | } else { |
574 | Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. " | |
575 | "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(), | |
576 | status->GetStatusName(), status->GetCount())); | |
577 | UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE); | |
578 | cont = kTRUE; | |
2bb7b766 | 579 | } |
580 | ||
57c1a579 | 581 | // Send mail to detector expert! |
582 | AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data())); | |
583 | if (!SendMail()) | |
584 | Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert", | |
585 | fCurrentDetector.Data())); | |
2bb7b766 | 586 | |
57c1a579 | 587 | return cont; |
5164a766 | 588 | } |
589 | ||
590 | //______________________________________________________________________________________________ | |
2bb7b766 | 591 | Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry) |
58bc3020 | 592 | { |
73abe331 | 593 | // |
b948db8d | 594 | // Makes data retrieval for all detectors in the configuration. |
2bb7b766 | 595 | // entry: Shuttle logbook entry, contains run paramenters and status of detectors |
596 | // (Unprocessed, Inactive, Failed or Done). | |
d477ad88 | 597 | // Returns kFALSE in case of error occured and kTRUE otherwise |
73abe331 | 598 | // |
599 | ||
2bb7b766 | 600 | if(!entry) return kFALSE; |
601 | ||
602 | fLogbookEntry = entry; | |
603 | ||
e7f62f16 | 604 | if (fLogbookEntry->IsDone()) |
605 | { | |
2bb7b766 | 606 | Log("SHUTTLE","Process - Shuttle is already DONE. Updating logbook"); |
607 | UpdateShuttleLogbook("shuttle_done"); | |
608 | fLogbookEntry = 0; | |
609 | return kTRUE; | |
610 | } | |
611 | ||
e7f62f16 | 612 | // create ML instance that monitors this run |
613 | fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch"); | |
614 | // disable monitoring of other parameters that come e.g. from TFile | |
615 | gMonitoringWriter = 0; | |
2bb7b766 | 616 | |
617 | AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^* \n", | |
618 | GetCurrentRun())); | |
619 | ||
e7f62f16 | 620 | |
621 | // Send the information to ML | |
622 | TMonaLisaText mlStatus("SHUTTLE_status", "Processing"); | |
623 | ||
624 | TList mlList; | |
625 | mlList.Add(&mlStatus); | |
626 | ||
627 | fMonaLisa->SendParameters(&mlList); | |
628 | ||
eba76848 | 629 | fLogbookEntry->Print("all"); |
57f50b3c | 630 | |
631 | // Initialization | |
d477ad88 | 632 | Bool_t hasError = kFALSE; |
5164a766 | 633 | |
2bb7b766 | 634 | AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB); |
635 | if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun()); | |
636 | AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage); | |
637 | if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun()); | |
d477ad88 | 638 | |
57f50b3c | 639 | // Loop on detectors in the configuration |
b948db8d | 640 | TIter iter(fConfig->GetDetectors()); |
2bb7b766 | 641 | TObjString* aDetector = 0; |
b948db8d | 642 | |
be48e3ea | 643 | while ((aDetector = (TObjString*) iter.Next())) |
644 | { | |
7bfb2090 | 645 | fCurrentDetector = aDetector->String(); |
5164a766 | 646 | |
9e080f92 | 647 | if (ContinueProcessing() == kFALSE) continue; |
648 | ||
2bb7b766 | 649 | AliInfo(Form("\n\n \t\t\t****** run %d - %s: START ******", |
650 | GetCurrentRun(), aDetector->GetName())); | |
651 | ||
9d733021 | 652 | for(Int_t iSys=0;iSys<3;iSys++) fFXSCalled[iSys]=kFALSE; |
653 | ||
e7f62f16 | 654 | Log(fCurrentDetector.Data(), "Starting processing"); |
85a80aa9 | 655 | |
be48e3ea | 656 | Int_t pid = fork(); |
657 | ||
658 | if (pid < 0) | |
659 | { | |
660 | Log("SHUTTLE", "ERROR: Forking failed"); | |
661 | } | |
662 | else if (pid > 0) | |
663 | { | |
664 | // parent | |
665 | AliInfo(Form("In parent process of %d - %s: Starting monitoring", | |
666 | GetCurrentRun(), aDetector->GetName())); | |
667 | ||
668 | Long_t begin = time(0); | |
669 | ||
670 | int status; // to be used with waitpid, on purpose an int (not Int_t)! | |
671 | while (waitpid(pid, &status, WNOHANG) == 0) | |
672 | { | |
673 | Long_t expiredTime = time(0) - begin; | |
674 | ||
675 | if (expiredTime > fConfig->GetPPTimeOut()) | |
676 | { | |
677 | Log("SHUTTLE", Form("Process time out. Run time: %d seconds. Killing...", | |
678 | expiredTime)); | |
679 | ||
680 | kill(pid, 9); | |
681 | ||
682 | hasError = kTRUE; | |
683 | ||
684 | gSystem->Sleep(1000); | |
685 | } | |
686 | else | |
687 | { | |
688 | if (expiredTime % 60 == 0) | |
689 | Log("SHUTTLE", Form("Checked process. Run time: %d seconds.", | |
690 | expiredTime)); | |
691 | gSystem->Sleep(1000); | |
692 | } | |
693 | } | |
694 | ||
695 | AliInfo(Form("In parent process of %d - %s: Client has terminated.", | |
696 | GetCurrentRun(), aDetector->GetName())); | |
697 | ||
698 | if (WIFEXITED(status)) | |
699 | { | |
700 | Int_t returnCode = WEXITSTATUS(status); | |
701 | ||
702 | Log("SHUTTLE", Form("The return code is %d", returnCode)); | |
703 | ||
704 | if (returnCode != 0) | |
705 | hasError = kTRUE; | |
706 | } | |
707 | } | |
708 | else if (pid == 0) | |
709 | { | |
710 | // client | |
711 | AliInfo(Form("In client process of %d - %s", GetCurrentRun(), aDetector->GetName())); | |
712 | ||
713 | UInt_t result = ProcessCurrentDetector(); | |
714 | ||
715 | Int_t returnCode = 0; // will be set to 1 in case of an error | |
716 | ||
717 | if (!result) | |
718 | { | |
719 | returnCode = 1; | |
720 | AliInfo(Form("\n \t\t\t****** run %d - %s: PREPROCESSOR ERROR ****** \n\n", | |
721 | GetCurrentRun(), aDetector->GetName())); | |
722 | } | |
723 | else if (result == 2) | |
724 | { | |
725 | AliInfo(Form("\n \t\t\t****** run %d - %s: STORAGE ERROR ****** \n\n", | |
726 | GetCurrentRun(), aDetector->GetName())); | |
727 | } else | |
728 | { | |
729 | AliInfo(Form("\n \t\t\t****** run %d - %s: DONE ****** \n\n", | |
730 | GetCurrentRun(), aDetector->GetName())); | |
731 | } | |
732 | ||
733 | if (result > 0) | |
734 | { | |
2c15234c | 735 | // Process successful: Update time_processed field in FXS logbooks! |
9d733021 | 736 | if (UpdateTable() == kFALSE) returnCode = 1; |
be48e3ea | 737 | } |
738 | ||
4b95672b | 739 | for (UInt_t iSys=0; iSys<3; iSys++) |
740 | { | |
741 | if (fFXSCalled[iSys]) fFXSlist[iSys].Clear(); | |
742 | } | |
743 | ||
be48e3ea | 744 | AliInfo(Form("Client process of %d - %s is exiting now with %d.", |
745 | GetCurrentRun(), aDetector->GetName(), returnCode)); | |
746 | ||
747 | // the client exits here | |
748 | gSystem->Exit(returnCode); | |
749 | ||
750 | AliError("We should never get here!!!"); | |
751 | } | |
7bfb2090 | 752 | } |
5164a766 | 753 | |
2bb7b766 | 754 | AliInfo(Form("\n\n \t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: FINISH ^*^*^*^*^*^*^*^*^*^*^*^* \n", |
755 | GetCurrentRun())); | |
756 | ||
757 | //check if shuttle is done for this run, if so update logbook | |
758 | TObjArray checkEntryArray; | |
759 | checkEntryArray.SetOwner(1); | |
9e080f92 | 760 | TString whereClause = Form("where run=%d", GetCurrentRun()); |
761 | if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) { | |
762 | Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!", | |
763 | GetCurrentRun())); | |
764 | return hasError == kFALSE; | |
765 | } | |
b948db8d | 766 | |
9e080f92 | 767 | AliShuttleLogbookEntry* checkEntry = dynamic_cast<AliShuttleLogbookEntry*> |
768 | (checkEntryArray.At(0)); | |
2bb7b766 | 769 | |
9e080f92 | 770 | if (checkEntry) |
771 | { | |
772 | if (checkEntry->IsDone()) | |
be48e3ea | 773 | { |
9e080f92 | 774 | Log("SHUTTLE","Process - Shuttle is DONE. Updating logbook"); |
775 | UpdateShuttleLogbook("shuttle_done"); | |
776 | } | |
777 | else | |
778 | { | |
779 | for (UInt_t iDet=0; iDet<NDetectors(); iDet++) | |
be48e3ea | 780 | { |
9e080f92 | 781 | if (checkEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed) |
be48e3ea | 782 | { |
9e080f92 | 783 | AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"", |
784 | checkEntry->GetRun(), GetDetName(iDet))); | |
785 | fFirstUnprocessed[iDet] = kFALSE; | |
be48e3ea | 786 | } |
787 | } | |
2bb7b766 | 788 | } |
789 | } | |
790 | ||
e7f62f16 | 791 | // remove ML instance |
792 | delete fMonaLisa; | |
793 | fMonaLisa = 0; | |
794 | ||
2bb7b766 | 795 | fLogbookEntry = 0; |
85a80aa9 | 796 | |
a7160fe9 | 797 | return hasError == kFALSE; |
73abe331 | 798 | } |
799 | ||
b948db8d | 800 | //______________________________________________________________________________________________ |
2bb7b766 | 801 | UInt_t AliShuttle::ProcessCurrentDetector() |
73abe331 | 802 | { |
803 | // | |
2bb7b766 | 804 | // Makes data retrieval just for a specific detector (fCurrentDetector). |
73abe331 | 805 | // Threre should be a configuration for this detector. |
73abe331 | 806 | |
2bb7b766 | 807 | AliInfo(Form("Retrieving values for %s, run %d", fCurrentDetector.Data(), GetCurrentRun())); |
73abe331 | 808 | |
7bfb2090 | 809 | UpdateShuttleStatus(AliShuttleStatus::kDCSStarted); |
73abe331 | 810 | |
2c15234c | 811 | TMap dcsMap; |
812 | dcsMap.SetOwner(1); | |
73abe331 | 813 | |
85a80aa9 | 814 | Bool_t aDCSError = kFALSE; |
815 | fGridError = kFALSE; | |
d477ad88 | 816 | |
2c15234c | 817 | // TODO Test only... I've added a flag that allows to |
818 | // exclude DCS archive DB query | |
819 | if (!fgkProcessDCS) | |
820 | { | |
821 | AliInfo("Skipping DCS processing!"); | |
822 | aDCSError = kFALSE; | |
823 | } else { | |
824 | TString host(fConfig->GetDCSHost(fCurrentDetector)); | |
825 | Int_t port = fConfig->GetDCSPort(fCurrentDetector); | |
826 | ||
827 | // Retrieval of Aliases | |
828 | TObjString* anAlias = 0; | |
36c99a6a | 829 | Int_t iAlias = 1; |
830 | Int_t nTotAliases= ((TMap*)fConfig->GetDCSAliases(fCurrentDetector))->GetEntries(); | |
2c15234c | 831 | TIter iterAliases(fConfig->GetDCSAliases(fCurrentDetector)); |
832 | while ((anAlias = (TObjString*) iterAliases.Next())) | |
833 | { | |
834 | TObjArray *valueSet = new TObjArray(); | |
835 | valueSet->SetOwner(1); | |
836 | ||
36c99a6a | 837 | if (((iAlias-1) % 500) == 0 || iAlias == nTotAliases) |
838 | AliInfo(Form("Querying DCS archive: alias %s (%d of %d)", | |
839 | anAlias->GetName(), iAlias++, nTotAliases)); | |
2c15234c | 840 | aDCSError = (GetValueSet(host, port, anAlias->String(), valueSet, kAlias) == 0); |
841 | ||
842 | if(!aDCSError) | |
843 | { | |
844 | dcsMap.Add(anAlias->Clone(), valueSet); | |
845 | } else { | |
846 | Log(fCurrentDetector, | |
847 | Form("ProcessCurrentDetector - Error while retrieving alias %s", | |
848 | anAlias->GetName())); | |
849 | UpdateShuttleStatus(AliShuttleStatus::kDCSError); | |
850 | dcsMap.DeleteAll(); | |
851 | return 0; | |
852 | } | |
4f0ab988 | 853 | } |
2c15234c | 854 | |
855 | // Retrieval of Data Points | |
856 | TObjString* aDP = 0; | |
36c99a6a | 857 | Int_t iDP = 0; |
858 | Int_t nTotDPs= ((TMap*)fConfig->GetDCSDataPoints(fCurrentDetector))->GetEntries(); | |
2c15234c | 859 | TIter iterDP(fConfig->GetDCSDataPoints(fCurrentDetector)); |
860 | while ((aDP = (TObjString*) iterDP.Next())) | |
861 | { | |
862 | TObjArray *valueSet = new TObjArray(); | |
863 | valueSet->SetOwner(1); | |
36c99a6a | 864 | if (((iDP-1) % 500) == 0 || iDP == nTotDPs) |
865 | AliInfo(Form("Querying DCS archive: DP %s (%d of %d)", | |
866 | aDP->GetName(), iDP++, nTotDPs)); | |
2c15234c | 867 | aDCSError = (GetValueSet(host, port, aDP->String(), valueSet, kDP) == 0); |
868 | ||
869 | if(!aDCSError) | |
870 | { | |
871 | dcsMap.Add(aDP->Clone(), valueSet); | |
872 | } else { | |
873 | Log(fCurrentDetector, | |
874 | Form("ProcessCurrentDetector - Error while retrieving data point %s", | |
875 | aDP->GetName())); | |
876 | UpdateShuttleStatus(AliShuttleStatus::kDCSError); | |
877 | dcsMap.DeleteAll(); | |
878 | return 0; | |
879 | } | |
73abe331 | 880 | } |
881 | } | |
b948db8d | 882 | |
2bb7b766 | 883 | // DCS Archive DB processing successful. Call Preprocessor! |
85a80aa9 | 884 | UpdateShuttleStatus(AliShuttleStatus::kPPStarted); |
a7160fe9 | 885 | |
85a80aa9 | 886 | AliPreprocessor* aPreprocessor = |
5164a766 | 887 | dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector)); |
b948db8d | 888 | |
2bb7b766 | 889 | aPreprocessor->Initialize(GetCurrentRun(), GetCurrentStartTime(), GetCurrentEndTime()); |
2c15234c | 890 | UInt_t aPPResult = aPreprocessor->Process(&dcsMap); |
2bb7b766 | 891 | |
892 | UInt_t returnValue = 0; | |
85a80aa9 | 893 | if (aPPResult == 0) { // Preprocessor error |
cb343cfd | 894 | UpdateShuttleStatus(AliShuttleStatus::kPPError); |
2bb7b766 | 895 | returnValue = 0; |
85a80aa9 | 896 | } else if (fGridError == kFALSE) { // process and Grid storage ok! |
897 | UpdateShuttleStatus(AliShuttleStatus::kDone); | |
2bb7b766 | 898 | UpdateShuttleLogbook(fCurrentDetector, "DONE"); |
899 | Log(fCurrentDetector.Data(), | |
900 | "ProcessCurrentDetector - Preprocessor and Grid storage ended successfully"); | |
901 | returnValue = 1; | |
85a80aa9 | 902 | } else { // Grid storage error (process ok, but object put in local storage) |
903 | UpdateShuttleStatus(AliShuttleStatus::kStoreFailed); | |
2bb7b766 | 904 | returnValue = 2; |
85a80aa9 | 905 | } |
b948db8d | 906 | |
2c15234c | 907 | dcsMap.DeleteAll(); |
b948db8d | 908 | |
2bb7b766 | 909 | return returnValue; |
910 | } | |
911 | ||
912 | //______________________________________________________________________________________________ | |
913 | Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause, | |
914 | TObjArray& entries) | |
915 | { | |
916 | // Query DAQ's Shuttle logbook and fills detector status object. | |
917 | // Call QueryRunParameters to query DAQ logbook for run parameters. | |
918 | ||
fc5a4708 | 919 | entries.SetOwner(1); |
920 | ||
2bb7b766 | 921 | // check connection, in case connect |
be48e3ea | 922 | if(!Connect(3)) return kFALSE; |
2bb7b766 | 923 | |
924 | TString sqlQuery; | |
925 | sqlQuery = Form("select * from logbook_shuttle %s order by run", whereClause); | |
926 | ||
be48e3ea | 927 | TSQLResult* aResult = fServer[3]->Query(sqlQuery); |
2bb7b766 | 928 | if (!aResult) { |
929 | AliError(Form("Can't execute query <%s>!", sqlQuery.Data())); | |
930 | return kFALSE; | |
931 | } | |
932 | ||
fc5a4708 | 933 | AliDebug(2,Form("Query = %s", sqlQuery.Data())); |
934 | ||
2bb7b766 | 935 | if(aResult->GetRowCount() == 0) { |
9e080f92 | 936 | // if(sqlQuery.EndsWith("where shuttle_done=0 order by run")){ |
937 | // Log("SHUTTLE", "QueryShuttleLogbook - All runs in Shuttle Logbook are already DONE"); | |
938 | // delete aResult; | |
939 | // return kTRUE; | |
940 | // } else { | |
941 | AliInfo("No entries in Shuttle Logbook match request"); | |
2bb7b766 | 942 | delete aResult; |
943 | return kTRUE; | |
9e080f92 | 944 | // } |
2bb7b766 | 945 | } |
946 | ||
947 | // TODO Check field count! | |
fc5a4708 | 948 | const UInt_t nCols = 22; |
2bb7b766 | 949 | if (aResult->GetFieldCount() != (Int_t) nCols) { |
950 | AliError("Invalid SQL result field number!"); | |
951 | delete aResult; | |
952 | return kFALSE; | |
953 | } | |
954 | ||
2bb7b766 | 955 | TSQLRow* aRow; |
956 | while ((aRow = aResult->Next())) { | |
957 | TString runString(aRow->GetField(0), aRow->GetFieldLength(0)); | |
958 | Int_t run = runString.Atoi(); | |
959 | ||
eba76848 | 960 | AliShuttleLogbookEntry *entry = QueryRunParameters(run); |
961 | if (!entry) | |
962 | continue; | |
2bb7b766 | 963 | |
964 | // loop on detectors | |
eba76848 | 965 | for(UInt_t ii = 0; ii < nCols; ii++) |
966 | entry->SetDetectorStatus(aResult->GetFieldName(ii), aRow->GetField(ii)); | |
2bb7b766 | 967 | |
eba76848 | 968 | entries.AddLast(entry); |
2bb7b766 | 969 | delete aRow; |
970 | } | |
971 | ||
9e080f92 | 972 | // if(sqlQuery.EndsWith("where shuttle_done=0 order by run")) |
973 | // Log("SHUTTLE", Form("QueryShuttleLogbook - Found %d unprocessed runs in Shuttle Logbook", | |
974 | // entries.GetEntriesFast())); | |
2bb7b766 | 975 | delete aResult; |
976 | return kTRUE; | |
977 | } | |
978 | ||
979 | //______________________________________________________________________________________________ | |
eba76848 | 980 | AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run) |
2bb7b766 | 981 | { |
eba76848 | 982 | // |
983 | // Retrieve run parameters written in the DAQ logbook and sets them into AliShuttleLogbookEntry object | |
984 | // | |
2bb7b766 | 985 | |
986 | // check connection, in case connect | |
be48e3ea | 987 | if (!Connect(3)) |
eba76848 | 988 | return 0; |
2bb7b766 | 989 | |
990 | TString sqlQuery; | |
2c15234c | 991 | sqlQuery.Form("select * from %s where run=%d", fConfig->GetDAQlbTable(), run); |
2bb7b766 | 992 | |
be48e3ea | 993 | TSQLResult* aResult = fServer[3]->Query(sqlQuery); |
2bb7b766 | 994 | if (!aResult) { |
995 | AliError(Form("Can't execute query <%s>!", sqlQuery.Data())); | |
eba76848 | 996 | return 0; |
2bb7b766 | 997 | } |
998 | ||
eba76848 | 999 | if (aResult->GetRowCount() == 0) { |
2bb7b766 | 1000 | Log("SHUTTLE", Form("QueryRunParameters - No entry in DAQ Logbook for run %d. Skipping", run)); |
1001 | delete aResult; | |
eba76848 | 1002 | return 0; |
2bb7b766 | 1003 | } |
1004 | ||
eba76848 | 1005 | if (aResult->GetRowCount() > 1) { |
2bb7b766 | 1006 | AliError(Form("More than one entry in DAQ Logbook for run %d. Skipping", run)); |
1007 | delete aResult; | |
eba76848 | 1008 | return 0; |
2bb7b766 | 1009 | } |
1010 | ||
eba76848 | 1011 | TSQLRow* aRow = aResult->Next(); |
1012 | if (!aRow) | |
1013 | { | |
1014 | AliError(Form("Could not retrieve row for run %d. Skipping", run)); | |
1015 | delete aResult; | |
1016 | return 0; | |
1017 | } | |
2bb7b766 | 1018 | |
eba76848 | 1019 | AliShuttleLogbookEntry* entry = new AliShuttleLogbookEntry(run); |
2bb7b766 | 1020 | |
eba76848 | 1021 | for (Int_t ii = 0; ii < aResult->GetFieldCount(); ii++) |
1022 | entry->SetRunParameter(aResult->GetFieldName(ii), aRow->GetField(ii)); | |
2bb7b766 | 1023 | |
eba76848 | 1024 | UInt_t startTime = entry->GetStartTime(); |
1025 | UInt_t endTime = entry->GetEndTime(); | |
1026 | ||
1027 | if (!startTime || !endTime || startTime > endTime) { | |
1028 | Log("SHUTTLE", | |
1029 | Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d", | |
1030 | run, startTime, endTime)); | |
1031 | delete entry; | |
2bb7b766 | 1032 | delete aRow; |
eba76848 | 1033 | delete aResult; |
1034 | return 0; | |
2bb7b766 | 1035 | } |
1036 | ||
eba76848 | 1037 | delete aRow; |
2bb7b766 | 1038 | delete aResult; |
eba76848 | 1039 | |
1040 | return entry; | |
2bb7b766 | 1041 | } |
1042 | ||
1043 | //______________________________________________________________________________________________ | |
1044 | Bool_t AliShuttle::TryToStoreAgain() | |
1045 | { | |
1046 | // Called in case the detector failed to store the object in Grid OCDB | |
1047 | // It tries to store the object again, if it does not find more recent and overlapping objects | |
1048 | // Calls underlying TryToStoreAgain(const char*) function twice, for OCDB and Reference storage. | |
1049 | ||
1050 | AliInfo("Trying to store OCDB data again..."); | |
1051 | Bool_t resultCDB = TryToStoreAgain(fgkMainCDB); | |
1052 | ||
1053 | AliInfo("Trying to store reference data again..."); | |
1054 | Bool_t resultRef = TryToStoreAgain(fgkMainRefStorage); | |
1055 | ||
1056 | return resultCDB && resultRef; | |
1057 | } | |
1058 | ||
1059 | //______________________________________________________________________________________________ | |
1060 | Bool_t AliShuttle::TryToStoreAgain(TString& gridURI) | |
1061 | { | |
1062 | // Called by TryToStoreAgain(), performs actual storage retry | |
1063 | ||
6ec0e06c | 1064 | TObjArray* gridIds=0; |
2bb7b766 | 1065 | |
1066 | Bool_t result = kTRUE; | |
1067 | ||
1068 | const char* type = 0; | |
1069 | TString backupURI; | |
1070 | if(gridURI == fgkMainCDB) { | |
1071 | type = "OCDB"; | |
1072 | backupURI = fgkLocalCDB; | |
1073 | } else if(gridURI == fgkMainRefStorage) { | |
1074 | type = "reference"; | |
1075 | backupURI = fgkLocalRefStorage; | |
1076 | } else { | |
1077 | AliError(Form("Invalid storage URI: %s", gridURI.Data())); | |
1078 | return kFALSE; | |
1079 | } | |
1080 | ||
1081 | AliCDBManager* man = AliCDBManager::Instance(); | |
1082 | ||
1083 | AliCDBStorage *gridSto = man->GetStorage(gridURI); | |
1084 | if(!gridSto) { | |
1085 | Log(fCurrentDetector.Data(), | |
1086 | Form("TryToStoreAgain - cannot activate main %s storage", type)); | |
1087 | return kFALSE; | |
1088 | } | |
1089 | ||
1090 | gridIds = gridSto->GetQueryCDBList(); | |
1091 | ||
1092 | // get objects previously stored in local CDB | |
1093 | AliCDBStorage *backupSto = man->GetStorage(backupURI); | |
eba76848 | 1094 | AliCDBPath aPath(GetOfflineDetName(fCurrentDetector.Data()),"*","*"); |
2bb7b766 | 1095 | // Local objects were stored with current run as Grid version! |
1096 | TList* localEntries = backupSto->GetAll(aPath.GetPath(), GetCurrentRun(), GetCurrentRun()); | |
1097 | localEntries->SetOwner(1); | |
1098 | ||
1099 | // loop on local stored objects | |
1100 | TIter localIter(localEntries); | |
1101 | AliCDBEntry *aLocEntry = 0; | |
1102 | while((aLocEntry = dynamic_cast<AliCDBEntry*> (localIter.Next()))){ | |
1103 | aLocEntry->SetOwner(1); | |
1104 | AliCDBId aLocId = aLocEntry->GetId(); | |
1105 | aLocEntry->SetVersion(-1); | |
1106 | aLocEntry->SetSubVersion(-1); | |
1107 | ||
1108 | // loop on Grid valid Id's | |
1109 | Bool_t store = kTRUE; | |
1110 | TIter gridIter(gridIds); | |
1111 | AliCDBId* aGridId = 0; | |
1112 | while((aGridId = dynamic_cast<AliCDBId*> (gridIter.Next()))){ | |
be48e3ea | 1113 | // If local object is valid up to infinity we store it only if it is |
1114 | // the first unprocessed run! | |
1115 | if (aLocId.GetLastRun() == AliCDBRunRange::Infinity()) | |
1116 | { | |
1117 | if (!fFirstUnprocessed[GetDetPos(fCurrentDetector)]) | |
1118 | { | |
1119 | Log(fCurrentDetector.Data(), | |
2c15234c | 1120 | ("TryToStoreAgain - This object has validity infinite but " |
1121 | "there are previous unprocessed runs!")); | |
be48e3ea | 1122 | continue; |
1123 | } else { | |
1124 | break; | |
1125 | } | |
2bb7b766 | 1126 | } |
1127 | if(aGridId->GetPath() != aLocId.GetPath()) continue; | |
1128 | // skip all objects valid up to infinity | |
1129 | if(aGridId->GetLastRun() == AliCDBRunRange::Infinity()) continue; | |
1130 | // if we get here, it means there's already some more recent object stored on Grid! | |
1131 | store = kFALSE; | |
1132 | break; | |
1133 | } | |
1134 | ||
1135 | if(!store){ | |
1136 | Log(fCurrentDetector.Data(), | |
1137 | Form("TryToStoreAgain - A more recent object already exists in %s storage: <%s>", | |
1138 | type, aGridId->ToString().Data())); | |
1139 | // removing local filename... | |
1140 | // TODO maybe it's better not to remove it, it was not copied to the Grid! | |
1141 | TString filename; | |
1142 | backupSto->IdToFilename(aLocId, filename); | |
1143 | AliInfo(Form("Removing local file %s", filename.Data())); | |
1144 | gSystem->Exec(Form("rm %s",filename.Data())); | |
1145 | continue; | |
1146 | } | |
1147 | ||
1148 | // If we get here, the file can be stored! | |
1149 | Bool_t storeOk = gridSto->Put(aLocEntry); | |
1150 | if(storeOk){ | |
1151 | Log(fCurrentDetector.Data(), | |
1152 | Form("TryToStoreAgain - Object <%s> successfully put into %s storage", | |
1153 | aLocId.ToString().Data(), type)); | |
1154 | ||
1155 | // removing local filename... | |
1156 | TString filename; | |
1157 | backupSto->IdToFilename(aLocId, filename); | |
1158 | AliInfo(Form("Removing local file %s", filename.Data())); | |
1159 | gSystem->Exec(Form("rm %s", filename.Data())); | |
1160 | continue; | |
1161 | } else { | |
1162 | Log(fCurrentDetector.Data(), | |
1163 | Form("TryToStoreAgain - Grid %s storage of object <%s> failed again", | |
1164 | type, aLocId.ToString().Data())); | |
1165 | result = kFALSE; | |
1166 | } | |
1167 | } | |
1168 | localEntries->Clear(); | |
1169 | ||
1170 | return result; | |
73abe331 | 1171 | } |
1172 | ||
b948db8d | 1173 | //______________________________________________________________________________________________ |
2c15234c | 1174 | Bool_t AliShuttle::GetValueSet(const char* host, Int_t port, const char* entry, |
1175 | TObjArray* valueSet, DCSType type) | |
73abe331 | 1176 | { |
2c15234c | 1177 | // Retrieve all "entry" data points from the DCS server |
58bc3020 | 1178 | // host, port: TSocket connection parameters |
2c15234c | 1179 | // entry: name of the alias or data point |
2bb7b766 | 1180 | // valueSet: array of retrieved AliDCSValue's |
2c15234c | 1181 | // type: kAlias or kDP |
58bc3020 | 1182 | |
73abe331 | 1183 | AliDCSClient client(host, port, fTimeout, fRetries); |
2c15234c | 1184 | if (!client.IsConnected()) |
1185 | { | |
b948db8d | 1186 | return kFALSE; |
73abe331 | 1187 | } |
1188 | ||
2c15234c | 1189 | Int_t result=0; |
73abe331 | 1190 | |
2c15234c | 1191 | if (type == kAlias) |
1192 | { | |
1193 | result = client.GetAliasValues(entry, | |
1194 | GetCurrentStartTime(), GetCurrentEndTime(), valueSet); | |
1195 | } else | |
1196 | if (type == kDP) | |
1197 | { | |
1198 | result = client.GetDPValues(entry, | |
1199 | GetCurrentStartTime(), GetCurrentEndTime(), valueSet); | |
1200 | } | |
1201 | ||
1202 | if (result < 0) | |
1203 | { | |
2bb7b766 | 1204 | Log(fCurrentDetector.Data(), Form("GetValueSet - Can't get '%s'! Reason: %s", |
2c15234c | 1205 | entry, AliDCSClient::GetErrorString(result))); |
73abe331 | 1206 | |
2c15234c | 1207 | if (result == AliDCSClient::fgkServerError) |
1208 | { | |
2bb7b766 | 1209 | Log(fCurrentDetector.Data(), Form("GetValueSet - Server error: %s", |
73abe331 | 1210 | client.GetServerError().Data())); |
1211 | } | |
1212 | ||
1213 | return kFALSE; | |
1214 | } | |
1215 | ||
1216 | return kTRUE; | |
1217 | } | |
b948db8d | 1218 | |
1219 | //______________________________________________________________________________________________ | |
57f50b3c | 1220 | const char* AliShuttle::GetFile(Int_t system, const char* detector, |
1221 | const char* id, const char* source) | |
b948db8d | 1222 | { |
57f50b3c | 1223 | // Get calibration file from file exchange servers |
9d733021 | 1224 | // First queris the FXS database for the file name, using the run, detector, id and source info |
1225 | // then calls RetrieveFile(filename) for actual copy to local disk | |
2bb7b766 | 1226 | // run: current run being processed (given by Logbook entry fLogbookEntry) |
eba76848 | 1227 | // detector: the Preprocessor name |
57f50b3c | 1228 | // id: provided as a parameter by the Preprocessor |
1229 | // source: provided by the Preprocessor through GetFileSources function | |
1230 | ||
1231 | // check connection, in case connect | |
9d733021 | 1232 | if (!Connect(system)) |
eba76848 | 1233 | { |
9d733021 | 1234 | Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system))); |
57f50b3c | 1235 | return 0; |
1236 | } | |
1237 | ||
1238 | // Query preparation | |
9d733021 | 1239 | TString sqlQueryStart; |
1240 | TString whereClause; | |
1241 | TString sourceName(source); | |
1242 | Int_t nFields = 0; | |
1243 | if (system == kDAQ) | |
1244 | { | |
1245 | sqlQueryStart = Form("select filePath,size from %s where", fConfig->GetFXSdbTable(system)); | |
1246 | whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\"", | |
eba76848 | 1247 | GetCurrentRun(), detector, id, source); |
9d733021 | 1248 | nFields = 2; |
57f50b3c | 1249 | |
57f50b3c | 1250 | } |
9d733021 | 1251 | else if (system == kDCS) |
eba76848 | 1252 | { |
9d733021 | 1253 | sqlQueryStart = Form("select filePath,size from %s where", fConfig->GetFXSdbTable(system)); |
1254 | whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"", | |
eba76848 | 1255 | GetCurrentRun(), detector, id); |
9d733021 | 1256 | nFields = 2; |
1257 | sourceName="none"; | |
57f50b3c | 1258 | } |
9d733021 | 1259 | else if (system == kHLT) |
9e080f92 | 1260 | { |
9d733021 | 1261 | sqlQueryStart = Form("select filePath,fileSize,fileChecksum from %s where", |
1262 | fConfig->GetFXSdbTable(system)); | |
1263 | whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\"", | |
1264 | GetCurrentRun(), detector, id, source); | |
1265 | nFields = 3; | |
9e080f92 | 1266 | } |
1267 | ||
9e080f92 | 1268 | TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data()); |
1269 | ||
1270 | AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data())); | |
1271 | ||
1272 | // Query execution | |
1273 | TSQLResult* aResult = 0; | |
9d733021 | 1274 | aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery)); |
9e080f92 | 1275 | if (!aResult) { |
9d733021 | 1276 | Log(detector, Form("GetFileName - Can't execute SQL query to %s database for: id = %s, source = %s", |
1277 | GetSystemName(system), id, sourceName.Data())); | |
9e080f92 | 1278 | return 0; |
1279 | } | |
1280 | ||
1281 | if(aResult->GetRowCount() == 0) | |
1282 | { | |
1283 | Log(detector, | |
9d733021 | 1284 | Form("GetFileName - No entry in %s FXS db for: id = %s, source = %s", |
1285 | GetSystemName(system), id, sourceName.Data())); | |
9e080f92 | 1286 | delete aResult; |
1287 | return 0; | |
1288 | } | |
2bb7b766 | 1289 | |
9e080f92 | 1290 | if (aResult->GetRowCount() > 1) { |
1291 | Log(detector, | |
9d733021 | 1292 | Form("GetFileName - More than one entry in %s FXS db for: id = %s, source = %s", |
1293 | GetSystemName(system), id, sourceName.Data())); | |
9e080f92 | 1294 | delete aResult; |
1295 | return 0; | |
1296 | } | |
1297 | ||
9d733021 | 1298 | if (aResult->GetFieldCount() != nFields) { |
9e080f92 | 1299 | Log(detector, |
9d733021 | 1300 | Form("GetFileName - Wrong field count in %s FXS db for: id = %s, source = %s", |
1301 | GetSystemName(system), id, sourceName.Data())); | |
9e080f92 | 1302 | delete aResult; |
1303 | return 0; | |
1304 | } | |
1305 | ||
1306 | TSQLRow* aRow = dynamic_cast<TSQLRow*> (aResult->Next()); | |
1307 | ||
1308 | if (!aRow){ | |
9d733021 | 1309 | Log(detector, Form("GetFileName - Empty set result in %s FXS db from query: id = %s, source = %s", |
1310 | GetSystemName(system), id, sourceName.Data())); | |
9e080f92 | 1311 | delete aResult; |
1312 | return 0; | |
1313 | } | |
1314 | ||
1315 | TString filePath(aRow->GetField(0), aRow->GetFieldLength(0)); | |
1316 | TString fileSize(aRow->GetField(1), aRow->GetFieldLength(1)); | |
9d733021 | 1317 | TString fileMd5Sum; |
1318 | if(system == kHLT) fileMd5Sum = aRow->GetField(2); | |
9e080f92 | 1319 | |
1320 | delete aResult; | |
1321 | delete aRow; | |
1322 | ||
1323 | AliDebug(2, Form("filePath = %s",filePath.Data())); | |
1324 | ||
9e080f92 | 1325 | // retrieved file is renamed to make it unique |
9d733021 | 1326 | TString localFileName = Form("%s_%s_%d_%s_%s.shuttle", |
1327 | GetSystemName(system), detector, GetCurrentRun(), id, sourceName.Data()); | |
1328 | ||
9e080f92 | 1329 | |
9d733021 | 1330 | // file retrieval from FXS |
4b95672b | 1331 | UInt_t nRetries = 0; |
1332 | UInt_t maxRetries = 3; | |
1333 | Bool_t result = kFALSE; | |
1334 | ||
1335 | // copy!! if successful TSystem::Exec returns 0 | |
1336 | while(nRetries++ < maxRetries) { | |
1337 | AliDebug(2, Form("Trying to copy file. Retry # %d", nRetries)); | |
1338 | result = RetrieveFile(system, filePath.Data(), localFileName.Data()); | |
1339 | if(!result) | |
1340 | { | |
1341 | Log(detector, Form("GetFileName - Copy of file %s from %s FXS failed", | |
9d733021 | 1342 | filePath.Data(), GetSystemName(system))); |
4b95672b | 1343 | continue; |
1344 | } else { | |
1345 | AliInfo(Form("File %s copied from %s FXS into %s/%s", | |
1346 | filePath.Data(), GetSystemName(system), | |
1347 | GetShuttleTempDir(), localFileName.Data())); | |
1348 | } | |
9e080f92 | 1349 | |
4b95672b | 1350 | if (system == kHLT) |
1351 | { | |
1352 | // compare md5sum of local file with the one stored in the FXS DB | |
1353 | Int_t md5Comp = gSystem->Exec(Form("md5sum %s/%s |grep %s 2>&1 > /dev/null", | |
36c99a6a | 1354 | GetShuttleTempDir(), localFileName.Data(), fileMd5Sum.Data())); |
9e080f92 | 1355 | |
4b95672b | 1356 | if (md5Comp != 0) |
1357 | { | |
1358 | Log(detector, Form("GetFileName - md5sum of file %s does not match with local copy!", | |
1359 | filePath.Data())); | |
1360 | result = kFALSE; | |
1361 | continue; | |
1362 | } | |
9d733021 | 1363 | } |
4b95672b | 1364 | if (result) break; |
9e080f92 | 1365 | } |
1366 | ||
4b95672b | 1367 | if(!result) return 0; |
1368 | ||
9d733021 | 1369 | fFXSCalled[system]=kTRUE; |
1370 | TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data())); | |
1371 | fFXSlist[system].Add(fileParams); | |
9e080f92 | 1372 | |
1373 | static TString fullLocalFileName; | |
36c99a6a | 1374 | fullLocalFileName = TString::Format("%s/%s", GetShuttleTempDir(), localFileName.Data()); |
1375 | ||
9e080f92 | 1376 | AliInfo(Form("fullLocalFileName = %s", fullLocalFileName.Data())); |
1377 | ||
1378 | return fullLocalFileName.Data(); | |
2bb7b766 | 1379 | |
1380 | } | |
1381 | ||
1382 | //______________________________________________________________________________________________ | |
9d733021 | 1383 | Bool_t AliShuttle::RetrieveFile(UInt_t system, const char* fxsFileName, const char* localFileName) |
9e080f92 | 1384 | { |
9d733021 | 1385 | // Copies file from FXS to local Shuttle machine |
2bb7b766 | 1386 | |
9e080f92 | 1387 | // check temp directory: trying to cd to temp; if it does not exist, create it |
9d733021 | 1388 | AliDebug(2, Form("Copy file %s from %s FXS into %s/%s", |
1389 | GetSystemName(system), fxsFileName, GetShuttleTempDir(), localFileName)); | |
9e080f92 | 1390 | |
36c99a6a | 1391 | void* dir = gSystem->OpenDirectory(GetShuttleTempDir()); |
9e080f92 | 1392 | if (dir == NULL) { |
36c99a6a | 1393 | if (gSystem->mkdir(GetShuttleTempDir(), kTRUE)) { |
1394 | AliError(Form("Can't open directory <%s>", GetShuttleTempDir())); | |
9e080f92 | 1395 | return kFALSE; |
1396 | } | |
1397 | ||
1398 | } else { | |
1399 | gSystem->FreeDirectory(dir); | |
1400 | } | |
1401 | ||
9d733021 | 1402 | TString baseFXSFolder; |
1403 | if (system == kDAQ) | |
1404 | { | |
1405 | baseFXSFolder = "FES/"; | |
1406 | } | |
1407 | else if (system == kDCS) | |
1408 | { | |
1409 | baseFXSFolder = ""; | |
1410 | } | |
1411 | else if (system == kHLT) | |
1412 | { | |
1413 | baseFXSFolder = "~/"; | |
1414 | } | |
1415 | ||
1416 | ||
1417 | TString command = Form("scp -oPort=%d -2 %s@%s:%s%s %s/%s", | |
1418 | fConfig->GetFXSPort(system), | |
1419 | fConfig->GetFXSUser(system), | |
1420 | fConfig->GetFXSHost(system), | |
1421 | baseFXSFolder.Data(), | |
1422 | fxsFileName, | |
36c99a6a | 1423 | GetShuttleTempDir(), |
9e080f92 | 1424 | localFileName); |
1425 | ||
1426 | AliDebug(2, Form("%s",command.Data())); | |
1427 | ||
4b95672b | 1428 | Bool_t result = (gSystem->Exec(command.Data()) == 0); |
9e080f92 | 1429 | |
4b95672b | 1430 | return result; |
9e080f92 | 1431 | } |
1432 | ||
1433 | //______________________________________________________________________________________________ | |
9d733021 | 1434 | TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char* id) |
1435 | { | |
1436 | // Get sources producing the condition file Id from file exchange servers | |
1437 | ||
1438 | if (system == kDCS) | |
1439 | { | |
1440 | AliError("DCS system has only one source of data!"); | |
1441 | return NULL; | |
1442 | ||
1443 | } | |
9e080f92 | 1444 | |
1445 | // check connection, in case connect | |
9d733021 | 1446 | if (!Connect(system)) |
1447 | { | |
1448 | Log(detector, Form("GetFile - Couldn't connect to %s FXS database", GetSystemName(system))); | |
1449 | return NULL; | |
9e080f92 | 1450 | } |
1451 | ||
9d733021 | 1452 | TString sourceName = 0; |
1453 | if (system == kDAQ) | |
1454 | { | |
1455 | sourceName = "DAQsource"; | |
1456 | } else if (system == kHLT) | |
1457 | { | |
1458 | sourceName = "DDLnumbers"; | |
1459 | } | |
1460 | ||
1461 | TString sqlQueryStart = Form("select %s from %s where", sourceName.Data(), fConfig->GetFXSdbTable(kDAQ)); | |
9e080f92 | 1462 | TString whereClause = Form("run=%d and detector=\"%s\" and fileId=\"%s\"", |
1463 | GetCurrentRun(), detector, id); | |
1464 | TString sqlQuery = Form("%s %s", sqlQueryStart.Data(), whereClause.Data()); | |
1465 | ||
1466 | AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data())); | |
1467 | ||
1468 | // Query execution | |
1469 | TSQLResult* aResult; | |
9d733021 | 1470 | aResult = fServer[system]->Query(sqlQuery); |
9e080f92 | 1471 | if (!aResult) { |
9d733021 | 1472 | Log(detector, Form("GetFileSources - Can't execute SQL query to %s database for id: %s", |
1473 | GetSystemName(system), id)); | |
9e080f92 | 1474 | return 0; |
1475 | } | |
1476 | ||
9d733021 | 1477 | if (aResult->GetRowCount() == 0) |
1478 | { | |
9e080f92 | 1479 | Log(detector, |
9d733021 | 1480 | Form("GetFileSources - No entry in %s FXS table for id: %s", GetSystemName(system), id)); |
9e080f92 | 1481 | delete aResult; |
1482 | return 0; | |
1483 | } | |
1484 | ||
1485 | TSQLRow* aRow; | |
1486 | TList *list = new TList(); | |
1487 | list->SetOwner(1); | |
1488 | ||
9d733021 | 1489 | while ((aRow = aResult->Next())) |
1490 | { | |
9e080f92 | 1491 | |
9d733021 | 1492 | TString source(aRow->GetField(0), aRow->GetFieldLength(0)); |
1493 | AliDebug(2, Form("%s = %s", sourceName.Data(), source.Data())); | |
1494 | list->Add(new TObjString(source)); | |
9e080f92 | 1495 | delete aRow; |
1496 | } | |
9d733021 | 1497 | |
9e080f92 | 1498 | delete aResult; |
1499 | ||
1500 | return list; | |
2bb7b766 | 1501 | } |
1502 | ||
1503 | //______________________________________________________________________________________________ | |
9d733021 | 1504 | Bool_t AliShuttle::Connect(Int_t system) |
2bb7b766 | 1505 | { |
9d733021 | 1506 | // Connect to MySQL Server of the system's FXS MySQL databases |
1507 | // DAQ Logbook, Shuttle Logbook and DAQ FXS db are on the same host | |
57f50b3c | 1508 | |
9d733021 | 1509 | // check connection: if already connected return |
1510 | if(fServer[system] && fServer[system]->IsConnected()) return kTRUE; | |
57f50b3c | 1511 | |
9d733021 | 1512 | TString dbHost, dbUser, dbPass, dbName; |
57f50b3c | 1513 | |
9d733021 | 1514 | if (system < 3) // FXS db servers |
1515 | { | |
1516 | dbHost = Form("mysql://%s:%d", fConfig->GetFXSdbHost(system), fConfig->GetFXSdbPort(system)); | |
1517 | dbUser = fConfig->GetFXSdbUser(system); | |
1518 | dbPass = fConfig->GetFXSdbPass(system); | |
1519 | dbName = fConfig->GetFXSdbName(system); | |
1520 | } else { // Run & Shuttle logbook servers | |
1521 | // TODO Will the Shuttle logbook server be the same as the Run logbook server ??? | |
1522 | dbHost = Form("mysql://%s:%d", fConfig->GetDAQlbHost(), fConfig->GetDAQlbPort()); | |
1523 | dbUser = fConfig->GetDAQlbUser(); | |
1524 | dbPass = fConfig->GetDAQlbPass(); | |
1525 | dbName = fConfig->GetDAQlbDB(); | |
1526 | } | |
57f50b3c | 1527 | |
9d733021 | 1528 | fServer[system] = TSQLServer::Connect(dbHost.Data(), dbUser.Data(), dbPass.Data()); |
1529 | if (!fServer[system] || !fServer[system]->IsConnected()) { | |
1530 | if(system < 3) | |
1531 | { | |
1532 | AliError(Form("Can't establish connection to FXS database for %s", | |
1533 | AliShuttleInterface::GetSystemName(system))); | |
1534 | } else { | |
1535 | AliError("Can't establish connection to Run logbook."); | |
57f50b3c | 1536 | } |
9d733021 | 1537 | if(fServer[system]) delete fServer[system]; |
1538 | return kFALSE; | |
2bb7b766 | 1539 | } |
57f50b3c | 1540 | |
9d733021 | 1541 | // Get tables |
1542 | TSQLResult* aResult=0; | |
1543 | switch(system){ | |
1544 | case kDAQ: | |
1545 | aResult = fServer[kDAQ]->GetTables(dbName.Data()); | |
1546 | break; | |
1547 | case kDCS: | |
1548 | aResult = fServer[kDCS]->GetTables(dbName.Data()); | |
1549 | break; | |
1550 | case kHLT: | |
1551 | aResult = fServer[kHLT]->GetTables(dbName.Data()); | |
1552 | break; | |
1553 | default: | |
1554 | aResult = fServer[3]->GetTables(dbName.Data()); | |
1555 | break; | |
1556 | } | |
1557 | ||
1558 | delete aResult; | |
2bb7b766 | 1559 | return kTRUE; |
1560 | } | |
57f50b3c | 1561 | |
9e080f92 | 1562 | //______________________________________________________________________________________________ |
9d733021 | 1563 | Bool_t AliShuttle::UpdateTable() |
9e080f92 | 1564 | { |
9d733021 | 1565 | // Update FXS table filling time_processed field in all rows corresponding to current run and detector |
9e080f92 | 1566 | |
9d733021 | 1567 | Bool_t result = kTRUE; |
9e080f92 | 1568 | |
9d733021 | 1569 | for (UInt_t system=0; system<3; system++) |
1570 | { | |
1571 | if(!fFXSCalled[system]) continue; | |
9e080f92 | 1572 | |
9d733021 | 1573 | // check connection, in case connect |
1574 | if (!Connect(system)) | |
1575 | { | |
1576 | Log(fCurrentDetector, Form("UpdateTable - Couldn't connect to %s FXS database", GetSystemName(system))); | |
1577 | result = kFALSE; | |
1578 | continue; | |
9e080f92 | 1579 | } |
9e080f92 | 1580 | |
9d733021 | 1581 | TTimeStamp now; // now |
1582 | ||
1583 | // Loop on FXS list entries | |
1584 | TIter iter(&fFXSlist[system]); | |
1585 | TObjString *aFXSentry=0; | |
1586 | while ((aFXSentry = dynamic_cast<TObjString*> (iter.Next()))) | |
1587 | { | |
1588 | TString aFXSentrystr = aFXSentry->String(); | |
1589 | TObjArray *aFXSarray = aFXSentrystr.Tokenize("#!?!#"); | |
1590 | if (!aFXSarray || aFXSarray->GetEntries() != 2 ) | |
1591 | { | |
1592 | Log(fCurrentDetector, Form("UpdateTable - error updating %s FXS entry. Check string: <%s>", | |
1593 | GetSystemName(system), aFXSentrystr.Data())); | |
1594 | if(aFXSarray) delete aFXSarray; | |
1595 | result = kFALSE; | |
1596 | continue; | |
1597 | } | |
1598 | const char* fileId = ((TObjString*) aFXSarray->At(0))->GetName(); | |
1599 | const char* source = ((TObjString*) aFXSarray->At(1))->GetName(); | |
1600 | ||
1601 | TString whereClause; | |
1602 | if (system == kDAQ) | |
1603 | { | |
1604 | whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DAQsource=\"%s\";", | |
1605 | GetCurrentRun(), fCurrentDetector.Data(), fileId, source); | |
1606 | } | |
1607 | else if (system == kDCS) | |
1608 | { | |
1609 | whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\";", | |
1610 | GetCurrentRun(), fCurrentDetector.Data(), fileId); | |
1611 | } | |
1612 | else if (system == kHLT) | |
1613 | { | |
1614 | whereClause = Form("where run=%d and detector=\"%s\" and fileId=\"%s\" and DDLnumbers=\"%s\";", | |
1615 | GetCurrentRun(), fCurrentDetector.Data(), fileId, source); | |
1616 | } | |
1617 | ||
1618 | delete aFXSarray; | |
9e080f92 | 1619 | |
9d733021 | 1620 | TString sqlQuery = Form("update %s set time_processed=%d %s", fConfig->GetFXSdbTable(system), |
1621 | now.GetSec(), whereClause.Data()); | |
9e080f92 | 1622 | |
9d733021 | 1623 | AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data())); |
9e080f92 | 1624 | |
9d733021 | 1625 | // Query execution |
1626 | TSQLResult* aResult; | |
1627 | aResult = dynamic_cast<TSQLResult*> (fServer[system]->Query(sqlQuery)); | |
1628 | if (!aResult) | |
1629 | { | |
1630 | Log(fCurrentDetector, Form("UpdateTable - %s db: can't execute SQL query <%s>", | |
1631 | GetSystemName(system), sqlQuery.Data())); | |
1632 | result = kFALSE; | |
1633 | continue; | |
1634 | } | |
1635 | delete aResult; | |
9e080f92 | 1636 | } |
9e080f92 | 1637 | } |
1638 | ||
9d733021 | 1639 | return result; |
9e080f92 | 1640 | } |
57f50b3c | 1641 | |
2bb7b766 | 1642 | //______________________________________________________________________________________________ |
1643 | Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status) | |
1644 | { | |
e7f62f16 | 1645 | // |
1646 | // Update Shuttle logbook filling detector or shuttle_done column | |
1647 | // ex. of usage: UpdateShuttleLogbook("PHOS", "DONE") or UpdateShuttleLogbook("shuttle_done") | |
1648 | // | |
57f50b3c | 1649 | |
2bb7b766 | 1650 | // check connection, in case connect |
be48e3ea | 1651 | if(!Connect(3)){ |
2bb7b766 | 1652 | Log("SHUTTLE", "UpdateShuttleLogbook - Couldn't connect to DAQ Logbook."); |
1653 | return kFALSE; | |
57f50b3c | 1654 | } |
1655 | ||
2bb7b766 | 1656 | TString detName(detector); |
1657 | TString setClause; | |
e7f62f16 | 1658 | if(detName == "shuttle_done") |
1659 | { | |
2bb7b766 | 1660 | setClause = "set shuttle_done=1"; |
e7f62f16 | 1661 | |
1662 | // Send the information to ML | |
1663 | TMonaLisaText mlStatus("SHUTTLE_status", "Done"); | |
1664 | ||
1665 | TList mlList; | |
1666 | mlList.Add(&mlStatus); | |
1667 | ||
1668 | fMonaLisa->SendParameters(&mlList); | |
2bb7b766 | 1669 | } else { |
2bb7b766 | 1670 | TString statusStr(status); |
1671 | if(statusStr.Contains("done", TString::kIgnoreCase) || | |
1672 | statusStr.Contains("failed", TString::kIgnoreCase)){ | |
eba76848 | 1673 | setClause = Form("set %s=\"%s\"", detector, status); |
2bb7b766 | 1674 | } else { |
1675 | Log("SHUTTLE", | |
1676 | Form("UpdateShuttleLogbook - Invalid status <%s> for detector %s", | |
1677 | status, detector)); | |
1678 | return kFALSE; | |
1679 | } | |
1680 | } | |
57f50b3c | 1681 | |
2bb7b766 | 1682 | TString whereClause = Form("where run=%d", GetCurrentRun()); |
1683 | ||
1684 | TString sqlQuery = Form("update logbook_shuttle %s %s", | |
1685 | setClause.Data(), whereClause.Data()); | |
57f50b3c | 1686 | |
2bb7b766 | 1687 | AliDebug(2, Form("SQL query: \n%s",sqlQuery.Data())); |
1688 | ||
1689 | // Query execution | |
1690 | TSQLResult* aResult; | |
be48e3ea | 1691 | aResult = dynamic_cast<TSQLResult*> (fServer[3]->Query(sqlQuery)); |
2bb7b766 | 1692 | if (!aResult) { |
1693 | Log("SHUTTLE", Form("UpdateShuttleLogbook - Can't execute query <%s>", sqlQuery.Data())); | |
1694 | return kFALSE; | |
57f50b3c | 1695 | } |
2bb7b766 | 1696 | delete aResult; |
57f50b3c | 1697 | |
1698 | return kTRUE; | |
1699 | } | |
1700 | ||
1701 | //______________________________________________________________________________________________ | |
2bb7b766 | 1702 | Int_t AliShuttle::GetCurrentRun() const |
1703 | { | |
1704 | // Get current run from logbook entry | |
57f50b3c | 1705 | |
2bb7b766 | 1706 | return fLogbookEntry ? fLogbookEntry->GetRun() : -1; |
57f50b3c | 1707 | } |
1708 | ||
1709 | //______________________________________________________________________________________________ | |
2bb7b766 | 1710 | UInt_t AliShuttle::GetCurrentStartTime() const |
1711 | { | |
1712 | // get current start time | |
57f50b3c | 1713 | |
2bb7b766 | 1714 | return fLogbookEntry ? fLogbookEntry->GetStartTime() : 0; |
57f50b3c | 1715 | } |
1716 | ||
1717 | //______________________________________________________________________________________________ | |
2bb7b766 | 1718 | UInt_t AliShuttle::GetCurrentEndTime() const |
1719 | { | |
1720 | // get current end time from logbook entry | |
57f50b3c | 1721 | |
2bb7b766 | 1722 | return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0; |
57f50b3c | 1723 | } |
1724 | ||
b948db8d | 1725 | //______________________________________________________________________________________________ |
1726 | void AliShuttle::Log(const char* detector, const char* message) | |
1727 | { | |
58bc3020 | 1728 | // Fill log string with a message |
b948db8d | 1729 | |
36c99a6a | 1730 | void* dir = gSystem->OpenDirectory(GetShuttleLogDir()); |
84090f85 | 1731 | if (dir == NULL) { |
36c99a6a | 1732 | if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) { |
1733 | AliError(Form("Can't open directory <%s>", GetShuttleLogDir())); | |
84090f85 | 1734 | return; |
1735 | } | |
b948db8d | 1736 | |
84090f85 | 1737 | } else { |
1738 | gSystem->FreeDirectory(dir); | |
1739 | } | |
b948db8d | 1740 | |
cb343cfd | 1741 | TString toLog = Form("%s (%d): %s - ", TTimeStamp(time(0)).AsString("s"), getpid(), detector); |
e7f62f16 | 1742 | if (GetCurrentRun() >= 0) |
1743 | toLog += Form("run %d - ", GetCurrentRun()); | |
2bb7b766 | 1744 | toLog += Form("%s", message); |
1745 | ||
84090f85 | 1746 | AliInfo(toLog.Data()); |
b948db8d | 1747 | |
84090f85 | 1748 | TString fileName; |
e7f62f16 | 1749 | if (GetCurrentRun() >= 0) |
1750 | fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun()); | |
1751 | else | |
1752 | fileName.Form("%s/%s.log", GetShuttleLogDir(), detector); | |
1753 | ||
84090f85 | 1754 | gSystem->ExpandPathName(fileName); |
1755 | ||
1756 | ofstream logFile; | |
1757 | logFile.open(fileName, ofstream::out | ofstream::app); | |
1758 | ||
1759 | if (!logFile.is_open()) { | |
1760 | AliError(Form("Could not open file %s", fileName.Data())); | |
1761 | return; | |
1762 | } | |
7bfb2090 | 1763 | |
84090f85 | 1764 | logFile << toLog.Data() << "\n"; |
b948db8d | 1765 | |
84090f85 | 1766 | logFile.close(); |
b948db8d | 1767 | } |
2bb7b766 | 1768 | |
2bb7b766 | 1769 | //______________________________________________________________________________________________ |
1770 | Bool_t AliShuttle::Collect(Int_t run) | |
1771 | { | |
eba76848 | 1772 | // |
1773 | // Collects conditions data for all UNPROCESSED run written to DAQ LogBook in case of run = -1 (default) | |
1774 | // If a dedicated run is given this run is processed | |
1775 | // | |
1776 | // In operational mode, this is the Shuttle function triggered by the EOR signal. | |
1777 | // | |
2bb7b766 | 1778 | |
eba76848 | 1779 | if (run == -1) |
1780 | Log("SHUTTLE","Collect - Shuttle called. Collecting conditions data for unprocessed runs"); | |
1781 | else | |
1782 | Log("SHUTTLE", Form("Collect - Shuttle called. Collecting conditions data for run %d", run)); | |
cb343cfd | 1783 | |
1784 | SetLastAction("Starting"); | |
2bb7b766 | 1785 | |
1786 | TString whereClause("where shuttle_done=0"); | |
eba76848 | 1787 | if (run != -1) |
1788 | whereClause += Form(" and run=%d", run); | |
2bb7b766 | 1789 | |
1790 | TObjArray shuttleLogbookEntries; | |
be48e3ea | 1791 | if (!QueryShuttleLogbook(whereClause, shuttleLogbookEntries)) |
1792 | { | |
cb343cfd | 1793 | Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook"); |
2bb7b766 | 1794 | return kFALSE; |
1795 | } | |
1796 | ||
9e080f92 | 1797 | if (shuttleLogbookEntries.GetEntries() == 0) |
1798 | { | |
1799 | if (run == -1) | |
1800 | Log("SHUTTLE","Collect - Found no UNPROCESSED runs in Shuttle logbook"); | |
1801 | else | |
1802 | Log("SHUTTLE", Form("Collect - Run %d is already DONE " | |
1803 | "or it does not exist in Shuttle logbook", run)); | |
1804 | return kTRUE; | |
1805 | } | |
1806 | ||
be48e3ea | 1807 | for (UInt_t iDet=0; iDet<NDetectors(); iDet++) |
1808 | fFirstUnprocessed[iDet] = kTRUE; | |
1809 | ||
fc5a4708 | 1810 | if (run != -1) |
be48e3ea | 1811 | { |
1812 | // query Shuttle logbook for earlier runs, check if some detectors are unprocessed, | |
1813 | // flag them into fFirstUnprocessed array | |
1814 | TString whereClause(Form("where shuttle_done=0 and run < %d", run)); | |
1815 | TObjArray tmpLogbookEntries; | |
1816 | if (!QueryShuttleLogbook(whereClause, tmpLogbookEntries)) | |
1817 | { | |
1818 | Log("SHUTTLE", "Collect - Can't retrieve entries from Shuttle logbook"); | |
1819 | return kFALSE; | |
1820 | } | |
1821 | ||
1822 | TIter iter(&tmpLogbookEntries); | |
1823 | AliShuttleLogbookEntry* anEntry = 0; | |
1824 | while ((anEntry = dynamic_cast<AliShuttleLogbookEntry*> (iter.Next()))) | |
1825 | { | |
1826 | for (UInt_t iDet=0; iDet<NDetectors(); iDet++) | |
1827 | { | |
1828 | if (anEntry->GetDetectorStatus(iDet) == AliShuttleLogbookEntry::kUnprocessed) | |
1829 | { | |
1830 | AliDebug(2, Form("Run %d: setting %s as \"not first time unprocessed\"", | |
1831 | anEntry->GetRun(), GetDetName(iDet))); | |
1832 | fFirstUnprocessed[iDet] = kFALSE; | |
1833 | } | |
1834 | } | |
1835 | ||
1836 | } | |
1837 | ||
1838 | } | |
1839 | ||
1840 | if (!RetrieveConditionsData(shuttleLogbookEntries)) | |
1841 | { | |
cb343cfd | 1842 | Log("SHUTTLE", "Collect - Process of at least one run failed"); |
2bb7b766 | 1843 | return kFALSE; |
1844 | } | |
1845 | ||
36c99a6a | 1846 | Log("SHUTTLE", "Collect - Requested run(s) successfully processed"); |
eba76848 | 1847 | return kTRUE; |
2bb7b766 | 1848 | } |
1849 | ||
2bb7b766 | 1850 | //______________________________________________________________________________________________ |
1851 | Bool_t AliShuttle::RetrieveConditionsData(const TObjArray& dateEntries) | |
1852 | { | |
1853 | // Retrieve conditions data for all runs that aren't processed yet | |
1854 | ||
1855 | Bool_t hasError = kFALSE; | |
1856 | ||
1857 | TIter iter(&dateEntries); | |
1858 | AliShuttleLogbookEntry* anEntry; | |
1859 | ||
1860 | while ((anEntry = (AliShuttleLogbookEntry*) iter.Next())){ | |
1861 | if (!Process(anEntry)){ | |
1862 | hasError = kTRUE; | |
1863 | } | |
4b95672b | 1864 | |
1865 | // clean SHUTTLE temp directory | |
1866 | TString command = Form("rm -f %s/*.shuttle", GetShuttleTempDir()); | |
1867 | gSystem->Exec(command.Data()); | |
2bb7b766 | 1868 | } |
1869 | ||
1870 | return hasError == kFALSE; | |
1871 | } | |
cb343cfd | 1872 | |
1873 | //______________________________________________________________________________________________ | |
1874 | ULong_t AliShuttle::GetTimeOfLastAction() const | |
1875 | { | |
1876 | ULong_t tmp; | |
36c99a6a | 1877 | |
cb343cfd | 1878 | fMonitoringMutex->Lock(); |
be48e3ea | 1879 | |
cb343cfd | 1880 | tmp = fLastActionTime; |
36c99a6a | 1881 | |
cb343cfd | 1882 | fMonitoringMutex->UnLock(); |
36c99a6a | 1883 | |
cb343cfd | 1884 | return tmp; |
1885 | } | |
1886 | ||
1887 | //______________________________________________________________________________________________ | |
1888 | const TString AliShuttle::GetLastAction() const | |
1889 | { | |
1890 | // returns a string description of the last action | |
1891 | ||
1892 | TString tmp; | |
36c99a6a | 1893 | |
cb343cfd | 1894 | fMonitoringMutex->Lock(); |
1895 | ||
1896 | tmp = fLastAction; | |
1897 | ||
1898 | fMonitoringMutex->UnLock(); | |
1899 | ||
36c99a6a | 1900 | return tmp; |
cb343cfd | 1901 | } |
1902 | ||
1903 | //______________________________________________________________________________________________ | |
1904 | void AliShuttle::SetLastAction(const char* action) | |
1905 | { | |
1906 | // updates the monitoring variables | |
36c99a6a | 1907 | |
cb343cfd | 1908 | fMonitoringMutex->Lock(); |
36c99a6a | 1909 | |
cb343cfd | 1910 | fLastAction = action; |
1911 | fLastActionTime = time(0); | |
1912 | ||
1913 | fMonitoringMutex->UnLock(); | |
1914 | } | |
eba76848 | 1915 | |
1916 | //______________________________________________________________________________________________ | |
1917 | const char* AliShuttle::GetRunParameter(const char* param) | |
1918 | { | |
1919 | // returns run parameter read from DAQ logbook | |
1920 | ||
1921 | if(!fLogbookEntry) { | |
1922 | AliError("No logbook entry!"); | |
1923 | return 0; | |
1924 | } | |
1925 | ||
1926 | return fLogbookEntry->GetRunParameter(param); | |
1927 | } | |
57c1a579 | 1928 | |
1929 | //______________________________________________________________________________________________ | |
1930 | Bool_t AliShuttle::SendMail() | |
1931 | { | |
1932 | // sends a mail to the subdetector expert in case of preprocessor error | |
1933 | ||
36c99a6a | 1934 | void* dir = gSystem->OpenDirectory(GetShuttleLogDir()); |
57c1a579 | 1935 | if (dir == NULL) |
1936 | { | |
36c99a6a | 1937 | if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) |
57c1a579 | 1938 | { |
36c99a6a | 1939 | AliError(Form("Can't open directory <%s>", GetShuttleLogDir())); |
57c1a579 | 1940 | return kFALSE; |
1941 | } | |
1942 | ||
1943 | } else { | |
1944 | gSystem->FreeDirectory(dir); | |
1945 | } | |
1946 | ||
1947 | TString bodyFileName; | |
36c99a6a | 1948 | bodyFileName.Form("%s/mail.body", GetShuttleLogDir()); |
57c1a579 | 1949 | gSystem->ExpandPathName(bodyFileName); |
1950 | ||
1951 | ofstream mailBody; | |
1952 | mailBody.open(bodyFileName, ofstream::out); | |
1953 | ||
1954 | if (!mailBody.is_open()) | |
1955 | { | |
1956 | AliError(Form("Could not open mail body file %s", bodyFileName.Data())); | |
1957 | return kFALSE; | |
1958 | } | |
1959 | ||
1960 | TString to=""; | |
1961 | TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector)); | |
1962 | TObjString *anExpert=0; | |
1963 | while ((anExpert = (TObjString*) iterExperts.Next())) | |
1964 | { | |
1965 | to += Form("%s,", anExpert->GetName()); | |
1966 | } | |
1967 | to.Remove(to.Length()-1); | |
909732f7 | 1968 | AliDebug(2, Form("to: %s",to.Data())); |
57c1a579 | 1969 | |
36c99a6a | 1970 | // TODO this will be removed... |
1971 | if (to.Contains("not_yet_set")) { | |
1972 | AliInfo("List of detector responsibles not yet set!"); | |
1973 | return kFALSE; | |
1974 | } | |
1975 | ||
57c1a579 | 1976 | TString cc="alberto.colla@cern.ch"; |
1977 | ||
1978 | TString subject = Form("%s Shuttle preprocessor error in run %d !", | |
1979 | fCurrentDetector.Data(), GetCurrentRun()); | |
909732f7 | 1980 | AliDebug(2, Form("subject: %s", subject.Data())); |
57c1a579 | 1981 | |
1982 | TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data()); | |
1983 | body += Form("SHUTTLE just detected that your preprocessor " | |
36c99a6a | 1984 | "exited with ERROR state in run %d!!\n\n", GetCurrentRun()); |
57c1a579 | 1985 | body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data()); |
1986 | body += Form("The last 10 lines of %s log file are following:\n\n"); | |
1987 | ||
909732f7 | 1988 | AliDebug(2, Form("Body begin: %s", body.Data())); |
57c1a579 | 1989 | |
1990 | mailBody << body.Data(); | |
1991 | mailBody.close(); | |
1992 | mailBody.open(bodyFileName, ofstream::out | ofstream::app); | |
1993 | ||
9d733021 | 1994 | TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun()); |
57c1a579 | 1995 | TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data()); |
1996 | if (gSystem->Exec(tailCommand.Data())) | |
1997 | { | |
1998 | mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data()); | |
1999 | } | |
2000 | ||
2001 | TString endBody = Form("------------------------------------------------------\n\n"); | |
36c99a6a | 2002 | endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n"); |
2003 | endBody += "Please do not answer this message directly, it is automatically generated.\n\n"; | |
57c1a579 | 2004 | endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n"; |
2005 | ||
909732f7 | 2006 | AliDebug(2, Form("Body end: %s", endBody.Data())); |
57c1a579 | 2007 | |
2008 | mailBody << endBody.Data(); | |
2009 | ||
2010 | mailBody.close(); | |
2011 | ||
2012 | // send mail! | |
2013 | TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s", | |
2014 | subject.Data(), | |
2015 | cc.Data(), | |
2016 | to.Data(), | |
2017 | bodyFileName.Data()); | |
909732f7 | 2018 | AliDebug(2, Form("mail command: %s", mailCommand.Data())); |
57c1a579 | 2019 | |
2020 | Bool_t result = gSystem->Exec(mailCommand.Data()); | |
2021 | ||
2022 | return result == 0; | |
2023 | } | |
36c99a6a | 2024 | |
2025 | //______________________________________________________________________________________________ | |
2026 | void AliShuttle::SetShuttleTempDir(const char* tmpDir) | |
2027 | { | |
2028 | // sets Shuttle temp directory | |
2029 | ||
2030 | fgkShuttleTempDir = gSystem->ExpandPathName(tmpDir); | |
2031 | } | |
2032 | ||
2033 | //______________________________________________________________________________________________ | |
2034 | void AliShuttle::SetShuttleLogDir(const char* logDir) | |
2035 | { | |
2036 | // sets Shuttle log directory | |
2037 | ||
2038 | fgkShuttleLogDir = gSystem->ExpandPathName(logDir); | |
2039 | } |