]> git.uio.no Git - u/mrichter/AliRoot.git/blame - SHUTTLE/AliShuttleTrigger.cxx
Update n expected fields (26->27) for shuttle_logbook
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttleTrigger.cxx
CommitLineData
d477ad88 1/**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3 * *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
6 * *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
15
16/*
17 $Log$
1abfbb60 18 Revision 1.15 2007/12/10 18:29:23 acolla
19 Some log added to the listen mode
20
6a926ad4 21 Revision 1.14 2007/12/07 19:14:36 acolla
22 in AliShuttleTrigger:
23
24 Added automatic collection of new runs on a regular time basis (settable from the configuration)
25
26 in AliShuttleConfig: new members
27
28 - triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
29 - mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
30
31 in AliShuttle:
32
33 - logs now stored in logs/#RUN/DET_#RUN.log
34
7d4cf768 35 Revision 1.13 2006/11/16 16:16:48 jgrosseo
36 introducing strict run ordering flag
37 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
38
be48e3ea 39 Revision 1.12 2006/10/20 15:22:59 jgrosseo
40 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
41 o) Merging Collect, CollectAll, CollectNew function
42 o) Removing implementation of empty copy constructors (declaration still there!)
43
cb343cfd 44 Revision 1.11 2006/10/02 16:38:39 jgrosseo
45 update (alberto):
46 fixed memory leaks
47 storing of objects that failed to be stored to the grid before
48 interfacing of shuttle status table in daq system
49
2bb7b766 50 Revision 1.10 2006/08/15 10:50:00 jgrosseo
51 effc++ corrections (alberto)
52
4f0ab988 53 Revision 1.9 2006/08/08 14:19:29 jgrosseo
54 Update to shuttle classes (Alberto)
55
56 - Possibility to set the full object's path in the Preprocessor's and
57 Shuttle's Store functions
58 - Possibility to extend the object's run validity in the same classes
59 ("startValidity" and "validityInfinite" parameters)
60 - Implementation of the StoreReferenceData function to store reference
61 data in a dedicated CDB storage.
62
84090f85 63 Revision 1.8 2006/07/21 07:37:20 jgrosseo
64 last run is stored after each run
65
7bfb2090 66 Revision 1.7 2006/07/20 09:54:40 jgrosseo
67 introducing status management: The processing per subdetector is divided into several steps,
68 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
69 can keep track of the number of failures and skips further processing after a certain threshold is
70 exceeded. These thresholds can be configured in LDAP.
71
5164a766 72 Revision 1.6 2006/07/19 10:09:55 jgrosseo
73 new configuration, accesst to DAQ FES (Alberto)
74
57f50b3c 75 Revision 1.5 2006/07/10 13:01:41 jgrosseo
76 enhanced storing of last sucessfully processed run (alberto)
77
a7160fe9 78 Revision 1.4 2006/07/04 14:59:57 jgrosseo
79 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
80
45a493ce 81 Revision 1.3 2006/06/12 09:11:16 jgrosseo
82 coding conventions (Alberto)
83
58bc3020 84 Revision 1.2 2006/06/06 14:26:40 jgrosseo
85 o) removed files that were moved to STEER
86 o) shuttle updated to follow the new interface (Alberto)
87
b948db8d 88 Revision 1.1 2006/03/07 07:52:34 hristov
89 New version (B.Yordanov)
90
d477ad88 91 Revision 1.5 2005/11/21 09:03:48 byordano
92 one more print added
93
94 Revision 1.4 2005/11/20 10:12:37 byordano
95 comments added to AliShuttleTrigger
96
97 */
98
99
100//
101// This class is to deal with DAQ LogBook and DAQ "end of run" notification.
102// It has two modes:
cb343cfd 103// 1) synchronous - Collect()
b948db8d 104// 2) asynchronous - Run() - starts listening for the DAQ "end of run"
d477ad88 105// notification sent by the DIM service.
106//
107
108#include "AliShuttleTrigger.h"
109
d477ad88 110#include <TSystem.h>
51657f6d 111#include <TGrid.h>
fb2975a2 112#include <TObjString.h>
cb343cfd 113
d477ad88 114#include "AliLog.h"
d477ad88 115#include "AliShuttleConfig.h"
116#include "AliShuttle.h"
117#include "DATENotifier.h"
118
fb2975a2 119#include <fstream>
120
d477ad88 121ClassImp(TerminateSignalHandler)
cb343cfd 122ClassImp(AliShuttleTrigger)
58bc3020 123
b948db8d 124//______________________________________________________________________________________________
cb343cfd 125Bool_t TerminateSignalHandler::Notify()
58bc3020 126{
127// Sentd terminate command to the Shuttle trigger
d477ad88 128
129 AliInfo("Terminate signal received ...");
130 fTrigger->Terminate();
131
132 return kTRUE;
133}
134
b948db8d 135//______________________________________________________________________________________________
ff3781ad 136AliShuttleTrigger::AliShuttleTrigger(const AliShuttleConfig* config):
b948db8d 137 fConfig(config), fShuttle(NULL),
2bb7b766 138 fNotified(kFALSE), fTerminate(kFALSE),
4f0ab988 139 fMutex(), fCondition(&fMutex),
cb343cfd 140 fQuitSignalHandler(0),
fb2975a2 141 fInterruptSignalHandler(0),
142 fLastMailDiskSpace(0)
d477ad88 143{
144 //
145 // config - pointer to the AliShuttleConfig object which represents
146 // the configuration
b948db8d 147 // mainStorage - pointer to AliCDBStorage for the undelying CDBStorage
148 // localStorage (local) CDB storage to be used if mainStorage is unavailable
d477ad88 149 //
150
7d4cf768 151 if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
ff3781ad 152 UInt_t timeout = fConfig->GetDCSTimeOut();
153 Int_t retries = fConfig->GetDCSRetries();
b948db8d 154 fShuttle = new AliShuttle(config, timeout, retries);
d477ad88 155
28a94b8e 156 fQuitSignalHandler = new TerminateSignalHandler(this, kSigQuit);
157 fInterruptSignalHandler = new TerminateSignalHandler(this, kSigInterrupt);
58bc3020 158
cb343cfd 159 gSystem->AddSignalHandler(fQuitSignalHandler);
160 gSystem->AddSignalHandler(fInterruptSignalHandler);
58bc3020 161
162}
163
b948db8d 164//______________________________________________________________________________________________
58bc3020 165AliShuttleTrigger::~AliShuttleTrigger()
166{
cb343cfd 167 // destructor
d477ad88 168
cb343cfd 169 gSystem->RemoveSignalHandler(fQuitSignalHandler);
170 gSystem->RemoveSignalHandler(fInterruptSignalHandler);
d477ad88 171
172 delete fShuttle;
cb343cfd 173
174 delete fQuitSignalHandler;
175 fQuitSignalHandler = 0;
176
177 delete fInterruptSignalHandler;
178 fInterruptSignalHandler = 0;
d477ad88 179}
180
b948db8d 181//______________________________________________________________________________________________
d477ad88 182Bool_t AliShuttleTrigger::Notify() {
183 //
cb343cfd 184 // Trigger Collect() methods in asynchronized (listen) mode.
d477ad88 185 // Usually called automaticly by DATENotifier on "end of run"
186 // notification event.
187 //
188
189 fMutex.Lock();
190
191 fNotified = kTRUE;
192 fCondition.Signal();
193
194 fMutex.UnLock();
195
196 return kTRUE;
197}
198
b948db8d 199//______________________________________________________________________________________________
d477ad88 200void AliShuttleTrigger::Terminate() {
201 //
202 // Stop triggers listen mode and exist from Run()
203 // Usually called automaticly by TerminateSignalHandler.
204 //
205
206 fTerminate = kTRUE;
207 fCondition.Signal();
208}
209
a5ecdb19 210//______________________________________________________________________________________________
211void AliShuttleTrigger::CheckTerminate()
212{
213 //
214 // Checks if the Shuttle got an external terminate request by a created file
215 // This is an alternative to the signal which causes problems with the API libraries
216 //
217
218 if (strlen(fConfig->GetTerminateFilePath()) == 0)
219 return;
220
94bf758b 221 if (gSystem->AccessPathName(fConfig->GetTerminateFilePath()) == kFALSE)
a5ecdb19 222 {
223 AliInfo("Terminate file exists. Terminating Shuttle...");
224 fTerminate = kTRUE;
225 }
226}
227
b948db8d 228//______________________________________________________________________________________________
d477ad88 229void AliShuttleTrigger::Run() {
230 //
231 // AliShuttleTrigger main loop for asynchronized (listen) mode.
232 // It spawns DIM service listener and waits for DAQ "end of run"
cb343cfd 233 // notification. Calls Collect() on notification.
d477ad88 234 //
235
236 fTerminate = kFALSE;
237
4a5d9e0d 238 DATENotifier* notifier = new DATENotifier(this, "/LOGBOOK/SUBSCRIBE/ECS_EOR");
d477ad88 239
6a926ad4 240 Int_t nTry=0;
241 Int_t nMaxTry = fConfig->GetMaxRetries()+1;
242 Int_t received=0;
243
244 AliInfo("Listening for ECS trigger");
7d4cf768 245
d477ad88 246 while (1) {
247
248 fMutex.Lock();
249
250 while (!(fNotified || fTerminate)) {
51657f6d 251 for (Int_t iwait = 0; iwait < 10; iwait++){
252 received = fCondition.TimedWaitRelative(1000*fConfig->GetTriggerWait()/10); // to keep the connection to the server alive every minute while waiting for new runs
253 if (received == 1) {
254 if (gGrid) {
255 AliInfo(Form("Keeping the connection to the server alive while waiting for new runs - %d waited from last one", fConfig->GetTriggerWait()/10*iwait));
256 gGrid->Pwd();
257 }
258 else {
259 AliInfo("No gGrid initialized so far, we cannot keep the connection to the server alive while waiting for new runs");
260 }
261 }
262 }
263 //received=fCondition.TimedWaitRelative(1000*fConfig->GetTriggerWait());
264 CheckTerminate();
265 if (received==1) break; // 1 = timeout
d477ad88 266 }
267
268 fNotified = kFALSE;
269
270 fMutex.UnLock();
271
272 if (fTerminate) {
273 AliInfo("Terminated.");
274 break;
275 }
7d4cf768 276
6a926ad4 277 if (received == 0)
278 {
279 AliInfo("Trigger from ECS received!");
280 } else if (received == 1) {
281 AliInfo(Form("Timeout (%d s) waiting for trigger. "
282 "Starting collection of new runs!",
283 fConfig->GetTriggerWait()));
284 } else {
285 AliInfo("Error receiving trigger from ECS!");
286 break;
287 }
288
1abfbb60 289 nTry++;
290 AliInfo(Form("Received %d triggers so far", nTry));
291
6a926ad4 292 if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
293 {
6a926ad4 294 if(nTry>=nMaxTry)
295 {
296 AliInfo(Form("Collect() ran more than %d times -> Exiting!",
297 nMaxTry));
298 break;
299 }
300 }
d477ad88 301
cb343cfd 302 Collect();
94bf758b 303 CheckTerminate();
d477ad88 304 }
305
306 delete notifier;
307}
308
b948db8d 309//______________________________________________________________________________________________
a7160fe9 310Bool_t AliShuttleTrigger::Collect(Int_t run)
58bc3020 311{
d477ad88 312 //
cb343cfd 313 // this function creates a thread that runs the shuttle
314 // then it checks if the shuttle is still running by checking the monitoring functions of the shuttle
d477ad88 315 //
316
fb2975a2 317 // first checking disk space
318 Long_t id = 0;
319 Long_t bsize = 0;
320 Long_t blocks = 0;
321 Long_t bfree = 0;
322
323 gSystem->GetFsInfo(fConfig->GetShuttleFileSystem(), &id, &bsize, &blocks, &bfree);
324
0a0b087a 325 AliInfo(Form("n. of free blocks = %ld, total n. of blocks = %ld",bfree,blocks));
fb2975a2 326 Int_t spaceFree = (Int_t)(((Float_t)bfree/(Float_t)blocks)*100);
327
328 if (spaceFree < fConfig->GetFreeDiskWarningThreshold()) {
329 AliWarning(Form("************** Free space left = %d%%, below the Warning Threshold (%d%%)",spaceFree,fConfig->GetFreeDiskWarningThreshold()));
330 if (TMath::Abs(time(0) - fLastMailDiskSpace) >= 86400){ // 86400 = n. of seconds in 1 d
331 SendMailDiskSpace(fConfig->GetFreeDiskWarningThreshold());
332 fLastMailDiskSpace = time(0); // resetting fLastMailDiskSpace to time(0) = now
333 }
334 if (spaceFree < fConfig->GetFreeDiskFatalThreshold()){
335 AliError(Form("*************** Free space left = %d%%, below the Fatal Threshold (%d%%), terminating....",spaceFree,fConfig->GetFreeDiskFatalThreshold()));
336 SendMailDiskSpace(fConfig->GetFreeDiskFatalThreshold());
337 fTerminate = kTRUE; // terminating....
338 }
339 }
340
341 if (fTerminate) {
342 return kFALSE;
343 }
344
345 return fShuttle->Collect(run);
346}
347//______________________________________________________________________________________________
348Bool_t AliShuttleTrigger::SendMailDiskSpace(Short_t percentage)
349{
350 //
351 // sends a mail to the shuttle experts in case of free disk space < theshold
352 //
353
354
355 AliInfo("******************* Sending the Mail!! *********************");
356 if (!fConfig->SendMail())
357 return kTRUE;
358
359 Int_t runMode = (Int_t)fConfig->GetRunMode();
360 TString tmpStr;
361 if (runMode == 0) tmpStr = " Nightly Test:";
362 else tmpStr = " Data Taking:";
363 void* dir = gSystem->OpenDirectory(fShuttle->GetShuttleLogDir());
364 if (dir == NULL)
365 {
366 if (gSystem->mkdir(fShuttle->GetShuttleLogDir(), kTRUE))
367 {
368 AliWarning(Form("SendMail - Can't open directory <%s>", fShuttle->GetShuttleLogDir()));
369 return kFALSE;
370 }
371
372 } else {
373 gSystem->FreeDirectory(dir);
374 }
375
376 // SHUTTLE responsibles in to
377 TString to="";
378 TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal));
379 TObjString *anAdmin=0;
380 while ((anAdmin = (TObjString*) iterAdmins.Next()))
381 {
382 to += Form("%s,", anAdmin->GetName());
383 }
384 if (to.Length() > 0)
385 to.Remove(to.Length()-1);
386 AliDebug(2, Form("to: %s",to.Data()));
387
51657f6d 388 // mail body
fb2975a2 389 TString bodyFileName;
390 bodyFileName.Form("%s/mail.body", fShuttle->GetShuttleLogDir());
391 gSystem->ExpandPathName(bodyFileName);
392
393 ofstream mailBody;
394 mailBody.open(bodyFileName, ofstream::out);
395
396 if (!mailBody.is_open())
397 {
398 AliWarning(Form("Could not open mail body file %s", bodyFileName.Data()));
399 return kFALSE;
400 }
401
402 TString subject;
403 TString body;
404
eee6253d 405 Int_t percentage_used = 100 - percentage;
fb2975a2 406 subject = Form("%s CRITICAL Disk Space usage exceeds %d%c!",
eee6253d 407 tmpStr.Data(),percentage_used,'%');
fb2975a2 408 AliDebug(2, Form("subject: %s", subject.Data()));
fb2975a2 409
410 body = "Dear SHUTTLE experts, \n\n";
411 body += "The usage of the disk space on the shuttle machine has overcome \n";
412 body += Form("the threshold of %d%%. \n \n",percentage_used);
413 body += "Please check! \n \n";
414 body += "Please do not answer this message directly, it is automatically generated.\n\n";
415 body += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
416
417 AliDebug(2, Form("Body : %s", body.Data()));
418
419 mailBody << body.Data();
420 mailBody.close();
421
422 // send mail!
423 TString mailCommand = Form("mail -s \"%s\" %s < %s",
424 subject.Data(),
425 to.Data(),
426 bodyFileName.Data());
427 AliDebug(2, Form("mail command: %s", mailCommand.Data()));
428
429 Bool_t result = gSystem->Exec(mailCommand.Data());
430
431 return result == 0;
d477ad88 432}