Changes in TRD configuration for data taking.
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttleTrigger.cxx
CommitLineData
d477ad88 1/**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3 * *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
6 * *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
15
16/*
17 $Log$
1abfbb60 18 Revision 1.15 2007/12/10 18:29:23 acolla
19 Some log added to the listen mode
20
6a926ad4 21 Revision 1.14 2007/12/07 19:14:36 acolla
22 in AliShuttleTrigger:
23
24 Added automatic collection of new runs on a regular time basis (settable from the configuration)
25
26 in AliShuttleConfig: new members
27
28 - triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
29 - mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
30
31 in AliShuttle:
32
33 - logs now stored in logs/#RUN/DET_#RUN.log
34
7d4cf768 35 Revision 1.13 2006/11/16 16:16:48 jgrosseo
36 introducing strict run ordering flag
37 removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
38
be48e3ea 39 Revision 1.12 2006/10/20 15:22:59 jgrosseo
40 o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
41 o) Merging Collect, CollectAll, CollectNew function
42 o) Removing implementation of empty copy constructors (declaration still there!)
43
cb343cfd 44 Revision 1.11 2006/10/02 16:38:39 jgrosseo
45 update (alberto):
46 fixed memory leaks
47 storing of objects that failed to be stored to the grid before
48 interfacing of shuttle status table in daq system
49
2bb7b766 50 Revision 1.10 2006/08/15 10:50:00 jgrosseo
51 effc++ corrections (alberto)
52
4f0ab988 53 Revision 1.9 2006/08/08 14:19:29 jgrosseo
54 Update to shuttle classes (Alberto)
55
56 - Possibility to set the full object's path in the Preprocessor's and
57 Shuttle's Store functions
58 - Possibility to extend the object's run validity in the same classes
59 ("startValidity" and "validityInfinite" parameters)
60 - Implementation of the StoreReferenceData function to store reference
61 data in a dedicated CDB storage.
62
84090f85 63 Revision 1.8 2006/07/21 07:37:20 jgrosseo
64 last run is stored after each run
65
7bfb2090 66 Revision 1.7 2006/07/20 09:54:40 jgrosseo
67 introducing status management: The processing per subdetector is divided into several steps,
68 after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle
69 can keep track of the number of failures and skips further processing after a certain threshold is
70 exceeded. These thresholds can be configured in LDAP.
71
5164a766 72 Revision 1.6 2006/07/19 10:09:55 jgrosseo
73 new configuration, accesst to DAQ FES (Alberto)
74
57f50b3c 75 Revision 1.5 2006/07/10 13:01:41 jgrosseo
76 enhanced storing of last sucessfully processed run (alberto)
77
a7160fe9 78 Revision 1.4 2006/07/04 14:59:57 jgrosseo
79 revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2
80
45a493ce 81 Revision 1.3 2006/06/12 09:11:16 jgrosseo
82 coding conventions (Alberto)
83
58bc3020 84 Revision 1.2 2006/06/06 14:26:40 jgrosseo
85 o) removed files that were moved to STEER
86 o) shuttle updated to follow the new interface (Alberto)
87
b948db8d 88 Revision 1.1 2006/03/07 07:52:34 hristov
89 New version (B.Yordanov)
90
d477ad88 91 Revision 1.5 2005/11/21 09:03:48 byordano
92 one more print added
93
94 Revision 1.4 2005/11/20 10:12:37 byordano
95 comments added to AliShuttleTrigger
96
97 */
98
99
100//
101// This class is to deal with DAQ LogBook and DAQ "end of run" notification.
102// It has two modes:
cb343cfd 103// 1) synchronized - Collect()
b948db8d 104// 2) asynchronized - Run() - starts listening for DAQ "end of run"
d477ad88 105// notification by DIM service.
106//
107
108#include "AliShuttleTrigger.h"
109
d477ad88 110#include <TSystem.h>
fb2975a2 111#include <TObjString.h>
cb343cfd 112
d477ad88 113#include "AliLog.h"
d477ad88 114#include "AliShuttleConfig.h"
115#include "AliShuttle.h"
116#include "DATENotifier.h"
117
fb2975a2 118#include <fstream>
119
d477ad88 120ClassImp(TerminateSignalHandler)
cb343cfd 121ClassImp(AliShuttleTrigger)
58bc3020 122
b948db8d 123//______________________________________________________________________________________________
cb343cfd 124Bool_t TerminateSignalHandler::Notify()
58bc3020 125{
126// Sentd terminate command to the Shuttle trigger
d477ad88 127
128 AliInfo("Terminate signal received ...");
129 fTrigger->Terminate();
130
131 return kTRUE;
132}
133
58bc3020 134//______________________________________________________________________________________________
ff3781ad 135AliShuttleTrigger::AliShuttleTrigger(const AliShuttleConfig* config):
b948db8d 136 fConfig(config), fShuttle(NULL),
2bb7b766 137 fNotified(kFALSE), fTerminate(kFALSE),
4f0ab988 138 fMutex(), fCondition(&fMutex),
cb343cfd 139 fQuitSignalHandler(0),
fb2975a2 140 fInterruptSignalHandler(0),
141 fLastMailDiskSpace(0)
d477ad88 142{
143 //
144 // config - pointer to the AliShuttleConfig object which represents
145 // the configuration
b948db8d 146 // mainStorage - pointer to AliCDBStorage for the undelying CDBStorage
147 // localStorage (local) CDB storage to be used if mainStorage is unavailable
d477ad88 148 //
149
7d4cf768 150 if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
ff3781ad 151 UInt_t timeout = fConfig->GetDCSTimeOut();
152 Int_t retries = fConfig->GetDCSRetries();
b948db8d 153 fShuttle = new AliShuttle(config, timeout, retries);
d477ad88 154
28a94b8e 155 fQuitSignalHandler = new TerminateSignalHandler(this, kSigQuit);
156 fInterruptSignalHandler = new TerminateSignalHandler(this, kSigInterrupt);
58bc3020 157
cb343cfd 158 gSystem->AddSignalHandler(fQuitSignalHandler);
159 gSystem->AddSignalHandler(fInterruptSignalHandler);
58bc3020 160
161}
162
b948db8d 163//______________________________________________________________________________________________
58bc3020 164AliShuttleTrigger::~AliShuttleTrigger()
165{
cb343cfd 166 // destructor
d477ad88 167
cb343cfd 168 gSystem->RemoveSignalHandler(fQuitSignalHandler);
169 gSystem->RemoveSignalHandler(fInterruptSignalHandler);
d477ad88 170
171 delete fShuttle;
cb343cfd 172
173 delete fQuitSignalHandler;
174 fQuitSignalHandler = 0;
175
176 delete fInterruptSignalHandler;
177 fInterruptSignalHandler = 0;
d477ad88 178}
179
b948db8d 180//______________________________________________________________________________________________
d477ad88 181Bool_t AliShuttleTrigger::Notify() {
182 //
cb343cfd 183 // Trigger Collect() methods in asynchronized (listen) mode.
d477ad88 184 // Usually called automaticly by DATENotifier on "end of run"
185 // notification event.
186 //
187
188 fMutex.Lock();
189
190 fNotified = kTRUE;
191 fCondition.Signal();
192
193 fMutex.UnLock();
194
195 return kTRUE;
196}
197
b948db8d 198//______________________________________________________________________________________________
d477ad88 199void AliShuttleTrigger::Terminate() {
200 //
201 // Stop triggers listen mode and exist from Run()
202 // Usually called automaticly by TerminateSignalHandler.
203 //
204
205 fTerminate = kTRUE;
206 fCondition.Signal();
207}
208
b948db8d 209//______________________________________________________________________________________________
d477ad88 210void AliShuttleTrigger::Run() {
211 //
212 // AliShuttleTrigger main loop for asynchronized (listen) mode.
213 // It spawns DIM service listener and waits for DAQ "end of run"
cb343cfd 214 // notification. Calls Collect() on notification.
d477ad88 215 //
216
217 fTerminate = kFALSE;
218
4a5d9e0d 219 DATENotifier* notifier = new DATENotifier(this, "/LOGBOOK/SUBSCRIBE/ECS_EOR");
d477ad88 220
6a926ad4 221 Int_t nTry=0;
222 Int_t nMaxTry = fConfig->GetMaxRetries()+1;
223 Int_t received=0;
224
225 AliInfo("Listening for ECS trigger");
7d4cf768 226
d477ad88 227 while (1) {
228
229 fMutex.Lock();
230
231 while (!(fNotified || fTerminate)) {
6a926ad4 232 received=fCondition.TimedWaitRelative(1000*fConfig->GetTriggerWait());
233 if (received==1) break; // 1 = timeout
d477ad88 234 }
235
236 fNotified = kFALSE;
237
238 fMutex.UnLock();
239
240 if (fTerminate) {
241 AliInfo("Terminated.");
242 break;
243 }
7d4cf768 244
6a926ad4 245 if (received == 0)
246 {
247 AliInfo("Trigger from ECS received!");
248 } else if (received == 1) {
249 AliInfo(Form("Timeout (%d s) waiting for trigger. "
250 "Starting collection of new runs!",
251 fConfig->GetTriggerWait()));
252 } else {
253 AliInfo("Error receiving trigger from ECS!");
254 break;
255 }
256
1abfbb60 257 nTry++;
258 AliInfo(Form("Received %d triggers so far", nTry));
259
6a926ad4 260 if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
261 {
6a926ad4 262 if(nTry>=nMaxTry)
263 {
264 AliInfo(Form("Collect() ran more than %d times -> Exiting!",
265 nMaxTry));
266 break;
267 }
268 }
d477ad88 269
cb343cfd 270 Collect();
d477ad88 271 }
272
273 delete notifier;
274}
275
b948db8d 276//______________________________________________________________________________________________
a7160fe9 277Bool_t AliShuttleTrigger::Collect(Int_t run)
58bc3020 278{
d477ad88 279 //
cb343cfd 280 // this function creates a thread that runs the shuttle
281 // then it checks if the shuttle is still running by checking the monitoring functions of the shuttle
d477ad88 282 //
283
fb2975a2 284 // first checking disk space
285 Long_t id = 0;
286 Long_t bsize = 0;
287 Long_t blocks = 0;
288 Long_t bfree = 0;
289
290 gSystem->GetFsInfo(fConfig->GetShuttleFileSystem(), &id, &bsize, &blocks, &bfree);
291
292 AliInfo(Form("n. of free blocks = %d, total n. of blocks = %d",bfree,blocks));
293 Int_t spaceFree = (Int_t)(((Float_t)bfree/(Float_t)blocks)*100);
294
295 if (spaceFree < fConfig->GetFreeDiskWarningThreshold()) {
296 AliWarning(Form("************** Free space left = %d%%, below the Warning Threshold (%d%%)",spaceFree,fConfig->GetFreeDiskWarningThreshold()));
297 if (TMath::Abs(time(0) - fLastMailDiskSpace) >= 86400){ // 86400 = n. of seconds in 1 d
298 SendMailDiskSpace(fConfig->GetFreeDiskWarningThreshold());
299 fLastMailDiskSpace = time(0); // resetting fLastMailDiskSpace to time(0) = now
300 }
301 if (spaceFree < fConfig->GetFreeDiskFatalThreshold()){
302 AliError(Form("*************** Free space left = %d%%, below the Fatal Threshold (%d%%), terminating....",spaceFree,fConfig->GetFreeDiskFatalThreshold()));
303 SendMailDiskSpace(fConfig->GetFreeDiskFatalThreshold());
304 fTerminate = kTRUE; // terminating....
305 }
306 }
307
308 if (fTerminate) {
309 return kFALSE;
310 }
311
312 return fShuttle->Collect(run);
313}
314//______________________________________________________________________________________________
315Bool_t AliShuttleTrigger::SendMailDiskSpace(Short_t percentage)
316{
317 //
318 // sends a mail to the shuttle experts in case of free disk space < theshold
319 //
320
321
322 AliInfo("******************* Sending the Mail!! *********************");
323 if (!fConfig->SendMail())
324 return kTRUE;
325
326 Int_t runMode = (Int_t)fConfig->GetRunMode();
327 TString tmpStr;
328 if (runMode == 0) tmpStr = " Nightly Test:";
329 else tmpStr = " Data Taking:";
330 void* dir = gSystem->OpenDirectory(fShuttle->GetShuttleLogDir());
331 if (dir == NULL)
332 {
333 if (gSystem->mkdir(fShuttle->GetShuttleLogDir(), kTRUE))
334 {
335 AliWarning(Form("SendMail - Can't open directory <%s>", fShuttle->GetShuttleLogDir()));
336 return kFALSE;
337 }
338
339 } else {
340 gSystem->FreeDirectory(dir);
341 }
342
343 // SHUTTLE responsibles in to
344 TString to="";
345 TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal));
346 TObjString *anAdmin=0;
347 while ((anAdmin = (TObjString*) iterAdmins.Next()))
348 {
349 to += Form("%s,", anAdmin->GetName());
350 }
351 if (to.Length() > 0)
352 to.Remove(to.Length()-1);
353 AliDebug(2, Form("to: %s",to.Data()));
354
355 // mail body
356 TString bodyFileName;
357 bodyFileName.Form("%s/mail.body", fShuttle->GetShuttleLogDir());
358 gSystem->ExpandPathName(bodyFileName);
359
360 ofstream mailBody;
361 mailBody.open(bodyFileName, ofstream::out);
362
363 if (!mailBody.is_open())
364 {
365 AliWarning(Form("Could not open mail body file %s", bodyFileName.Data()));
366 return kFALSE;
367 }
368
369 TString subject;
370 TString body;
371
372 subject = Form("%s CRITICAL Disk Space usage exceeds %d%c!",
373 tmpStr.Data(),percentage,'%');
374 AliDebug(2, Form("subject: %s", subject.Data()));
375 Int_t percentage_used = 100 - percentage;
376
377 body = "Dear SHUTTLE experts, \n\n";
378 body += "The usage of the disk space on the shuttle machine has overcome \n";
379 body += Form("the threshold of %d%%. \n \n",percentage_used);
380 body += "Please check! \n \n";
381 body += "Please do not answer this message directly, it is automatically generated.\n\n";
382 body += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
383
384 AliDebug(2, Form("Body : %s", body.Data()));
385
386 mailBody << body.Data();
387 mailBody.close();
388
389 // send mail!
390 TString mailCommand = Form("mail -s \"%s\" %s < %s",
391 subject.Data(),
392 to.Data(),
393 bodyFileName.Data());
394 AliDebug(2, Form("mail command: %s", mailCommand.Data()));
395
396 Bool_t result = gSystem->Exec(mailCommand.Data());
397
398 return result == 0;
d477ad88 399}