]>
Commit | Line | Data |
---|---|---|
d477ad88 | 1 | /************************************************************************** |
2 | * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. * | |
3 | * * | |
4 | * Author: The ALICE Off-line Project. * | |
5 | * Contributors are mentioned in the code where appropriate. * | |
6 | * * | |
7 | * Permission to use, copy, modify and distribute this software and its * | |
8 | * documentation strictly for non-commercial purposes is hereby granted * | |
9 | * without fee, provided that the above copyright notice appears in all * | |
10 | * copies and that both the copyright notice and this permission notice * | |
11 | * appear in the supporting documentation. The authors make no claims * | |
12 | * about the suitability of this software for any purpose. It is * | |
13 | * provided "as is" without express or implied warranty. * | |
14 | **************************************************************************/ | |
15 | ||
16 | /* | |
17 | $Log$ | |
1abfbb60 | 18 | Revision 1.15 2007/12/10 18:29:23 acolla |
19 | Some log added to the listen mode | |
20 | ||
6a926ad4 | 21 | Revision 1.14 2007/12/07 19:14:36 acolla |
22 | in AliShuttleTrigger: | |
23 | ||
24 | Added automatic collection of new runs on a regular time basis (settable from the configuration) | |
25 | ||
26 | in AliShuttleConfig: new members | |
27 | ||
28 | - triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs | |
29 | - mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD) | |
30 | ||
31 | in AliShuttle: | |
32 | ||
33 | - logs now stored in logs/#RUN/DET_#RUN.log | |
34 | ||
7d4cf768 | 35 | Revision 1.13 2006/11/16 16:16:48 jgrosseo |
36 | introducing strict run ordering flag | |
37 | removed giving preprocessor name to preprocessor, they have to know their name themselves ;-) | |
38 | ||
be48e3ea | 39 | Revision 1.12 2006/10/20 15:22:59 jgrosseo |
40 | o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child | |
41 | o) Merging Collect, CollectAll, CollectNew function | |
42 | o) Removing implementation of empty copy constructors (declaration still there!) | |
43 | ||
cb343cfd | 44 | Revision 1.11 2006/10/02 16:38:39 jgrosseo |
45 | update (alberto): | |
46 | fixed memory leaks | |
47 | storing of objects that failed to be stored to the grid before | |
48 | interfacing of shuttle status table in daq system | |
49 | ||
2bb7b766 | 50 | Revision 1.10 2006/08/15 10:50:00 jgrosseo |
51 | effc++ corrections (alberto) | |
52 | ||
4f0ab988 | 53 | Revision 1.9 2006/08/08 14:19:29 jgrosseo |
54 | Update to shuttle classes (Alberto) | |
55 | ||
56 | - Possibility to set the full object's path in the Preprocessor's and | |
57 | Shuttle's Store functions | |
58 | - Possibility to extend the object's run validity in the same classes | |
59 | ("startValidity" and "validityInfinite" parameters) | |
60 | - Implementation of the StoreReferenceData function to store reference | |
61 | data in a dedicated CDB storage. | |
62 | ||
84090f85 | 63 | Revision 1.8 2006/07/21 07:37:20 jgrosseo |
64 | last run is stored after each run | |
65 | ||
7bfb2090 | 66 | Revision 1.7 2006/07/20 09:54:40 jgrosseo |
67 | introducing status management: The processing per subdetector is divided into several steps, | |
68 | after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle | |
69 | can keep track of the number of failures and skips further processing after a certain threshold is | |
70 | exceeded. These thresholds can be configured in LDAP. | |
71 | ||
5164a766 | 72 | Revision 1.6 2006/07/19 10:09:55 jgrosseo |
73 | new configuration, accesst to DAQ FES (Alberto) | |
74 | ||
57f50b3c | 75 | Revision 1.5 2006/07/10 13:01:41 jgrosseo |
76 | enhanced storing of last sucessfully processed run (alberto) | |
77 | ||
a7160fe9 | 78 | Revision 1.4 2006/07/04 14:59:57 jgrosseo |
79 | revision of AliDCSValue: Removed wrapper classes, reduced storage size per value by factor 2 | |
80 | ||
45a493ce | 81 | Revision 1.3 2006/06/12 09:11:16 jgrosseo |
82 | coding conventions (Alberto) | |
83 | ||
58bc3020 | 84 | Revision 1.2 2006/06/06 14:26:40 jgrosseo |
85 | o) removed files that were moved to STEER | |
86 | o) shuttle updated to follow the new interface (Alberto) | |
87 | ||
b948db8d | 88 | Revision 1.1 2006/03/07 07:52:34 hristov |
89 | New version (B.Yordanov) | |
90 | ||
d477ad88 | 91 | Revision 1.5 2005/11/21 09:03:48 byordano |
92 | one more print added | |
93 | ||
94 | Revision 1.4 2005/11/20 10:12:37 byordano | |
95 | comments added to AliShuttleTrigger | |
96 | ||
97 | */ | |
98 | ||
99 | ||
100 | // | |
101 | // This class is to deal with DAQ LogBook and DAQ "end of run" notification. | |
102 | // It has two modes: | |
cb343cfd | 103 | // 1) synchronous - Collect() |
b948db8d | 104 | // 2) asynchronous - Run() - starts listening for DAQ "end of run" |
d477ad88 | 105 | // notification by DIM service. |
106 | // | |
107 | ||
108 | #include "AliShuttleTrigger.h" | |
109 | ||
d477ad88 | 110 | #include <TSystem.h> |
fb2975a2 | 111 | #include <TObjString.h> |
cb343cfd | 112 | |
d477ad88 | 113 | #include "AliLog.h" |
d477ad88 | 114 | #include "AliShuttleConfig.h" |
115 | #include "AliShuttle.h" | |
116 | #include "DATENotifier.h" | |
117 | ||
fb2975a2 | 118 | #include <fstream> |
119 | ||
d477ad88 | 120 | ClassImp(TerminateSignalHandler) |
cb343cfd | 121 | ClassImp(AliShuttleTrigger) |
58bc3020 | 122 | |
b948db8d | 123 | //______________________________________________________________________________________________ |
cb343cfd | 124 | Bool_t TerminateSignalHandler::Notify() |
58bc3020 | 125 | { |
126 | // Sentd terminate command to the Shuttle trigger | |
d477ad88 | 127 | |
128 | AliInfo("Terminate signal received ..."); | |
129 | fTrigger->Terminate(); | |
130 | ||
131 | return kTRUE; | |
132 | } | |
133 | ||
b948db8d | 134 | //______________________________________________________________________________________________ |
ff3781ad | 135 | AliShuttleTrigger::AliShuttleTrigger(const AliShuttleConfig* config): |
b948db8d | 136 | fConfig(config), fShuttle(NULL), |
2bb7b766 | 137 | fNotified(kFALSE), fTerminate(kFALSE), |
4f0ab988 | 138 | fMutex(), fCondition(&fMutex), |
cb343cfd | 139 | fQuitSignalHandler(0), |
fb2975a2 | 140 | fInterruptSignalHandler(0), |
141 | fLastMailDiskSpace(0) | |
d477ad88 | 142 | { |
143 | // | |
144 | // config - pointer to the AliShuttleConfig object which represents | |
145 | // the configuration | |
b948db8d | 146 | // mainStorage - pointer to AliCDBStorage for the undelying CDBStorage |
147 | // localStorage (local) CDB storage to be used if mainStorage is unavailable | |
d477ad88 | 148 | // |
149 | ||
7d4cf768 | 150 | if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********"); |
ff3781ad | 151 | UInt_t timeout = fConfig->GetDCSTimeOut(); |
152 | Int_t retries = fConfig->GetDCSRetries(); | |
b948db8d | 153 | fShuttle = new AliShuttle(config, timeout, retries); |
d477ad88 | 154 | |
28a94b8e | 155 | fQuitSignalHandler = new TerminateSignalHandler(this, kSigQuit); |
156 | fInterruptSignalHandler = new TerminateSignalHandler(this, kSigInterrupt); | |
58bc3020 | 157 | |
cb343cfd | 158 | gSystem->AddSignalHandler(fQuitSignalHandler); |
159 | gSystem->AddSignalHandler(fInterruptSignalHandler); | |
58bc3020 | 160 | |
161 | } | |
162 | ||
b948db8d | 163 | //______________________________________________________________________________________________ |
58bc3020 | 164 | AliShuttleTrigger::~AliShuttleTrigger() |
165 | { | |
cb343cfd | 166 | // destructor |
d477ad88 | 167 | |
cb343cfd | 168 | gSystem->RemoveSignalHandler(fQuitSignalHandler); |
169 | gSystem->RemoveSignalHandler(fInterruptSignalHandler); | |
d477ad88 | 170 | |
171 | delete fShuttle; | |
cb343cfd | 172 | |
173 | delete fQuitSignalHandler; | |
174 | fQuitSignalHandler = 0; | |
175 | ||
176 | delete fInterruptSignalHandler; | |
177 | fInterruptSignalHandler = 0; | |
d477ad88 | 178 | } |
179 | ||
b948db8d | 180 | //______________________________________________________________________________________________ |
d477ad88 | 181 | Bool_t AliShuttleTrigger::Notify() { |
182 | // | |
cb343cfd | 183 | // Trigger Collect() methods in asynchronized (listen) mode. |
d477ad88 | 184 | // Usually called automaticly by DATENotifier on "end of run" |
185 | // notification event. | |
186 | // | |
187 | ||
188 | fMutex.Lock(); | |
189 | ||
190 | fNotified = kTRUE; | |
191 | fCondition.Signal(); | |
192 | ||
193 | fMutex.UnLock(); | |
194 | ||
195 | return kTRUE; | |
196 | } | |
197 | ||
b948db8d | 198 | //______________________________________________________________________________________________ |
d477ad88 | 199 | void AliShuttleTrigger::Terminate() { |
200 | // | |
201 | // Stop triggers listen mode and exist from Run() | |
202 | // Usually called automaticly by TerminateSignalHandler. | |
203 | // | |
204 | ||
205 | fTerminate = kTRUE; | |
206 | fCondition.Signal(); | |
207 | } | |
208 | ||
b948db8d | 209 | //______________________________________________________________________________________________ |
d477ad88 | 210 | void AliShuttleTrigger::Run() { |
211 | // | |
212 | // AliShuttleTrigger main loop for asynchronized (listen) mode. | |
213 | // It spawns DIM service listener and waits for DAQ "end of run" | |
cb343cfd | 214 | // notification. Calls Collect() on notification. |
d477ad88 | 215 | // |
216 | ||
217 | fTerminate = kFALSE; | |
218 | ||
4a5d9e0d | 219 | DATENotifier* notifier = new DATENotifier(this, "/LOGBOOK/SUBSCRIBE/ECS_EOR"); |
d477ad88 | 220 | |
6a926ad4 | 221 | Int_t nTry=0; |
222 | Int_t nMaxTry = fConfig->GetMaxRetries()+1; | |
223 | Int_t received=0; | |
224 | ||
225 | AliInfo("Listening for ECS trigger"); | |
7d4cf768 | 226 | |
d477ad88 | 227 | while (1) { |
228 | ||
229 | fMutex.Lock(); | |
230 | ||
231 | while (!(fNotified || fTerminate)) { | |
6a926ad4 | 232 | received=fCondition.TimedWaitRelative(1000*fConfig->GetTriggerWait()); |
233 | if (received==1) break; // 1 = timeout | |
d477ad88 | 234 | } |
235 | ||
236 | fNotified = kFALSE; | |
237 | ||
238 | fMutex.UnLock(); | |
239 | ||
240 | if (fTerminate) { | |
241 | AliInfo("Terminated."); | |
242 | break; | |
243 | } | |
7d4cf768 | 244 | |
6a926ad4 | 245 | if (received == 0) |
246 | { | |
247 | AliInfo("Trigger from ECS received!"); | |
248 | } else if (received == 1) { | |
249 | AliInfo(Form("Timeout (%d s) waiting for trigger. " | |
250 | "Starting collection of new runs!", | |
251 | fConfig->GetTriggerWait())); | |
252 | } else { | |
253 | AliInfo("Error receiving trigger from ECS!"); | |
254 | break; | |
255 | } | |
256 | ||
1abfbb60 | 257 | nTry++; |
258 | AliInfo(Form("Received %d triggers so far", nTry)); | |
259 | ||
6a926ad4 | 260 | if (fConfig->GetRunMode() == AliShuttleConfig::kTest) |
261 | { | |
6a926ad4 | 262 | if(nTry>=nMaxTry) |
263 | { | |
264 | AliInfo(Form("Collect() ran more than %d times -> Exiting!", | |
265 | nMaxTry)); | |
266 | break; | |
267 | } | |
268 | } | |
d477ad88 | 269 | |
cb343cfd | 270 | Collect(); |
d477ad88 | 271 | } |
272 | ||
273 | delete notifier; | |
274 | } | |
275 | ||
b948db8d | 276 | //______________________________________________________________________________________________ |
a7160fe9 | 277 | Bool_t AliShuttleTrigger::Collect(Int_t run) |
58bc3020 | 278 | { |
d477ad88 | 279 | // |
cb343cfd | 280 | // this function creates a thread that runs the shuttle |
281 | // then it checks if the shuttle is still running by checking the monitoring functions of the shuttle | |
d477ad88 | 282 | // |
283 | ||
fb2975a2 | 284 | // first checking disk space |
285 | Long_t id = 0; | |
286 | Long_t bsize = 0; | |
287 | Long_t blocks = 0; | |
288 | Long_t bfree = 0; | |
289 | ||
290 | gSystem->GetFsInfo(fConfig->GetShuttleFileSystem(), &id, &bsize, &blocks, &bfree); | |
291 | ||
292 | AliInfo(Form("n. of free blocks = %d, total n. of blocks = %d",bfree,blocks)); | |
293 | Int_t spaceFree = (Int_t)(((Float_t)bfree/(Float_t)blocks)*100); | |
294 | ||
295 | if (spaceFree < fConfig->GetFreeDiskWarningThreshold()) { | |
296 | AliWarning(Form("************** Free space left = %d%%, below the Warning Threshold (%d%%)",spaceFree,fConfig->GetFreeDiskWarningThreshold())); | |
297 | if (TMath::Abs(time(0) - fLastMailDiskSpace) >= 86400){ // 86400 = n. of seconds in 1 d | |
298 | SendMailDiskSpace(fConfig->GetFreeDiskWarningThreshold()); | |
299 | fLastMailDiskSpace = time(0); // resetting fLastMailDiskSpace to time(0) = now | |
300 | } | |
301 | if (spaceFree < fConfig->GetFreeDiskFatalThreshold()){ | |
302 | AliError(Form("*************** Free space left = %d%%, below the Fatal Threshold (%d%%), terminating....",spaceFree,fConfig->GetFreeDiskFatalThreshold())); | |
303 | SendMailDiskSpace(fConfig->GetFreeDiskFatalThreshold()); | |
304 | fTerminate = kTRUE; // terminating.... | |
305 | } | |
306 | } | |
307 | ||
308 | if (fTerminate) { | |
309 | return kFALSE; | |
310 | } | |
311 | ||
312 | return fShuttle->Collect(run); | |
313 | } | |
314 | //______________________________________________________________________________________________ | |
315 | Bool_t AliShuttleTrigger::SendMailDiskSpace(Short_t percentage) | |
316 | { | |
317 | // | |
318 | // sends a mail to the shuttle experts in case of free disk space < theshold | |
319 | // | |
320 | ||
321 | ||
322 | AliInfo("******************* Sending the Mail!! *********************"); | |
323 | if (!fConfig->SendMail()) | |
324 | return kTRUE; | |
325 | ||
326 | Int_t runMode = (Int_t)fConfig->GetRunMode(); | |
327 | TString tmpStr; | |
328 | if (runMode == 0) tmpStr = " Nightly Test:"; | |
329 | else tmpStr = " Data Taking:"; | |
330 | void* dir = gSystem->OpenDirectory(fShuttle->GetShuttleLogDir()); | |
331 | if (dir == NULL) | |
332 | { | |
333 | if (gSystem->mkdir(fShuttle->GetShuttleLogDir(), kTRUE)) | |
334 | { | |
335 | AliWarning(Form("SendMail - Can't open directory <%s>", fShuttle->GetShuttleLogDir())); | |
336 | return kFALSE; | |
337 | } | |
338 | ||
339 | } else { | |
340 | gSystem->FreeDirectory(dir); | |
341 | } | |
342 | ||
343 | // SHUTTLE responsibles in to | |
344 | TString to=""; | |
345 | TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal)); | |
346 | TObjString *anAdmin=0; | |
347 | while ((anAdmin = (TObjString*) iterAdmins.Next())) | |
348 | { | |
349 | to += Form("%s,", anAdmin->GetName()); | |
350 | } | |
351 | if (to.Length() > 0) | |
352 | to.Remove(to.Length()-1); | |
353 | AliDebug(2, Form("to: %s",to.Data())); | |
354 | ||
355 | // mail body | |
356 | TString bodyFileName; | |
357 | bodyFileName.Form("%s/mail.body", fShuttle->GetShuttleLogDir()); | |
358 | gSystem->ExpandPathName(bodyFileName); | |
359 | ||
360 | ofstream mailBody; | |
361 | mailBody.open(bodyFileName, ofstream::out); | |
362 | ||
363 | if (!mailBody.is_open()) | |
364 | { | |
365 | AliWarning(Form("Could not open mail body file %s", bodyFileName.Data())); | |
366 | return kFALSE; | |
367 | } | |
368 | ||
369 | TString subject; | |
370 | TString body; | |
371 | ||
eee6253d | 372 | Int_t percentage_used = 100 - percentage; |
fb2975a2 | 373 | subject = Form("%s CRITICAL Disk Space usage exceeds %d%c!", |
eee6253d | 374 | tmpStr.Data(),percentage_used,'%'); |
fb2975a2 | 375 | AliDebug(2, Form("subject: %s", subject.Data())); |
fb2975a2 | 376 | |
377 | body = "Dear SHUTTLE experts, \n\n"; | |
378 | body += "The usage of the disk space on the shuttle machine has overcome \n"; | |
379 | body += Form("the threshold of %d%%. \n \n",percentage_used); | |
380 | body += "Please check! \n \n"; | |
381 | body += "Please do not answer this message directly, it is automatically generated.\n\n"; | |
382 | body += "Greetings,\n\n \t\t\tthe SHUTTLE\n"; | |
383 | ||
384 | AliDebug(2, Form("Body : %s", body.Data())); | |
385 | ||
386 | mailBody << body.Data(); | |
387 | mailBody.close(); | |
388 | ||
389 | // send mail! | |
390 | TString mailCommand = Form("mail -s \"%s\" %s < %s", | |
391 | subject.Data(), | |
392 | to.Data(), | |
393 | bodyFileName.Data()); | |
394 | AliDebug(2, Form("mail command: %s", mailCommand.Data())); | |
395 | ||
396 | Bool_t result = gSystem->Exec(mailCommand.Data()); | |
397 | ||
398 | return result == 0; | |
d477ad88 | 399 | } |