]> git.uio.no Git - u/mrichter/AliRoot.git/blame - MONITOR/AliOnlineReco.cxx
Fix - clean (remove) run's reconstructed directory after reco/viz
[u/mrichter/AliRoot.git] / MONITOR / AliOnlineReco.cxx
CommitLineData
c6d78c69 1// @(#)root/eve:$Id$
2// Author: Matevz Tadel 2007
3
4/**************************************************************************
dc836d53 5 * Copyright(c) 1998-2008, ALICE Experiment at CERN, all rights reserved. *)
c6d78c69 6 * See http://aliceinfo.cern.ch/Offline/AliRoot/License.html for *
7 * full copyright notice. *
8 **************************************************************************/
9
10#include "AliOnlineReco.h"
93624d6b 11#include "AliChildProcTerminator.h"
c6d78c69 12#include "AliDimIntNotifier.h"
a15a9f84 13#include "AliCDBManager.h"
14#include "AliGRPPreprocessor.h"
c6d78c69 15
dc836d53 16#include <TTimer.h>
17
c6d78c69 18#include <TGListBox.h>
19#include <TGButton.h>
20
d2a137c1 21#include <TInterpreter.h>
e849dadd 22#include <TROOT.h>
d2a137c1 23
c6d78c69 24#include <unistd.h>
25#include <signal.h>
26
27//______________________________________________________________________________
28// Full description of AliOnlineReco
29//
30
31ClassImp(AliOnlineReco)
32
33AliOnlineReco::AliOnlineReco() :
34 TGMainFrame(gClient->GetRoot(), 400, 400),
35
dc836d53 36 fRunList(0), fAutoRun(0), fStartButt(0), fStopButt(0), fExitButt(0),
37 fAutoRunTimer(0), fAutoRunScheduled(0), fAutoRunRunning(0),
4f6eef9f 38 fRun2PidMap(),
e849dadd 39 fTestMode(kFALSE),
40 fDoExit(kFALSE)
c6d78c69 41{
48c0589a 42 // Constructor.
43
c6d78c69 44 // GUI components.
45 fRunList = new TGListBox(this);
46 AddFrame(fRunList, new TGLayoutHints(kLHintsNormal | kLHintsExpandX | kLHintsExpandY));
47
48 TGHorizontalFrame *hf = new TGHorizontalFrame(this, 1, 20);
49
dc836d53 50 fAutoRun = new TGCheckButton(hf, "AutoRun");
51 hf->AddFrame(fAutoRun, new TGLayoutHints(kLHintsNormal | kLHintsExpandX | kLHintsExpandY));
52 fAutoRun->Connect("Clicked()", "AliOnlineReco", this, "DoAutoRun()");
53
c6d78c69 54 fStartButt = new TGTextButton(hf, "Start");
55 hf->AddFrame(fStartButt, new TGLayoutHints(kLHintsNormal | kLHintsExpandX | kLHintsExpandY));
56 fStartButt->Connect("Clicked()", "AliOnlineReco", this, "DoStart()");
57
58 fStopButt = new TGTextButton(hf, "Stop");
59 hf->AddFrame(fStopButt, new TGLayoutHints(kLHintsNormal | kLHintsExpandX | kLHintsExpandY));
60 fStopButt->Connect("Clicked()", "AliOnlineReco", this, "DoStop()");
61
dc836d53 62 fExitButt = new TGTextButton(hf, "Exit");
63 hf->AddFrame(fExitButt, new TGLayoutHints(kLHintsNormal | kLHintsExpandX | kLHintsExpandY));
64 fExitButt->Connect("Clicked()", "AliOnlineReco", this, "DoExit()");
c6d78c69 65
66 AddFrame(hf, new TGLayoutHints(kLHintsNormal | kLHintsExpandX));
67
68 MapSubwindows();
69 Layout();
70 SetWindowName("Alice Online Reconstruction");
71
e35c7687 72 // DIM interface.
73 for (Int_t i = 0; i < 5; ++i)
74 {
75 if (i == 0)
76 {
77 fSOR[i] = new AliDimIntNotifier("/LOGBOOK/SUBSCRIBE/DAQ_SOR_PHYSICS");
78 fEOR[i] = new AliDimIntNotifier("/LOGBOOK/SUBSCRIBE/DAQ_EOR_PHYSICS");
79 }
80 else
81 {
82 fSOR[i] = new AliDimIntNotifier(Form("/LOGBOOK/SUBSCRIBE/DAQ_SOR_PHYSICS_%d", i));
83 fEOR[i] = new AliDimIntNotifier(Form("/LOGBOOK/SUBSCRIBE/DAQ_EOR_PHYSICS_%d", i));
84 }
dc836d53 85
e35c7687 86 fSOR[i]->Connect("DimMessage(Int_t)", "AliOnlineReco", this, "StartOfRun(Int_t)");
87 fEOR[i]->Connect("DimMessage(Int_t)", "AliOnlineReco", this, "EndOfRun(Int_t)");
88 }
c6d78c69 89
dc836d53 90 const Int_t autoRunDelay = 10; // should go to config
91 fAutoRunTimer = new TTimer(autoRunDelay * 1000l);
92 fAutoRunTimer->Connect("Timeout()", "AliOnlineReco", this, "AutoRunTimerTimeout()");
93
b6269fdc 94 // OS Signal handlers
c6d78c69 95 // ROOT's TSignalHAndler works not SIGCHLD ...
93624d6b 96 AliChildProcTerminator::Instance()->Connect("ChildProcTerm(Int_t,Int_t)", "AliOnlineReco", this, "ChildProcTerm(Int_t,Int_t)");
e849dadd 97
98 // we need this by OnExit() to kill next process child after another
99 Connect("ChildProcTerm(Int_t,Int_t)", "AliOnlineReco", this, "ExitLoopChildProcTerm()");
c6d78c69 100}
101
dc836d53 102AliOnlineReco::~AliOnlineReco()
103{
48c0589a 104 // Destructor.
105
dc836d53 106 delete fAutoRunTimer;
107}
108
109Int_t AliOnlineReco::GetLastRun() const
110{
48c0589a 111 // Returns the last started run.
112
dc836d53 113 return fRun2PidMap.empty() ? 0 : fRun2PidMap.rbegin()->first;
114}
115
116Bool_t AliOnlineReco::GetAutoRunMode() const
117{
48c0589a 118 // Return state of auto-run flag.
119
dc836d53 120 return fAutoRun->IsOn();
121}
122
123void AliOnlineReco::SetAutoRunMode(Bool_t ar)
124{
48c0589a 125 // Set auto-run flag.
126
dc836d53 127 if (ar == fAutoRun->IsOn())
128 return;
129
130 fAutoRun->SetState(ar ? kButtonDown : kButtonUp, kTRUE);
131}
132
133//------------------------------------------------------------------------------
134// Private methods
135//------------------------------------------------------------------------------
136
c6d78c69 137AliOnlineReco::mIntInt_i AliOnlineReco::FindMapEntryByPid(Int_t pid)
138{
48c0589a 139 // Find run-to-pid map iterator by pid.
140 // Requires iteration over map.
141
c6d78c69 142 for (mIntInt_i i = fRun2PidMap.begin(); i != fRun2PidMap.end(); ++i)
143 {
144 if (i->second == pid)
145 return i;
146 }
147
148 return fRun2PidMap.end();
149}
150
dc836d53 151void AliOnlineReco::StartAliEve(mIntInt_i& mi)
152{
48c0589a 153 // Start alieve to process run given my the run-pid entry.
154
dc836d53 155 Int_t run = mi->first;
156
157 if (mi->second == 0)
158 {
159 pid_t pid = fork();
160 if (pid == -1)
161 {
162 perror("DoStart -- Fork failed");
163 return;
164 }
165
166 if (pid)
167 {
168 mi->second = pid;
169 fRunList->RemoveEntry(run);
170 fRunList->AddEntrySort(TString::Format("%-20d -- RUNNING", run), run);
171 fRunList->Layout();
172 }
173 else
174 {
d2a137c1 175 gCINTMutex = 0;
176
177 struct sigaction sac;
b6269fdc 178 memset(&sac, 0, sizeof(sac));
179 sac.sa_handler = NULL;
d2a137c1 180 sigemptyset(&sac.sa_mask);
181 sac.sa_flags = 0;
07f75a04 182
183 // The sa_restorer field is Not POSIX and obsolete.
184 // This is for compilation on other systems
185 #if defined(__linux) && \
186 (defined(__i386__) || defined(__x86_64__)) && \
187 defined(__GNUC__)
188 sac.sa_restorer= NULL;
189 #endif
b6269fdc 190 sigaction(SIGCHLD, &sac, NULL);
191
dc836d53 192 int s;
193 if (fTestMode)
194 {
b6269fdc 195 s = execlp("alitestproc", "alitestproc", TString::Format("%d", run).Data(), (char*) 0);
dc836d53 196 }
197 else
198 {
b6269fdc 199 Int_t procPID = gSystem->GetPid();
200 TString logFile = Form("%s/reco/log/run%d_%d.log",
201 gSystem->Getenv("ONLINERECO_BASE_DIR"),
202 run,
203 (Int_t)procPID);
204 Info("DoStart","Reconstruction log will be written to %s",logFile.Data());
205 gSystem->RedirectOutput(logFile.Data());
206
207 gSystem->cd(Form("%s/reco",gSystem->Getenv("ONLINERECO_BASE_DIR")));
208
209 TString gdcs;
210 if (RetrieveGRP(run,gdcs) <= 0 || gdcs.IsNull())
211 gSystem->Exit(1);
212
213 gSystem->Setenv("DATE_RUN_NUMBER", Form("%d", run));
214 // Setting CDB
215 // AliCDBManager * man = AliCDBManager::Instance();
216 // man->SetDefaultStorage("local:///local/cdb");
217 // man->SetSpecificStorage("GRP/GRP/Data",
218 // Form("local://%s",gSystem->pwd()));
219 // man->SetSpecificStorage("GRP/CTP/Config",
220 // Form("local://%s",gSystem->pwd()));
221 // man->SetSpecificStorage("ACORDE/Align/Data",
222 // "local://$ALICE_ROOT/OCDB");
223
224 gSystem->mkdir(Form("run%d_%d", run, (Int_t)procPID));
225 gSystem->cd(Form("run%d_%d", run, (Int_t)procPID));
226
227 TString recMacroPath(gSystem->Getenv("ONLINERECO_MACRO"));
228 if (recMacroPath.IsNull()) {
229 recMacroPath = "$ALICE_ROOT/MONITOR/rec.C";
230 }
e849dadd 231
b6269fdc 232 s = execlp("alieve",
233 "alieve",
234 "-q",
235 Form("%s(\"mem://@*:\")", gSystem->ExpandPathName(recMacroPath.Data())),
236 (char*) 0);
237
dc836d53 238 }
239
240 if (s == -1)
241 {
b6269fdc 242 perror("execlp failed - this will not end well");
243 gSystem->Exit(1);
dc836d53 244 }
245 }
246 }
247 else
248 {
249 Error("DoStart", "Process already running.");
250 }
251}
252
253void AliOnlineReco::KillPid(Int_t pid)
254{
48c0589a 255 // Terminate process given by pid.
256
dc836d53 257 // Send terminate signal to process ...
258
259 if (fTestMode)
260 {
261 kill(pid, SIGTERM);
262 }
263 else
264 {
265 // alieve will auto-destruct on SIGUSR1
266 kill(pid, SIGUSR1);
267 }
268}
269
270void AliOnlineReco::StartAutoRunTimer(Int_t run)
271{
272 // Start timer for given run.
273 // If an auto-started run is already active, this call is ignored.
274 // If timer is already active, it is restarted.
275
276 if (fAutoRunRunning)
277 return;
278
279 fAutoRunTimer->Reset();
280 fAutoRunTimer->TurnOn();
281 fAutoRunScheduled = run;
282
283 Info("StartAutoRunTimer", "Scheduling run %d for auto-display.", run);
284}
285
286void AliOnlineReco::StopAutoRunTimer()
287{
48c0589a 288 // Stop auto-run timer.
289
dc836d53 290 fAutoRunTimer->TurnOff();
291 fAutoRunScheduled = 0;
292}
293
294void AliOnlineReco::AutoRunTimerTimeout()
295{
48c0589a 296 // Slot called on auto-timer timeout.
297
dc836d53 298 Int_t run = fAutoRunScheduled;
299
300 StopAutoRunTimer();
301
302 mIntInt_i i = fRun2PidMap.find(run);
303
304 if (i == fRun2PidMap.end())
305 {
306 Warning("AutoRunTimerTimeout", "run no longer active.");
307 return;
308 }
309
310 Info("AutoRunTimerTimeout", "Starting display for run %d.", run);
311
312 StartAliEve(i);
313 fAutoRunRunning = run;
314}
315
c6d78c69 316//------------------------------------------------------------------------------
317// Handlers of DIM signals.
318//------------------------------------------------------------------------------
319
320void AliOnlineReco::StartOfRun(Int_t run)
321{
48c0589a 322 // Slot called from DIM handler on start of run.
323
c6d78c69 324 mIntInt_i i = fRun2PidMap.find(run);
325 if (i == fRun2PidMap.end())
326 {
327 fRun2PidMap[run] = 0;
328 fRunList->AddEntrySort(TString::Format("%d", run), run);
329 fRunList->Layout();
dc836d53 330
331 if (fAutoRun->IsOn())
332 {
333 StartAutoRunTimer(run);
334 }
c6d78c69 335 }
336 else
337 {
338 Error("StartOfRun", "Run %d already registered.", run);
339 }
340}
341
342void AliOnlineReco::EndOfRun(Int_t run)
343{
48c0589a 344 // Slot called from DIM handler on stop of run.
345
c6d78c69 346 mIntInt_i i = fRun2PidMap.find(run);
347 if (i != fRun2PidMap.end())
348 {
349 Int_t pid = i->second;
350 fRunList->RemoveEntry(run);
351 fRunList->Layout();
352 fRun2PidMap.erase(i);
353 if (pid)
354 {
dc836d53 355 KillPid(pid);
c6d78c69 356 }
357 gClient->NeedRedraw(fRunList);
dc836d53 358
359 if (fAutoRunRunning == run)
360 {
361 fAutoRunRunning = 0;
362 }
c6d78c69 363 }
364 else
365 {
366 Error("EndOfRun", "Run %d not registered.", run);
367 }
368}
369
370//------------------------------------------------------------------------------
371// Handlers of OS signals.
372//------------------------------------------------------------------------------
373
93624d6b 374void AliOnlineReco::ChildProcTerm(Int_t pid, Int_t status)
c6d78c69 375{
48c0589a 376 // Slot called on termination of child process.
b6269fdc 377
93624d6b 378 printf("child process termination pid=%d, status=%d...\n", pid, status);
c6d78c69 379
380 mIntInt_i i = FindMapEntryByPid(pid);
381 if (i != fRun2PidMap.end())
382 {
383 Int_t run = i->first;
384 fRunList->RemoveEntry(run);
67fc34c9 385
386 // clean (remove) run's reconstructed directory
387 gSystem->Exec(Form("rm -rf %s/reco/run%d_%d",gSystem->Getenv("ONLINERECO_BASE_DIR"),run,pid));
388
c6d78c69 389 if (status == 0)
390 {
82142d1d 391 fRunList->AddEntrySort(TString::Format("%-20d -- PROCESSED", run), run);
c6d78c69 392 }
393 else
394 {
82142d1d 395 fRunList->AddEntrySort(TString::Format("%-20d -- PROCESSED [%d]", run, status), run);
c6d78c69 396 }
397 fRunList->Layout();
e849dadd 398 fRun2PidMap.erase(i);
dc836d53 399
400 if (fAutoRunRunning == run && fAutoRun->IsOn())
401 {
402 fAutoRunRunning = 0;
403 StartAutoRunTimer(run);
404 }
405 else
406 {
407 fAutoRunRunning = 0;
408 }
c6d78c69 409 }
410 else
411 {
93624d6b 412 Warning("ChildProcTerm", "Process with pid=%d not registered.", pid);
c6d78c69 413 }
e849dadd 414
415 Emit("ChildProcTerm(Int_t, Int_t)");
416}
417
418void AliOnlineReco::ExitLoopChildProcTerm()
419{
420 if(fDoExit)
421 DoExit();
c6d78c69 422}
423
424//------------------------------------------------------------------------------
425// Handlers of button signals.
426//------------------------------------------------------------------------------
427
dc836d53 428void AliOnlineReco::DoAutoRun()
429{
48c0589a 430 // Slot called from auto-run check-box.
431
dc836d53 432 Bool_t autoRun = fAutoRun->IsOn();
433
434 if (autoRun)
435 fStartButt->SetEnabled(kFALSE);
436 else
437 fStartButt->SetEnabled(kTRUE);
438}
439
c6d78c69 440void AliOnlineReco::DoStart()
441{
48c0589a 442 // Slot called from Start button.
443
c6d78c69 444 Int_t run = fRunList->GetSelected();
445 mIntInt_i i = fRun2PidMap.find(run);
446
447 if (i == fRun2PidMap.end())
448 {
449 Error("DoStart", "no selection");
450 return;
451 }
452
dc836d53 453 StartAliEve(i);
c6d78c69 454}
455
456void AliOnlineReco::DoStop()
457{
48c0589a 458 // Slot called from Stop button.
459
c6d78c69 460 Int_t run = fRunList->GetSelected();
461 mIntInt_i i = fRun2PidMap.find(run);
462
463 if (i == fRun2PidMap.end())
464 {
465 Error("DoStop", "no selection");
466 return;
467 }
468
469 Int_t pid = i->second;
470 if (pid)
471 {
dc836d53 472 KillPid(pid);
c6d78c69 473 }
474 else
475 {
476 Error("DoStop", "Process not running.");
477 }
478}
479
dc836d53 480void AliOnlineReco::DoExit()
c6d78c69 481{
e849dadd 482 // Slot called from Exit button or CloseWindow.
483
484 // kill all started processes
485 Int_t pid;
486
487 // disable all widgets & AutoRunTimer
488 // so that user does not initiate other GUI signals
489 if(!fDoExit){
490 fAutoRun->SetEnabled(kFALSE);
491 fStartButt->SetEnabled(kFALSE);
492 fStopButt->SetEnabled(kFALSE);
493 fExitButt->SetEnabled(kFALSE);
494
495 StopAutoRunTimer();
496 fDoExit = kTRUE;
497 gROOT->SetInterrupt(kTRUE);
498 }
499
500 gSystem->ProcessEvents();
501
502 // clear runs std::map
503 for(mIntInt_i i = fRun2PidMap.begin(); i != fRun2PidMap.end(); i++)
504 {
505 pid = i->second;
506
507 if(pid==0)
508 {
509 fRun2PidMap.erase(i); // if process is not started just remove it from map
510 }
511 else
512 {
513 // send kill signal to started process
514 KillPid(pid);
515
516 // we need to exit loop to let ROOT process events list
517 // after kill signal above, process pid starts signal AliChildProcTerminator::ChildProcTerm(int, int)
518 // and arrives in AliOnlineReco::ChildProcTerm(int, int)
519 // after this we return in DoExit() to process next run
520 break;
521 }
522
523 }
524
525 // we can exit after we killed all processes
526 if(fRun2PidMap.empty() ) gSystem->ExitLoop();
c6d78c69 527}
528
529void AliOnlineReco::CloseWindow()
530{
48c0589a 531 // Virtual method called when window-manager close-window button is pressed.
e849dadd 532
533 DoExit();
534
c6d78c69 535}
a15a9f84 536
48c0589a 537Int_t AliOnlineReco::RetrieveGRP(UInt_t run, TString &gdc)
538{
539 // Retrieve GRP entry for given run from aldaqdb.
a15a9f84 540
8f772d4b 541 TString dbHost = gSystem->Getenv("ONLINERECO_DB_HOST");
542 if (dbHost.IsNull())
543 {
544 dbHost = "aldaqdb";
545 }
546
547 TString dbPort = gSystem->Getenv("ONLINERECO_DB_PORT");
548 if (dbPort.IsNull())
549 {
550 dbPort = "0";
551 }
552
553 TString dbName = gSystem->Getenv("ONLINERECO_DB_NAME");
554 if (dbName.IsNull())
555 {
556 dbName = "LOGBOOK";
557 }
558
559 TString user = gSystem->Getenv("ONLINERECO_DB_USER");
560 if (user.IsNull())
561 {
562 user = "logbook";
563 }
564
565 TString password = gSystem->Getenv("ONLINERECO_DB_PASSWORD");
566 if (password.IsNull())
567 {
568 password = "alice";
569 }
570
571 Int_t ret=AliGRPPreprocessor::ReceivePromptRecoParameters(run, dbHost.Data(),
b6269fdc 572 dbPort.Atoi(), dbName.Data(),
573 user.Data(), password.Data(),
574 Form("local://%s",gSystem->pwd()),
575 gdc);
8f772d4b 576
a15a9f84 577 if(ret>0) Info("RetrieveGRP","Last run of the same type is: %d",ret);
578 else if(ret==0) Warning("RetrieveGRP","No previous run of the same type found");
579 else if(ret<0) Error("Retrieve","Error code while retrieving GRP parameters returned: %d",ret);
580 return(ret);
581}