/*
$Log$
+ Revision 1.15 2007/12/10 18:29:23 acolla
+ Some log added to the listen mode
+
+ Revision 1.14 2007/12/07 19:14:36 acolla
+ in AliShuttleTrigger:
+
+ Added automatic collection of new runs on a regular time basis (settable from the configuration)
+
+ in AliShuttleConfig: new members
+
+ - triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
+ - mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
+
+ in AliShuttle:
+
+ - logs now stored in logs/#RUN/DET_#RUN.log
+
+ Revision 1.13 2006/11/16 16:16:48 jgrosseo
+ introducing strict run ordering flag
+ removed giving preprocessor name to preprocessor, they have to know their name themselves ;-)
+
+ Revision 1.12 2006/10/20 15:22:59 jgrosseo
+ o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child
+ o) Merging Collect, CollectAll, CollectNew function
+ o) Removing implementation of empty copy constructors (declaration still there!)
+
+ Revision 1.11 2006/10/02 16:38:39 jgrosseo
+ update (alberto):
+ fixed memory leaks
+ storing of objects that failed to be stored to the grid before
+ interfacing of shuttle status table in daq system
+
+ Revision 1.10 2006/08/15 10:50:00 jgrosseo
+ effc++ corrections (alberto)
+
Revision 1.9 2006/08/08 14:19:29 jgrosseo
Update to shuttle classes (Alberto)
//
// This class is to deal with DAQ LogBook and DAQ "end of run" notification.
// It has two modes:
-// 1) syncrhnized - Collect(), CollectNew() and CollectAll methods
+// 1) synchronized - Collect()
// 2) asynchronized - Run() - starts listening for DAQ "end of run"
// notification by DIM service.
//
#include "AliShuttleTrigger.h"
-#include <TSQLServer.h>
-#include <TSQLResult.h>
-#include <TSQLRow.h>
-#include <TObjArray.h>
#include <TSystem.h>
+#include <TObjString.h>
#include "AliLog.h"
-#include "AliCDBManager.h"
-#include "AliCDBStorage.h"
-#include "AliCDBEntry.h"
-
-#include "AliDCSValue.h"
#include "AliShuttleConfig.h"
#include "AliShuttle.h"
#include "DATENotifier.h"
-ClassImp(TerminateSignalHandler)
-
-//______________________________________________________________________
-TerminateSignalHandler::TerminateSignalHandler(const TerminateSignalHandler& /*other*/):
-TSignalHandler(), fTrigger()
-{
-// copy constructor (not implemented)
-
-}
-
-//______________________________________________________________________
-TerminateSignalHandler &TerminateSignalHandler::operator=(const TerminateSignalHandler& /*other*/)
-{
-// assignment operator (not implemented)
+#include <fstream>
-return *this;
-}
+ClassImp(TerminateSignalHandler)
+ClassImp(AliShuttleTrigger)
//______________________________________________________________________________________________
-Bool_t TerminateSignalHandler::Notify()
+Bool_t TerminateSignalHandler::Notify()
{
// Send terminate command to the Shuttle trigger
}
//______________________________________________________________________________________________
-//______________________________________________________________________________________________
-
-ClassImp(AliShuttleTrigger)
-
-//______________________________________________________________________________________________
-AliShuttleTrigger::AliShuttleTrigger(const AliShuttleConfig* config,
- UInt_t timeout, Int_t retries):
+AliShuttleTrigger::AliShuttleTrigger(const AliShuttleConfig* config):
fConfig(config), fShuttle(NULL),
- fNotified(kFALSE), fTerminate(kFALSE), fLastRun(0),
+ fNotified(kFALSE), fTerminate(kFALSE),
fMutex(), fCondition(&fMutex),
- fQuitSignalHandler(this, kSigQuit),
- fInterruptSignalHandler(this, kSigInterrupt)
+ fQuitSignalHandler(0),
+ fInterruptSignalHandler(0),
+ fLastMailDiskSpace(0)
{
//
// config - pointer to the AliShuttleConfig object which represents
// localStorage (local) CDB storage to be used if mainStorage is unavailable
// NOTE(review): "localStorage"/"mainStorage" are not parameters of this
// constructor - the two lines above look stale, confirm against the header.
//
// The configuration is dereferenced immediately: an invalid configuration
// is reported through AliFatal. The DCS timeout and retry count handed to
// the new AliShuttle are read from the configuration. The quit and
// interrupt TerminateSignalHandler objects are created on the heap and
// registered with gSystem.
//
+ if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
+ UInt_t timeout = fConfig->GetDCSTimeOut();
+ Int_t retries = fConfig->GetDCSRetries();
fShuttle = new AliShuttle(config, timeout, retries);
- gSystem->AddSignalHandler(&fQuitSignalHandler);
- gSystem->AddSignalHandler(&fInterruptSignalHandler);
-}
-
-
+ fQuitSignalHandler = new TerminateSignalHandler(this, kSigQuit);
+ fInterruptSignalHandler = new TerminateSignalHandler(this, kSigInterrupt);
-//______________________________________________________________________
-AliShuttleTrigger::AliShuttleTrigger(const AliShuttleTrigger& /*other*/):
- TObject(), fConfig(), fShuttle(NULL),
- fNotified(kFALSE), fTerminate(kFALSE), fLastRun(0),
- fMutex(), fCondition(&fMutex),
- fQuitSignalHandler(this, kSigQuit),
- fInterruptSignalHandler(this, kSigInterrupt)
-
-{
-// copy constructor (not implemented)
-
-}
-
-//______________________________________________________________________
-AliShuttleTrigger &AliShuttleTrigger::operator=(const AliShuttleTrigger& /*other*/)
-{
-// assignment operator (not implemented)
+ gSystem->AddSignalHandler(fQuitSignalHandler);
+ gSystem->AddSignalHandler(fInterruptSignalHandler);
-return *this;
}
-
-
-
-
//______________________________________________________________________________________________
AliShuttleTrigger::~AliShuttleTrigger()
{
-// destructor
+ // destructor: unregisters the quit/interrupt signal handlers from gSystem, then deletes the shuttle and both handlers
- gSystem->RemoveSignalHandler(&fQuitSignalHandler);
- gSystem->RemoveSignalHandler(&fInterruptSignalHandler);
+ gSystem->RemoveSignalHandler(fQuitSignalHandler);
+ gSystem->RemoveSignalHandler(fInterruptSignalHandler);
delete fShuttle;
+
+ delete fQuitSignalHandler;
+ fQuitSignalHandler = 0;
+
+ delete fInterruptSignalHandler;
+ fInterruptSignalHandler = 0;
}
//______________________________________________________________________________________________
Bool_t AliShuttleTrigger::Notify() {
//
- // Trigger CollectNew() methods in asynchronized (listen) mode.
+ // Trigger Collect() methods in asynchronized (listen) mode.
// Usually called automatically by DATENotifier on "end of run"
// notification event.
//
fCondition.Signal();
}
+//______________________________________________________________________________________________
+void AliShuttleTrigger::CheckTerminate()
+{
+ //
+ // Looks for an external terminate request signalled through a file.
+ // When the configuration names a terminate file and that file is
+ // present, fTerminate is set. This is an alternative to the signal,
+ // which causes problems with the API libraries.
+ //
+
+ const char* terminateFile = fConfig->GetTerminateFilePath();
+
+ // no terminate file configured: nothing to look for
+ if (terminateFile[0] == '\0')
+   return;
+
+ // TSystem::AccessPathName() returns kFALSE when the path IS accessible
+ const Bool_t fileMissing = gSystem->AccessPathName(terminateFile);
+ if (!fileMissing)
+ {
+   AliInfo("Terminate file exists. Terminating Shuttle...");
+   fTerminate = kTRUE;
+ }
+}
+
//______________________________________________________________________________________________
void AliShuttleTrigger::Run() {
//
// AliShuttleTrigger main loop for asynchronized (listen) mode.
// It spawns DIM service listener and waits for DAQ "end of run"
- // notification. Calls CollectNew() on notification.
+ // notification. Calls Collect() on notification.
//
fTerminate = kFALSE;
- DATENotifier* notifier = new DATENotifier(this, "/DATE/LOGBOOK/UPDATE");
+ DATENotifier* notifier = new DATENotifier(this, "/LOGBOOK/SUBSCRIBE/ECS_EOR");
+ Int_t nTry=0;
+ Int_t nMaxTry = fConfig->GetMaxRetries()+1;
+ Int_t received=0;
+
+ AliInfo("Listening for ECS trigger");
+
while (1) {
fMutex.Lock();
while (!(fNotified || fTerminate)) {
- fCondition.Wait();
+ received=fCondition.TimedWaitRelative(1000*fConfig->GetTriggerWait());
+ CheckTerminate();
+ if (received==1) break; // 1 = timeout
}
fNotified = kFALSE;
AliInfo("Terminated.");
break;
}
-
- CollectNew();
- }
-
- delete notifier;
-}
-
-//______________________________________________________________________________________________
-Bool_t AliShuttleTrigger::RetrieveDATEEntries(const char* whereClause,
- TObjArray& entries)
-{
-// Retrieve start time and end time for all runs in the DAQ logbook
-// that aren't processed yet
-
- TString sqlQuery;
- sqlQuery = Form("select run, time_start, time_end from logbook %s order by run",
- whereClause);
-
- TSQLServer* aServer;
- TString logbookHost=Form("mysql://%s", fConfig->GetDAQlbHost());
-
- aServer = TSQLServer::Connect(logbookHost,
- fConfig->GetDAQlbUser(),
- fConfig->GetDAQlbPass());
- if (!aServer) {
- AliError("Can't establish connection to DAQ log book DB!");
- return kFALSE;
- }
-
- aServer->GetTables("REFSYSLOG");
-
- TSQLResult* aResult;
- aResult = aServer->Query(sqlQuery);
- if (!aResult) {
- AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
- delete aServer;
- return kFALSE;
- }
-
- if (aResult->GetFieldCount() != 3) {
- AliError("Invalid SQL result field number!");
- delete aResult;
- delete aServer;
- return kFALSE;
- }
-
- TSQLRow* aRow;
- while ((aRow = aResult->Next())) {
- TString runString(aRow->GetField(0), aRow->GetFieldLength(0));
- Int_t run = runString.Atoi();
-
- TString startTimeString(aRow->GetField(1),
- aRow->GetFieldLength(1));
- Int_t startTime = startTimeString.Atoi();
- if (!startTime) {
- AliWarning(Form("Zero StartTime for run <%d>!", run));
- AliWarning("Going to skip this run!");
- continue;
- }
-
- TString endTimeString(aRow->GetField(2),
- aRow->GetFieldLength(2));
- Int_t endTime = endTimeString.Atoi();
- if (!endTime) {
- AliWarning(Form("Zero EndTime for run <%d>!", run));
- AliWarning("Going to skip this run!");
- continue;
+
+ if (received == 0)
+ {
+ AliInfo("Trigger from ECS received!");
+ } else if (received == 1) {
+ AliInfo(Form("Timeout (%d s) waiting for trigger. "
+ "Starting collection of new runs!",
+ fConfig->GetTriggerWait()));
+ } else {
+ AliInfo("Error receiving trigger from ECS!");
+ break;
}
-
- if (startTime > endTime) {
- AliWarning(Form("StartTime bigger than EndTime for run <%d>", run));
- AliWarning("Going to skip this run!");
- continue;
+
+ nTry++;
+ AliInfo(Form("Received %d triggers so far", nTry));
+
+ if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
+ {
+ if(nTry>=nMaxTry)
+ {
+ AliInfo(Form("Collect() ran more than %d times -> Exiting!",
+ nMaxTry));
+ break;
+ }
}
-
- entries.AddLast(new AliShuttleTriggerDATEEntry(run, startTime, endTime));
- delete aRow;
+
+ Collect();
+ CheckTerminate();
}
- delete aResult;
-
- aServer->Close();
- delete aServer;
-
- entries.SetOwner(1);
-
- return kTRUE;
-}
-
-//______________________________________________________________________________________________
-Bool_t AliShuttleTrigger::RetrieveConditionsData(const TObjArray& dateEntries, Bool_t updateLastRun)
-{
- // Retrieve conditions data for all runs that aren't processed yet
-
- Bool_t hasError = kFALSE;
-
- TIter iter(&dateEntries);
- AliShuttleTriggerDATEEntry* anEntry;
-
- while ((anEntry = (AliShuttleTriggerDATEEntry*) iter.Next()))
- {
- if (!fShuttle->Process(anEntry->GetRun(),
- anEntry->GetStartTime(),
- anEntry->GetEndTime()))
- {
- hasError = kTRUE;
- }
-
- if (!hasError && updateLastRun && fLastRun < anEntry->GetRun())
- {
- fLastRun = anEntry->GetRun();
- WriteLastRun();
- }
- }
-
- return hasError == kFALSE;
-}
-
-//______________________________________________________________________________________________
-Bool_t AliShuttleTrigger::ReadLastRun()
-{
- // reads the last processed run from local CDB
-
- AliCDBEntry* cdbEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalCDB())
- ->Get("/SHUTTLE/SYSTEM/LASTRUN", 0);
-
- if (cdbEntry)
- {
- TObject* anObject = cdbEntry->GetObject();
- if (anObject == NULL || anObject->IsA() != AliDCSValue::Class())
- {
- AliError("Invalid last run object stored to CDB!");
- return kFALSE;
- }
- AliDCSValue* dcsValue = (AliDCSValue*) anObject;
- fLastRun = dcsValue->GetInt();
-
- delete cdbEntry;
- }
- else
- {
- AliFatal("No last run number stored. Please set first. Aborting");
- return kFALSE;
- }
-
- AliInfo(Form("Last run number <%d>", fLastRun));
-
- return kTRUE;
-}
-
-//______________________________________________________________________________________________
-Bool_t AliShuttleTrigger::WriteLastRun()
-{
- // writes the last succesfully processed run to local CDB
-
- AliDCSValue lastRunObj(fLastRun, 0);
- AliCDBMetaData metaData;
- AliCDBId cdbID(AliCDBPath("SHUTTLE", "SYSTEM", "LASTRUN"), 0, 0);
-
- UInt_t result = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalCDB())
- ->Put(&lastRunObj, cdbID, &metaData);
-
- if (!result) {
- AliError("Can't store last run to CDB!");
- return kFALSE;
- }
-
- return kTRUE;
-}
-
-//______________________________________________________________________________________________
-Bool_t AliShuttleTrigger::SetNewLastRun(Int_t run)
-{
- // sets a new run manually, use with caution!
-
- fShuttle->Log("SHUTTLE", Form("Setting last run manually to %d", run));
-
- fLastRun = run;
- return WriteLastRun();
+ delete notifier;
}
//______________________________________________________________________________________________
Bool_t AliShuttleTrigger::Collect(Int_t run)
{
//
- // Collects conditions date for the given run.
+ // Checks the free disk space of the Shuttle file system and then hands the
+ // collection for the given run over to AliShuttle::Collect().
+ //
+ // - free space below the warning threshold: a warning is logged and a mail
+ //   is sent, at most once every 24 hours
+ // - free space also below the fatal threshold (only evaluated when already
+ //   below the warning threshold): an error is logged, a mail is sent and
+ //   fTerminate is set
+ // - file system information unavailable: an error is logged and the
+ //   threshold checks are skipped, so that no bogus percentage is computed
+ //
+ // Returns kFALSE without collecting when fTerminate is set, otherwise the
+ // result of fShuttle->Collect(run).
//
- AliInfo(Form("Collecting conditions data for run <%d> ...", run));
+ // first checking disk space
+ Long_t id = 0;
+ Long_t bsize = 0;
+ Long_t blocks = 0;
+ Long_t bfree = 0;
- TString whereClause("where run = ");
- whereClause += run;
+ // TSystem::GetFsInfo() returns 0 on success; on failure the counters keep their initial 0
+ const Int_t fsStatus = gSystem->GetFsInfo(fConfig->GetShuttleFileSystem(), &id, &bsize, &blocks, &bfree);
- TObjArray dateEntries;
- if (!RetrieveDATEEntries(whereClause, dateEntries)) {
- AliError("Can't retrieve entries from DAQ log book.");
- return kFALSE;
- }
-
- if (!dateEntries.GetEntriesFast()) {
- AliError(Form("There isn't entry for run <%d> in DAQ log book!",
- run));
- return kFALSE;
- }
-
- if (dateEntries.GetEntriesFast() > 1) {
- AliError(Form("There is more than one entry for run <%d> in DAQ log book", run));
- return kFALSE;
- }
-
- if (!RetrieveConditionsData(dateEntries, kFALSE)) {
- AliError("An error occured during conditions data retrieval!");
- return kFALSE;
- }
-
- return kTRUE;
-}
-
-//______________________________________________________________________________________________
-Bool_t AliShuttleTrigger::CollectNew()
-{
- //
- // Collects conditions data for all new run written to DAQ LogBook.
- //
+ // guard the percentage computation: dividing by blocks == 0 would yield an undefined Int_t
+ const Bool_t fsInfoOk = (fsStatus == 0 && blocks > 0);
+ Int_t spaceFree = 0;
+ if (fsInfoOk) {
+   // Long_t values need the %ld conversion
+   AliInfo(Form("n. of free blocks = %ld, total n. of blocks = %ld",bfree,blocks));
+   spaceFree = (Int_t)(((Float_t)bfree/(Float_t)blocks)*100);
+ } else {
+   AliError("Could not retrieve the file system information, skipping the disk space check");
+ }
- AliInfo("Collecting conditions data for new runs ...");
-
- if (!ReadLastRun())
- {
- AliError("Retrieving of last run failed");
- return kFALSE;
- }
-
- TString whereClause("where run > ");
- whereClause += fLastRun;
+ if (fsInfoOk && spaceFree < fConfig->GetFreeDiskWarningThreshold()) {
+ AliWarning(Form("************** Free space left = %d%%, below the Warning Threshold (%d%%)",spaceFree,fConfig->GetFreeDiskWarningThreshold()));
+ if (TMath::Abs(time(0) - fLastMailDiskSpace) >= 86400){ // 86400 = n. of seconds in 1 d
+ SendMailDiskSpace(fConfig->GetFreeDiskWarningThreshold());
+ fLastMailDiskSpace = time(0); // resetting fLastMailDiskSpace to time(0) = now
+ }
+ if (spaceFree < fConfig->GetFreeDiskFatalThreshold()){
+ AliError(Form("*************** Free space left = %d%%, below the Fatal Threshold (%d%%), terminating....",spaceFree,fConfig->GetFreeDiskFatalThreshold()));
+ SendMailDiskSpace(fConfig->GetFreeDiskFatalThreshold());
+ fTerminate = kTRUE; // terminating....
+ }
+ }
- TObjArray dateEntries;
- if (!RetrieveDATEEntries(whereClause, dateEntries)) {
- AliError("Can't retrieve entries from DAQ log book.");
+ if (fTerminate) {
return kFALSE;
}
- if (!RetrieveConditionsData(dateEntries, kTRUE)) {
- AliError("Process of at least one run failed!");
- // return kFALSE;
- }
-
- return kTRUE;
+ return fShuttle->Collect(run);
}
-
//______________________________________________________________________________________________
-Bool_t AliShuttleTrigger::CollectAll()
+Bool_t AliShuttleTrigger::SendMailDiskSpace(Short_t percentage)
{
//
- // Collects conditions data for all run written in DAQ LogBook.
+ // sends a mail to the shuttle experts in case of free disk space < threshold
+ //
+ // percentage - free-space threshold (in %) that was crossed; the mail
+ //              reports 100 - percentage as the exceeded disk usage
+ //
+ // Returns kTRUE when mail sending is disabled in the configuration or when
+ // the mail command exits with status 0. Returns kFALSE when the log
+ // directory cannot be created, no recipient is configured, the body file
+ // cannot be opened or the mail command fails.
//
+
+
+ AliInfo("******************* Sending the Mail!! *********************");
+ if (!fConfig->SendMail())
+ return kTRUE;
+
+ Int_t runMode = (Int_t)fConfig->GetRunMode();
+ TString tmpStr;
+ if (runMode == 0) tmpStr = " Nightly Test:";
+ else tmpStr = " Data Taking:";
+ // the mail body is written into the Shuttle log directory: create it if it is missing
+ void* dir = gSystem->OpenDirectory(fShuttle->GetShuttleLogDir());
+ if (dir == NULL)
+ {
+ if (gSystem->mkdir(fShuttle->GetShuttleLogDir(), kTRUE))
+ {
+ AliWarning(Form("SendMail - Can't open directory <%s>", fShuttle->GetShuttleLogDir()));
+ return kFALSE;
+ }
- if (!ReadLastRun())
- {
- AliError("Retrieving of last run failed");
- return kFALSE;
- }
-
- AliInfo("Collecting conditions data for all runs ...");
-
- TObjArray dateEntries;
- if (!RetrieveDATEEntries("", dateEntries)) {
- AliError("Can't retrieve entries from DAQ log book.");
- return kFALSE;
+ } else {
+ gSystem->FreeDirectory(dir);
}
- if (!RetrieveConditionsData(dateEntries, kTRUE)) {
- AliError("An error occured during conditions data retrieval!");
- return kFALSE;
+ // SHUTTLE responsibles in to
+ TString to="";
+ TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal));
+ TObjString *anAdmin=0;
+ while ((anAdmin = (TObjString*) iterAdmins.Next()))
+ {
+ to += Form("%s,", anAdmin->GetName());
}
-
- return kTRUE;
+ if (to.Length() > 0)
+ to.Remove(to.Length()-1);
+ AliDebug(2, Form("to: %s",to.Data()));
+
+ // without a recipient the mail command below would be run with no address at all
+ if (to.Length() == 0)
+ {
+   AliWarning("SendMail - No recipient configured, mail not sent");
+   return kFALSE;
+ }
+
+ // mail body
+ TString bodyFileName;
+ bodyFileName.Form("%s/mail.body", fShuttle->GetShuttleLogDir());
+ gSystem->ExpandPathName(bodyFileName);
+
+ ofstream mailBody;
+ mailBody.open(bodyFileName, ofstream::out);
+
+ if (!mailBody.is_open())
+ {
+ AliWarning(Form("Could not open mail body file %s", bodyFileName.Data()));
+ return kFALSE;
+ }
+
+ TString subject;
+ TString body;
+
+ Int_t percentage_used = 100 - percentage;
+ subject = Form("%s CRITICAL Disk Space usage exceeds %d%c!",
+ tmpStr.Data(),percentage_used,'%');
+ AliDebug(2, Form("subject: %s", subject.Data()));
+
+ body = "Dear SHUTTLE experts, \n\n";
+ body += "The usage of the disk space on the shuttle machine has overcome \n";
+ body += Form("the threshold of %d%%. \n \n",percentage_used);
+ body += "Please check! \n \n";
+ body += "Please do not answer this message directly, it is automatically generated.\n\n";
+ body += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
+
+ AliDebug(2, Form("Body : %s", body.Data()));
+
+ mailBody << body.Data();
+ mailBody.close();
+
+ // send mail!
+ TString mailCommand = Form("mail -s \"%s\" %s < %s",
+ subject.Data(),
+ to.Data(),
+ bodyFileName.Data());
+ AliDebug(2, Form("mail command: %s", mailCommand.Data()));
+
+ // keep the exit status as an integer; 0 means the command succeeded
+ const Int_t exitStatus = gSystem->Exec(mailCommand.Data());
+
+ return exitStatus == 0;
}
-