X-Git-Url: http://git.uio.no/git/?a=blobdiff_plain;f=SHUTTLE%2FAliShuttleTrigger.cxx;h=9105d3559442db34aef7f3158d4b0f351f7156e0;hb=8d637fa1e7039fe08ff0dac92f08c06c539df966;hp=b0dae21fa30fafe8e4bb68ebc4f91a05be73ad6e;hpb=7bfb20903909b5071bf3fb12acf008f5e0bdf15b;p=u%2Fmrichter%2FAliRoot.git diff --git a/SHUTTLE/AliShuttleTrigger.cxx b/SHUTTLE/AliShuttleTrigger.cxx index b0dae21fa30..9105d355944 100644 --- a/SHUTTLE/AliShuttleTrigger.cxx +++ b/SHUTTLE/AliShuttleTrigger.cxx @@ -15,6 +15,54 @@ /* $Log$ + Revision 1.15 2007/12/10 18:29:23 acolla + Some log added to the listen mode + + Revision 1.14 2007/12/07 19:14:36 acolla + in AliShuttleTrigger: + + Added automatic collection of new runs on a regular time basis (settable from the configuration) + + in AliShuttleConfig: new members + + - triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs + - mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD) + + in AliShuttle: + + - logs now stored in logs/#RUN/DET_#RUN.log + + Revision 1.13 2006/11/16 16:16:48 jgrosseo + introducing strict run ordering flag + removed giving preprocessor name to preprocessor, they have to know their name themselves ;-) + + Revision 1.12 2006/10/20 15:22:59 jgrosseo + o) Adding time out to the execution of the preprocessors: The Shuttle forks and the parent process monitors the child + o) Merging Collect, CollectAll, CollectNew function + o) Removing implementation of empty copy constructors (declaration still there!) + + Revision 1.11 2006/10/02 16:38:39 jgrosseo + update (alberto): + fixed memory leaks + storing of objects that failed to be stored to the grid before + interfacing of shuttle status table in daq system + + Revision 1.10 2006/08/15 10:50:00 jgrosseo + effc++ corrections (alberto) + + Revision 1.9 2006/08/08 14:19:29 jgrosseo + Update to shuttle classes (Alberto) + + - Possibility to set the full object's path in the Preprocessor's and + Shuttle's Store functions + - Possibility to extend the object's run validity in the same classes + ("startValidity" and "validityInfinite" parameters) + - Implementation of the StoreReferenceData function to store reference + data in a dedicated CDB storage. + + Revision 1.8 2006/07/21 07:37:20 jgrosseo + last run is stored after each run + Revision 1.7 2006/07/20 09:54:40 jgrosseo introducing status management: The processing per subdetector is divided into several steps, after each step the status is stored on disk. If the system crashes in any of the steps the Shuttle @@ -52,49 +100,28 @@ // // This class is to deal with DAQ LogBook and DAQ "end of run" notification. // It has severeal two modes: -// 1) syncrhnized - Collect(), CollectNew() and CollectAll methods +// 1) synchronized - Collect() // 2) asynchronized - Run() - starts listening for DAQ "end of run" // notification by DIM service. // #include "AliShuttleTrigger.h" -#include -#include -#include -#include #include +#include #include "AliLog.h" -#include "AliCDBManager.h" -#include "AliCDBStorage.h" -#include "AliCDBEntry.h" - -#include "AliDCSValue.h" #include "AliShuttleConfig.h" #include "AliShuttle.h" #include "DATENotifier.h" -ClassImp(TerminateSignalHandler) - -//______________________________________________________________________ -TerminateSignalHandler::TerminateSignalHandler(const TerminateSignalHandler& /*other*/): -TSignalHandler() -{ -// copy constructor (not implemented) +#include -} - -//______________________________________________________________________ -TerminateSignalHandler &TerminateSignalHandler::operator=(const TerminateSignalHandler& /*other*/) -{ -// assignment operator (not implemented) - -return *this; -} +ClassImp(TerminateSignalHandler) +ClassImp(AliShuttleTrigger) //______________________________________________________________________________________________ -Bool_t TerminateSignalHandler::Notify() +Bool_t TerminateSignalHandler::Notify() { // Sentd terminate command to the Shuttle trigger @@ -105,17 +132,13 @@ Bool_t TerminateSignalHandler::Notify() } //______________________________________________________________________________________________ -//______________________________________________________________________________________________ - -ClassImp(AliShuttleTrigger) - -//______________________________________________________________________________________________ -AliShuttleTrigger::AliShuttleTrigger(const AliShuttleConfig* config, - UInt_t timeout, Int_t retries): +AliShuttleTrigger::AliShuttleTrigger(const AliShuttleConfig* config): fConfig(config), fShuttle(NULL), - fNotified(kFALSE), fTerminate(kFALSE), fLastRun(0), fCondition(&fMutex), - fQuitSignalHandler(this, kSigQuit), - fInterruptSignalHandler(this, kSigInterrupt) + fNotified(kFALSE), fTerminate(kFALSE), + fMutex(), fCondition(&fMutex), + fQuitSignalHandler(0), + fInterruptSignalHandler(0), + fLastMailDiskSpace(0) { // // config - pointer to the AliShuttleConfig object which represents @@ -124,49 +147,40 @@ AliShuttleTrigger::AliShuttleTrigger(const AliShuttleConfig* config, // localStorage (local) CDB storage to be used if mainStorage is unavailable // + if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********"); + UInt_t timeout = fConfig->GetDCSTimeOut(); + Int_t retries = fConfig->GetDCSRetries(); fShuttle = new AliShuttle(config, timeout, retries); - gSystem->AddSignalHandler(&fQuitSignalHandler); - gSystem->AddSignalHandler(&fInterruptSignalHandler); -} + fQuitSignalHandler = new TerminateSignalHandler(this, kSigQuit); + fInterruptSignalHandler = new TerminateSignalHandler(this, kSigInterrupt); - - -//______________________________________________________________________ -AliShuttleTrigger::AliShuttleTrigger(const AliShuttleTrigger& /*other*/): -TObject() -{ -// copy constructor (not implemented) + gSystem->AddSignalHandler(fQuitSignalHandler); + gSystem->AddSignalHandler(fInterruptSignalHandler); } -//______________________________________________________________________ -AliShuttleTrigger &AliShuttleTrigger::operator=(const AliShuttleTrigger& /*other*/) -{ -// assignment operator (not implemented) - -return *this; -} - - - - - //______________________________________________________________________________________________ AliShuttleTrigger::~AliShuttleTrigger() { -// destructor + // destructor - gSystem->RemoveSignalHandler(&fQuitSignalHandler); - gSystem->RemoveSignalHandler(&fInterruptSignalHandler); + gSystem->RemoveSignalHandler(fQuitSignalHandler); + gSystem->RemoveSignalHandler(fInterruptSignalHandler); delete fShuttle; + + delete fQuitSignalHandler; + fQuitSignalHandler = 0; + + delete fInterruptSignalHandler; + fInterruptSignalHandler = 0; } //______________________________________________________________________________________________ Bool_t AliShuttleTrigger::Notify() { // - // Trigger CollectNew() methods in asynchronized (listen) mode. + // Trigger Collect() methods in asynchronized (listen) mode. // Usually called automaticly by DATENotifier on "end of run" // notification event. // @@ -192,24 +206,50 @@ void AliShuttleTrigger::Terminate() { fCondition.Signal(); } +//______________________________________________________________________________________________ +void AliShuttleTrigger::CheckTerminate() +{ + // + // Checks if the Shuttle got an external terminate request by a created file + // This is an alternative to the signal which causes problems with the API libraries + // + + if (strlen(fConfig->GetTerminateFilePath()) == 0) + return; + + if (gSystem->AccessPathName(fConfig->GetTerminateFilePath()) == kFALSE) + { + AliInfo("Terminate file exists. Terminating Shuttle..."); + fTerminate = kTRUE; + } +} + //______________________________________________________________________________________________ void AliShuttleTrigger::Run() { // // AliShuttleTrigger main loop for asynchronized (listen) mode. // It spawns DIM service listener and waits for DAQ "end of run" - // notification. Calls CollectNew() on notification. + // notification. Calls Collect() on notification. // fTerminate = kFALSE; - DATENotifier* notifier = new DATENotifier(this, "/DATE/LOGBOOK/UPDATE"); + DATENotifier* notifier = new DATENotifier(this, "/LOGBOOK/SUBSCRIBE/ECS_EOR"); + Int_t nTry=0; + Int_t nMaxTry = fConfig->GetMaxRetries()+1; + Int_t received=0; + + AliInfo("Listening for ECS trigger"); + while (1) { fMutex.Lock(); while (!(fNotified || fTerminate)) { - fCondition.Wait(); + received=fCondition.TimedWaitRelative(1000*fConfig->GetTriggerWait()); + CheckTerminate(); + if (received==1) break; // 1 = timeout } fNotified = kFALSE; @@ -220,282 +260,160 @@ void AliShuttleTrigger::Run() { AliInfo("Terminated."); break; } - - CollectNew(); - } - - delete notifier; -} - -//______________________________________________________________________________________________ -Bool_t AliShuttleTrigger::RetrieveDATEEntries(const char* whereClause, - TObjArray& entries) -{ -// Retrieve start time and end time for all runs in the DAQ logbook -// that aren't processed yet - - TString sqlQuery; - sqlQuery = Form("select run, time_start, time_end from logbook %s order by run", - whereClause); - - TSQLServer* aServer; - TString logbookHost=Form("mysql://%s", fConfig->GetDAQlbHost()); - - aServer = TSQLServer::Connect(logbookHost, - fConfig->GetDAQlbUser(), - fConfig->GetDAQlbPass()); - if (!aServer) { - AliError("Can't establish connection to DAQ log book DB!"); - return kFALSE; - } - - aServer->GetTables("REFSYSLOG"); - - TSQLResult* aResult; - aResult = aServer->Query(sqlQuery); - if (!aResult) { - AliError(Form("Can't execute query <%s>!", sqlQuery.Data())); - delete aServer; - return kFALSE; - } - - if (aResult->GetFieldCount() != 3) { - AliError("Invalid SQL result field number!"); - delete aResult; - delete aServer; - return kFALSE; - } - - TSQLRow* aRow; - while ((aRow = aResult->Next())) { - TString runString(aRow->GetField(0), aRow->GetFieldLength(0)); - Int_t run = runString.Atoi(); - - TString startTimeString(aRow->GetField(1), - aRow->GetFieldLength(1)); - Int_t startTime = startTimeString.Atoi(); - if (!startTime) { - AliWarning(Form("Zero StartTime for run <%d>!", run)); - AliWarning("Going to skip this run!"); - continue; - } - - TString endTimeString(aRow->GetField(2), - aRow->GetFieldLength(2)); - Int_t endTime = endTimeString.Atoi(); - if (!endTime) { - AliWarning(Form("Zero EndTime for run <%d>!", run)); - AliWarning("Going to skip this run!"); - continue; + + if (received == 0) + { + AliInfo("Trigger from ECS received!"); + } else if (received == 1) { + AliInfo(Form("Timeout (%d s) waiting for trigger. " + "Starting collection of new runs!", + fConfig->GetTriggerWait())); + } else { + AliInfo("Error receiving trigger from ECS!"); + break; } - - if (startTime > endTime) { - AliWarning(Form("StartTime bigger than EndTime for run <%d>", run)); - AliWarning("Going to skip this run!"); - continue; + + nTry++; + AliInfo(Form("Received %d triggers so far", nTry)); + + if (fConfig->GetRunMode() == AliShuttleConfig::kTest) + { + if(nTry>=nMaxTry) + { + AliInfo(Form("Collect() ran more than %d times -> Exiting!", + nMaxTry)); + break; + } } - - entries.AddLast(new AliShuttleTriggerDATEEntry(run, startTime, endTime)); - delete aRow; + + Collect(); + CheckTerminate(); } - delete aResult; - - aServer->Close(); - delete aServer; - - entries.SetOwner(1); - - return kTRUE; -} - -//______________________________________________________________________________________________ -Bool_t AliShuttleTrigger::RetrieveConditionsData(const TObjArray& dateEntries, Bool_t updateLastRun) -{ - // Retrieve conditions data for all runs that aren't processed yet - - Bool_t hasError = kFALSE; - - TIter iter(&dateEntries); - AliShuttleTriggerDATEEntry* anEntry; - - while ((anEntry = (AliShuttleTriggerDATEEntry*) iter.Next())) - { - if (!fShuttle->Process(anEntry->GetRun(), - anEntry->GetStartTime(), - anEntry->GetEndTime())) - { - hasError = kTRUE; - } - - if (!hasError && updateLastRun && fLastRun < anEntry->GetRun()) - { - fLastRun = anEntry->GetRun(); - WriteLastRun(); - } - } - - return hasError == kFALSE; -} - -//______________________________________________________________________________________________ -Bool_t AliShuttleTrigger::ReadLastRun() -{ - // reads the last processed run from local CDB - - AliCDBEntry* cdbEntry = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalURI()) - ->Get("/SHUTTLE/SYSTEM/LASTRUN", 0); - - if (cdbEntry) - { - TObject* anObject = cdbEntry->GetObject(); - if (anObject == NULL || anObject->IsA() != AliDCSValue::Class()) - { - AliError("Invalid last run object stored to CDB!"); - return kFALSE; - } - AliDCSValue* dcsValue = (AliDCSValue*) anObject; - fLastRun = dcsValue->GetInt(); - - delete cdbEntry; - } - else - { - AliFatal("No last run number stored. Please set first. Aborting"); - return kFALSE; - } - - AliInfo(Form("Last run number <%d>", fLastRun)); - - return kTRUE; -} - -//______________________________________________________________________________________________ -Bool_t AliShuttleTrigger::WriteLastRun() -{ - // writes the last succesfully processed run to local CDB - - AliDCSValue lastRunObj(fLastRun, 0); - AliCDBMetaData metaData; - AliCDBId cdbID(AliCDBPath("SHUTTLE", "SYSTEM", "LASTRUN"), 0, 0); - - UInt_t result = AliCDBManager::Instance()->GetStorage(AliShuttle::GetLocalURI()) - ->Put(&lastRunObj, cdbID, &metaData); - - if (!result) { - AliError("Can't store last run to CDB!"); - return kFALSE; - } - - return kTRUE; -} - -//______________________________________________________________________________________________ -Bool_t AliShuttleTrigger::SetNewLastRun(Int_t run) -{ - // sets a new run manually, use with caution! - - fShuttle->Log("SHUTTLE", Form("Setting last run manually to %d", run)); - - fLastRun = run; - return WriteLastRun(); + delete notifier; } //______________________________________________________________________________________________ Bool_t AliShuttleTrigger::Collect(Int_t run) { // - // Collects conditions date for the given run. + // this function creates a thread that runs the shuttle + // then it checks if the shuttle is still running by checking the monitoring functions of the shuttle // - AliInfo(Form("Collecting conditions data for run <%d> ...", run)); + // first checking disk space + Long_t id = 0; + Long_t bsize = 0; + Long_t blocks = 0; + Long_t bfree = 0; - TString whereClause("where run = "); - whereClause += run; + gSystem->GetFsInfo(fConfig->GetShuttleFileSystem(), &id, &bsize, &blocks, &bfree); - TObjArray dateEntries; - if (!RetrieveDATEEntries(whereClause, dateEntries)) { - AliError("Can't retrieve entries from DAQ log book."); - return kFALSE; - } - - if (!dateEntries.GetEntriesFast()) { - AliError(Form("There isn't entry for run <%d> in DAQ log book!", - run)); - return kFALSE; - } - - if (dateEntries.GetEntriesFast() > 1) { - AliError(Form("There is more than one entry for run <%d> in DAQ log book", run)); - return kFALSE; - } - - if (!RetrieveConditionsData(dateEntries, kFALSE)) { - AliError("An error occured during conditions data retrieval!"); - return kFALSE; - } - - return kTRUE; -} - -//______________________________________________________________________________________________ -Bool_t AliShuttleTrigger::CollectNew() -{ - // - // Collects conditions data for all new run written to DAQ LogBook. - // + AliInfo(Form("n. of free blocks = %ld, total n. of blocks = %ld",bfree,blocks)); + Int_t spaceFree = (Int_t)(((Float_t)bfree/(Float_t)blocks)*100); - AliInfo("Collecting conditions data for new runs ..."); - - if (!ReadLastRun()) - { - AliError("Retrieving of last run failed"); - return kFALSE; - } - - TString whereClause("where run > "); - whereClause += fLastRun; + if (spaceFree < fConfig->GetFreeDiskWarningThreshold()) { + AliWarning(Form("************** Free space left = %d%%, below the Warning Threshold (%d%%)",spaceFree,fConfig->GetFreeDiskWarningThreshold())); + if (TMath::Abs(time(0) - fLastMailDiskSpace) >= 86400){ // 86400 = n. of seconds in 1 d + SendMailDiskSpace(fConfig->GetFreeDiskWarningThreshold()); + fLastMailDiskSpace = time(0); // resetting fLastMailDiskSpace to time(0) = now + } + if (spaceFree < fConfig->GetFreeDiskFatalThreshold()){ + AliError(Form("*************** Free space left = %d%%, below the Fatal Threshold (%d%%), terminating....",spaceFree,fConfig->GetFreeDiskFatalThreshold())); + SendMailDiskSpace(fConfig->GetFreeDiskFatalThreshold()); + fTerminate = kTRUE; // terminating.... + } + } - TObjArray dateEntries; - if (!RetrieveDATEEntries(whereClause, dateEntries)) { - AliError("Can't retrieve entries from DAQ log book."); + if (fTerminate) { return kFALSE; } - if (!RetrieveConditionsData(dateEntries, kTRUE)) { - AliError("Process of at least one run failed!"); - // return kFALSE; - } - - return kTRUE; + return fShuttle->Collect(run); } - //______________________________________________________________________________________________ -Bool_t AliShuttleTrigger::CollectAll() +Bool_t AliShuttleTrigger::SendMailDiskSpace(Short_t percentage) { // - // Collects conditions data for all run written in DAQ LogBook. + // sends a mail to the shuttle experts in case of free disk space < theshold // + + + AliInfo("******************* Sending the Mail!! *********************"); + if (!fConfig->SendMail()) + return kTRUE; + + Int_t runMode = (Int_t)fConfig->GetRunMode(); + TString tmpStr; + if (runMode == 0) tmpStr = " Nightly Test:"; + else tmpStr = " Data Taking:"; + void* dir = gSystem->OpenDirectory(fShuttle->GetShuttleLogDir()); + if (dir == NULL) + { + if (gSystem->mkdir(fShuttle->GetShuttleLogDir(), kTRUE)) + { + AliWarning(Form("SendMail - Can't open directory <%s>", fShuttle->GetShuttleLogDir())); + return kFALSE; + } - if (!ReadLastRun()) - { - AliError("Retrieving of last run failed"); - return kFALSE; - } - - AliInfo("Collecting conditions data for all runs ..."); - - TObjArray dateEntries; - if (!RetrieveDATEEntries("", dateEntries)) { - AliError("Can't retrieve entries from DAQ log book."); - return kFALSE; + } else { + gSystem->FreeDirectory(dir); } - if (!RetrieveConditionsData(dateEntries, kTRUE)) { - AliError("An error occured during conditions data retrieval!"); - return kFALSE; + // SHUTTLE responsibles in to + TString to=""; + TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal)); + TObjString *anAdmin=0; + while ((anAdmin = (TObjString*) iterAdmins.Next())) + { + to += Form("%s,", anAdmin->GetName()); } - - return kTRUE; + if (to.Length() > 0) + to.Remove(to.Length()-1); + AliDebug(2, Form("to: %s",to.Data())); + + // mail body + TString bodyFileName; + bodyFileName.Form("%s/mail.body", fShuttle->GetShuttleLogDir()); + gSystem->ExpandPathName(bodyFileName); + + ofstream mailBody; + mailBody.open(bodyFileName, ofstream::out); + + if (!mailBody.is_open()) + { + AliWarning(Form("Could not open mail body file %s", bodyFileName.Data())); + return kFALSE; + } + + TString subject; + TString body; + + Int_t percentage_used = 100 - percentage; + subject = Form("%s CRITICAL Disk Space usage exceeds %d%c!", + tmpStr.Data(),percentage_used,'%'); + AliDebug(2, Form("subject: %s", subject.Data())); + + body = "Dear SHUTTLE experts, \n\n"; + body += "The usage of the disk space on the shuttle machine has overcome \n"; + body += Form("the threshold of %d%%. \n \n",percentage_used); + body += "Please check! \n \n"; + body += "Please do not answer this message directly, it is automatically generated.\n\n"; + body += "Greetings,\n\n \t\t\tthe SHUTTLE\n"; + + AliDebug(2, Form("Body : %s", body.Data())); + + mailBody << body.Data(); + mailBody.close(); + + // send mail! + TString mailCommand = Form("mail -s \"%s\" %s < %s", + subject.Data(), + to.Data(), + bodyFileName.Data()); + AliDebug(2, Form("mail command: %s", mailCommand.Data())); + + Bool_t result = gSystem->Exec(mailCommand.Data()); + + return result == 0; } -