]> git.uio.no Git - u/mrichter/AliRoot.git/blobdiff - SHUTTLE/AliShuttle.cxx
sending number of open runs also at the end of processing
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
index 92b7ac78d166635ae08f1c2517823ba03e473818..f77456d458bf2e49d2744bff4ea297b5605b42dd 100644 (file)
 
 /*
 $Log$
+Revision 1.81  2007/12/20 14:24:59  jgrosseo
+Do not increase count in case of StoreError
+
+Revision 1.80  2007/12/20 13:31:28  acolla
+Bug fix (Jan Fiete): recovering from StoreError, if the store to OCDB is successful,
+the Shuttle sets current detector's status=done
+
+Revision 1.79  2007/12/19 14:03:01  acolla
+
+detector name to build the lhcPeriod_DET is to be looked up in the "detector" column, not "partition"
+
+Revision 1.78  2007/12/19 11:50:41  acolla
+
+Raw data tag merged files is written in /alice/data/.../lhcPeriod_DET/runNb/raw if partition is made of DET only
+
+Revision 1.77  2007/12/19 11:16:16  acolla
+More meaningful log message added in GetFileSources
+
+Revision 1.76  2007/12/19 07:45:20  acolla
+bug fix in the name of the raw tag files (Raw instead of raw)
+
+Revision 1.75  2007/12/18 15:42:14  jgrosseo
+adding number of open runs to monitoring
+
+Revision 1.74  2007/12/17 03:23:32  jgrosseo
+several bugfixes
+added "empty preprocessor" as placeholder for Acorde in FDR
+
+Revision 1.73  2007/12/14 19:31:36  acolla
+Sending email to DCS experts is temporarily commented
+
+Revision 1.72  2007/12/13 15:44:28  acolla
+Run type added in mail sent to detector expert (eases understanding)
+
+Revision 1.71  2007/12/12 14:56:14  jgrosseo
+sending shuttle_ignore to ML also in case of 0 events
+
+Revision 1.70  2007/12/12 13:45:35  acolla
+Monalisa started in Collect() function. Alive message to monitor is sent at each Collect and every minute during preprocessor processing.
+
+Revision 1.69  2007/12/12 10:06:29  acolla
+in AliShuttle.cxx: SHUTTLE logbook is updated in case of invalid run times:
+
+time_start==0 && time_end==0
+
+logbook is NOT updated if time_start != 0 && time_end == 0, because it may mean that the run is still ongoing.
+
+Revision 1.68  2007/12/11 10:15:17  acolla
+Added marking SHUTTLE=DONE for invalid runs
+(invalid start time or end time) and runs with totalEvents < 1
+
+Revision 1.67  2007/12/07 19:14:36  acolla
+in AliShuttleTrigger:
+
+Added automatic collection of new runs on a regular time basis (settable from the configuration)
+
+in AliShuttleConfig: new members
+
+- triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
+- mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
+
+in AliShuttle:
+
+- logs now stored in logs/#RUN/DET_#RUN.log
+
+Revision 1.66  2007/12/05 10:45:19  jgrosseo
+changed order of arguments to TMonaLisaWriter
+
+Revision 1.65  2007/11/26 16:58:37  acolla
+Monalisa configuration added: host and table name
+
+Revision 1.64  2007/11/13 16:15:47  acolla
+DCS map is stored in a file in the temp folder where the detector is processed.
+If the preprocessor fails, the temp folder is not removed. This will help the debugging of the problem.
+
+Revision 1.63  2007/11/02 10:53:16  acolla
+Protection added to AliShuttle::CopyFileLocally
+
+Revision 1.62  2007/10/31 18:23:13  acolla
+Further development on the Shuttle:
+
+- Shuttle now connects to the Grid as alidaq. The OCDB and Reference folders
+are now built from /alice/data, e.g.:
+/alice/data/2007/LHC07a/OCDB
+
+the year and LHC period are taken from the Shuttle.
+Raw metadata files are stored by GRP to:
+/alice/data/2007/LHC07a/<runNb>/Raw/RunMetadata.root
+
+- Shuttle sends a mail to DCS experts each time DP retrieval fails.
+
+Revision 1.61  2007/10/30 20:33:51  acolla
+Improved managing of temporary folders, which weren't correctly handled.
+Resolved bug introduced in StoreReferenceFile, which caused SPD preprocessor fail.
+
 Revision 1.60  2007/10/29 18:06:16  acolla
 
 New function StoreRunMetadataFile added to preprocessor and Shuttle interface
@@ -590,6 +685,7 @@ Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
                        Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
                                                "there are previous unprocessed runs!",
                                                fCurrentDetector.Data(), aLocId.GetPath().Data()));
+                       result = kFALSE;
                        continue;
                }
 
@@ -788,31 +884,29 @@ Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridF
        TString localBaseFolder = sto->GetBaseFolder();
        
        // Build Run level folder
-       // folder = /alice/data/year/lhcPeriod/runNb/Raw
+       // folder = /alice/data/year/lhcPeriod/runNb/raw
        
-       TTimeStamp startTime(GetCurrentStartTime());
-               
-       TString year =  Form("%d",startTime.GetDate());
-       year = year(0,4);
                
-       TString lhcPeriod = GetRunParameter("LHCperiod");
-       
+       TString lhcPeriod = GetLHCPeriod();     
        if (lhcPeriod.Length() == 0) 
        {
                Log("SHUTTLE","StoreRunMetaDataFile - LHCPeriod not found in logbook!");
                return 0;
        }
        
-       // TODO: currently SHUTTLE cannot write in /alice/data/ !!!!!
-       //TString target = Form("%s/GRP/RunMetadata/alice/data/%s/%s/%d/Raw/%s", 
-       //                      localBaseFolder.Data(), year.Data(), 
-       //                      lhcPeriod.Data(), GetCurrentRun(), gridFileName);
+       // TODO partitions with one detector only write data into LHCperiod_DET
+       TString partition = GetRunParameter("detector");
        
-       TString target = Form("%s/GRP/RunMetadata/alice/simulation/%s/%s/%d/Raw/%s", 
-                               localBaseFolder.Data(), year.Data(), 
+       if (partition.Length() > 0 && partition != "ALICE")
+       {
+               lhcPeriod.Append(Form("_%s", partition.Data()));
+               Log(fCurrentDetector, Form("Run data tags merged file will be written in %s", 
+                               lhcPeriod.Data()));
+       }
+               
+       TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw/%s", 
+                               localBaseFolder.Data(), GetCurrentYear(), 
                                lhcPeriod.Data(), GetCurrentRun(), gridFileName);
-       
-                               
                                        
        return CopyFileLocally(localFile, target);
 }
@@ -833,7 +927,7 @@ Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target)
        void* dir = gSystem->OpenDirectory(targetDir.Data());
        if (dir == NULL) {
                if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
-                       Log("SHUTTLE", Form("StoreFileLocally - Can't open directory <%s>", targetDir.Data()));
+                       Log("SHUTTLE", Form("CopyFileLocally - Can't open directory <%s>", targetDir.Data()));
                        return kFALSE;
                }
 
@@ -841,23 +935,36 @@ Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target)
                gSystem->FreeDirectory(dir);
        }
        
-       Int_t result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
+       Int_t result = 0;
+       
+       result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
        if (result)
        {
-               Log("SHUTTLE", Form("StoreFileLocally - %s does not exist", localFile));
+               Log("SHUTTLE", Form("CopyFileLocally - %s does not exist", localFile));
                return kFALSE;
        }
 
+       result = gSystem->GetPathInfo(target, 0, (Long64_t*) 0, 0, 0);
+       if (!result)
+       {
+               Log("SHUTTLE", Form("CopyFileLocally - target file %s already exist, removing...", target.Data()));
+               if (gSystem->Unlink(target.Data()))
+               {
+                       Log("SHUTTLE", Form("CopyFileLocally - Could not remove existing target file %s!", target.Data()));
+                       return kFALSE;
+               }
+       }       
+       
        result = gSystem->CopyFile(localFile, target);
 
        if (result == 0)
        {
-               Log("SHUTTLE", Form("StoreFileLocally - File %s stored locally to %s", localFile, target.Data()));
+               Log("SHUTTLE", Form("CopyFileLocally - File %s stored locally to %s", localFile, target.Data()));
                return kTRUE;
        }
        else
        {
-               Log("SHUTTLE", Form("StoreFileLocally - Could not store file %s to %s! Error code = %d", 
+               Log("SHUTTLE", Form("CopyFileLocally - Could not store file %s to %s! Error code = %d", 
                                localFile, target.Data(), result));
                return kFALSE;
        }       
@@ -900,12 +1007,8 @@ Bool_t AliShuttle::CopyFilesToGrid(const char* type)
        } 
        else if (strcmp(type, "metadata") == 0)
        {
-               TTimeStamp startTime(GetCurrentStartTime());
-               
-               TString year =  Form("%d",startTime.GetDate());
-               year = year(0,4);
                        
-               TString lhcPeriod = GetRunParameter("LHCperiod");
+               TString lhcPeriod = GetLHCPeriod();
        
                if (lhcPeriod.Length() == 0) 
                {
@@ -913,16 +1016,19 @@ Bool_t AliShuttle::CopyFilesToGrid(const char* type)
                        return 0;
                }
                
-               // TODO: currently SHUTTLE cannot write in /alice/data/ !!!!!
-               //dir = Form("%s/GRP/RunMetadata/alice/data/%s/%s/%d/Raw", 
-               //              localBaseFolder.Data(), year.Data(), 
-               //              lhcPeriod.Data(), GetCurrentRun());
-               //alienDir = dir(dir.Index("/alice/data/"), dir.Length());
+               // TODO partitions with one detector only write data into LHCperiod_DET
+               TString partition = GetRunParameter("detector");
+       
+               if (partition.Length() > 0 && partition != "ALICE")
+               {
+                       lhcPeriod.Append(Form("_%s", partition.Data()));
+               }
                
-               dir = Form("%s/GRP/RunMetadata/alice/simulation/%s/%s/%d/Raw", 
-                               localBaseFolder.Data(), year.Data(), 
+               dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw", 
+                               localBaseFolder.Data(), GetCurrentYear(), 
                                lhcPeriod.Data(), GetCurrentRun());
-               alienDir = dir(dir.Index("/alice/simulation/"), dir.Length());
+               alienDir = dir(dir.Index("/alice/data/"), dir.Length());
+               
                begin = "";
        }
        else 
@@ -985,6 +1091,8 @@ Bool_t AliShuttle::CopyFilesToGrid(const char* type)
                        
                        if (!result->GetFileName(1)) // TODO: It looks like element 0 is always 0!!
                        {
+                               // TODO It does not work currently! Bug in TAliEn::Mkdir
+                               // TODO Manually fixed in local root v5-16-00
                                if (!gGrid->Mkdir(alienDir.Data(),"-p",0))
                                {
                                        Log("SHUTTLE", Form("CopyFilesToGrid - Cannot create directory %s",
@@ -1222,7 +1330,9 @@ void AliShuttle::SendMLInfo()
        mlList.Add(&mlStatus);
        mlList.Add(&mlRetryCount);
 
-       fMonaLisa->SendParameters(&mlList);
+       TString mlID;
+       mlID.Form("%d", GetCurrentRun());
+       fMonaLisa->SendParameters(&mlList, mlID);
 }
 
 //______________________________________________________________________________________________
@@ -1294,7 +1404,7 @@ Bool_t AliShuttle::ContinueProcessing()
                return kFALSE;
        }
 
-       if (status->GetStatus() == AliShuttleStatus::kStoreError) {
+       if (status->GetStatus() == AliShuttleStatus::kStoreStarted || status->GetStatus() == AliShuttleStatus::kStoreError) {
                Log("SHUTTLE",
                        Form("ContinueProcessing - %s: Grid storage of one or more "
                                "objects failed. Trying again now",
@@ -1305,7 +1415,7 @@ Bool_t AliShuttle::ContinueProcessing()
                                "successfully stored into main storage",
                                fCurrentDetector.Data()));
                        UpdateShuttleStatus(AliShuttleStatus::kDone);
-                       UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
+                       UpdateShuttleLogbook(fCurrentDetector, "DONE");
                } else {
                        Log("SHUTTLE",
                                Form("ContinueProcessing - %s: Grid storage failed again",
@@ -1349,6 +1459,7 @@ Bool_t AliShuttle::ContinueProcessing()
                if (status->GetStatus() == AliShuttleStatus::kDCSError || 
                        status->GetStatus() == AliShuttleStatus::kDCSStarted)
                                increaseCount = kFALSE;
+                               
                UpdateShuttleStatus(AliShuttleStatus::kStarted, increaseCount);
                cont = kTRUE;
        }
@@ -1373,11 +1484,6 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
        Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
                                        GetCurrentRun()));
 
-       // create ML instance that monitors this run
-       fMonaLisa = new TMonaLisaWriter(Form("%d", GetCurrentRun()), "SHUTTLE", "aliendb1.cern.ch");
-       // disable monitoring of other parameters that come e.g. from TFile
-       gMonitoringWriter = 0;
-
        // Send the information to ML
        TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
        TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
@@ -1386,7 +1492,9 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
        mlList.Add(&mlStatus);
        mlList.Add(&mlRunType);
 
-       fMonaLisa->SendParameters(&mlList);
+       TString mlID;
+       mlID.Form("%d", GetCurrentRun());
+       fMonaLisa->SendParameters(&mlList, mlID);
 
        if (fLogbookEntry->IsDone())
        {
@@ -1434,20 +1542,43 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
        // Initialization
        Bool_t hasError = kFALSE;
 
-       AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
-       if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
-       AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
-       if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
-
+       // Set the CDB and Reference folders according to the year and LHC period
+       TString lhcPeriod(GetLHCPeriod());
+       if (lhcPeriod.Length() == 0) 
+       {
+               Log("SHUTTLE","Process - LHCPeriod not found in logbook!");
+               return 0; 
+       }       
+       
+       if (fgkMainCDB.Length() == 0)
+               fgkMainCDB = Form("alien://folder=/alice/data/%d/%s/OCDB?user=alidaq?cacheFold=/tmp/OCDBCache", 
+                                       GetCurrentYear(), lhcPeriod.Data());
+       
+       if (fgkMainRefStorage.Length() == 0)
+               fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", 
+                                       GetCurrentYear(), lhcPeriod.Data());
+       
        // Loop on detectors in the configuration
        TIter iter(fConfig->GetDetectors());
        TObjString* aDetector = 0;
 
+       Bool_t first = kTRUE;
+
        while ((aDetector = (TObjString*) iter.Next()))
        {
                fCurrentDetector = aDetector->String();
 
                if (ContinueProcessing() == kFALSE) continue;
+               
+               if (first)
+               {
+                 // only read QueryCDB when needed and only once
+                 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
+                 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
+                 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
+                 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
+                 first = kFALSE;
+               }
 
                Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START  ******",
                                                GetCurrentRun(), aDetector->GetName()));
@@ -1524,9 +1655,12 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
                                        }
                                        
                                        if (expiredTime % 60 == 0)
+                                       {
                                                Log("SHUTTLE", Form("Process - %s: Checking process. "
                                                        "Run time: %d seconds - Memory consumption: %d KB",
                                                        fCurrentDetector.Data(), expiredTime, mem));
+                                               SendAlive();
+                                       }
                                        
                                        if (mem > fConfig->GetPPMaxMem())
                                        {
@@ -1581,7 +1715,8 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
                        }
                        
                        TString wd = gSystem->WorkingDirectory();
-                       TString tmpDir = Form("%s/%s_process", GetShuttleTempDir(), fCurrentDetector.Data());
+                       TString tmpDir = Form("%s/%s_%d_process", GetShuttleTempDir(), 
+                               fCurrentDetector.Data(), GetCurrentRun());
                        
                        Int_t result = gSystem->GetPathInfo(tmpDir.Data(), 0, (Long64_t*) 0, 0, 0);
                        if (!result) // temp dir already exists!
@@ -1589,12 +1724,12 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
                                Log(fCurrentDetector.Data(), 
                                        Form("Process - %s dir already exists! Removing...", tmpDir.Data()));
                                gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));         
-                       } else {
-                               if (gSystem->mkdir(tmpDir.Data(), 1))
-                               {
-                                       Log(fCurrentDetector.Data(), "Process - could not make temp directory!!");
-                                       gSystem->Exit(1);
-                               }
+                       } 
+                       
+                       if (gSystem->mkdir(tmpDir.Data(), 1))
+                       {
+                               Log(fCurrentDetector.Data(), "Process - could not make temp directory!!");
+                               gSystem->Exit(1);
                        }
                        
                        if (!gSystem->ChangeDirectory(tmpDir.Data())) 
@@ -1606,11 +1741,13 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
                        Bool_t success = ProcessCurrentDetector();
                        
                        gSystem->ChangeDirectory(wd.Data());
-                       
-                       gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
-                       
+                                               
                        if (success) // Preprocessor finished successfully!
                        { 
+                               // remove temporary folder
+                                // temporary commented (JF)
+                               //gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
+                               
                                // Update time_processed field in FXS DB
                                if (UpdateTable() == kFALSE)
                                        Log("SHUTTLE", Form("Process - %s: Could not update FXS databases!", 
@@ -1661,7 +1798,8 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
        TObjArray checkEntryArray;
        checkEntryArray.SetOwner(1);
        TString whereClause = Form("where run=%d", GetCurrentRun());
-       if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
+       if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || 
+                       checkEntryArray.GetEntries() == 0) {
                Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
                                                GetCurrentRun()));
                return hasError == kFALSE;
@@ -1691,10 +1829,6 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
                }
        }
 
-       // remove ML instance
-       delete fMonaLisa;
-       fMonaLisa = 0;
-
        fLogbookEntry = 0;
 
        return hasError == kFALSE;
@@ -1773,9 +1907,13 @@ Bool_t AliShuttle::ProcessCurrentDetector()
                                {
                                        Log(fCurrentDetector, 
                                                Form("ProcessCurrentDetector -"
-                                                       " Error retrieving DCS aliases from server %s", 
-                                                               host.Data()));
+                                                       " Error retrieving DCS aliases from server %s."
+                                                       " Sending mail to DCS experts!", host.Data()));
                                        UpdateShuttleStatus(AliShuttleStatus::kDCSError);
+                                       
+                                       //if (!SendMailToDCS())
+                                       //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
+
                                        delete dcsMap;
                                        return kFALSE;
                                }
@@ -1790,9 +1928,13 @@ Bool_t AliShuttle::ProcessCurrentDetector()
                                {
                                        Log(fCurrentDetector, 
                                                Form("ProcessCurrentDetector -"
-                                                       " Error retrieving DCS data points from server %s", 
-                                                               host.Data()));
+                                                       " Error retrieving DCS data points from server %s."
+                                                       " Sending mail to DCS experts!", host.Data()));
                                        UpdateShuttleStatus(AliShuttleStatus::kDCSError);
+                                       
+                                       //if (!SendMailToDCS())
+                                       //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
+                                       
                                        if (aliasMap) delete aliasMap;
                                        delete dcsMap;
                                        return kFALSE;
@@ -1822,6 +1964,13 @@ Bool_t AliShuttle::ProcessCurrentDetector()
                }
        }
        
+       // save map into file, to help debugging in case of preprocessor error
+       TFile* f = TFile::Open("DCSMap.root","recreate");
+       f->cd();
+       dcsMap->Write("DCSMap", TObject::kSingleKey);
+       f->Close();
+       delete f;
+       
        // DCS Archive DB processing successful. Call Preprocessor!
        UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
 
@@ -1848,6 +1997,58 @@ Bool_t AliShuttle::ProcessCurrentDetector()
        return kTRUE;
 }
 
+//______________________________________________________________________________________________
+void AliShuttle::CountOpenRuns()
+{
+       // Query DAQ's Shuttle logbook and sends the number of open runs to ML
+       
+       // check connection, in case connect
+       if (!Connect(3)) 
+               return;
+
+       TString sqlQuery;
+       sqlQuery = Form("select count(*) from %s where shuttle_done=0", fConfig->GetShuttlelbTable());
+       
+       TSQLResult* aResult = fServer[3]->Query(sqlQuery);
+       if (!aResult) {
+               AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
+               return;
+       }
+
+       AliDebug(2,Form("Query = %s", sqlQuery.Data()));
+       
+       if (aResult->GetRowCount() == 0) {
+               AliError(Form("No result for query %s received", sqlQuery.Data()));
+               return;
+       }
+
+       if (aResult->GetFieldCount() != 1) {
+               AliError(Form("Invalid field count for query %s received", sqlQuery.Data()));
+               return;
+       }
+
+       TSQLRow* aRow = aResult->Next();
+       if (!aRow) {
+               AliError(Form("Could not receive result of query %s", sqlQuery.Data()));
+               return;
+       }
+       
+       TString result(aRow->GetField(0), aRow->GetFieldLength(0));
+       Int_t count = result.Atoi();
+       
+       Log("SHUTTLE", Form("%d unprocessed runs", count));
+       
+       delete aRow;
+       delete aResult;
+
+       TMonaLisaValue mlStatus("SHUTTLE_openruns", count);
+
+       TList mlList;
+       mlList.Add(&mlStatus);
+
+       fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
+}
+
 //______________________________________________________________________________________________
 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
                TObjArray& entries)
@@ -1859,7 +2060,7 @@ Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
        entries.SetOwner(1);
 
        // check connection, in case connect
-       if(!Connect(3)) return kFALSE;
+       if (!Connect(3)) return kFALSE;
 
        TString sqlQuery;
        sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
@@ -1956,10 +2157,106 @@ AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
        UInt_t startTime = entry->GetStartTime();
        UInt_t endTime = entry->GetEndTime();
 
-       if (!startTime || !endTime || startTime > endTime) {
+//     if (!startTime || !endTime || startTime > endTime) 
+//     {
+//             Log("SHUTTLE",
+//                     Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
+//                             run, startTime, endTime));              
+//             
+//             Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+//             fLogbookEntry = entry;  
+//             if (!UpdateShuttleLogbook("shuttle_done"))
+//             {
+//                     AliError(Form("Could not update logbook for run %d !", run));
+//             }
+//             fLogbookEntry = 0;
+//                             
+//             delete entry;
+//             delete aRow;
+//             delete aResult;
+//             return 0;
+//     }
+
+       if (!startTime) 
+       {
                Log("SHUTTLE",
-                       Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
-                               run, startTime, endTime));
+                       Form("QueryRunParameters - Invalid parameters for Run %d: " 
+                               "startTime = %d, endTime = %d. Skipping!",
+                                       run, startTime, endTime));              
+               
+               Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+               fLogbookEntry = entry;  
+               if (!UpdateShuttleLogbook("shuttle_ignored"))
+               {
+                       AliError(Form("Could not update logbook for run %d !", run));
+               }
+               fLogbookEntry = 0;
+                               
+               delete entry;
+               delete aRow;
+               delete aResult;
+               return 0;
+       }
+       
+       if (startTime && !endTime) 
+       {
+               // TODO Here we don't mark SHUTTLE done, because this may mean 
+               //the run is still ongoing!!            
+               Log("SHUTTLE",
+                       Form("QueryRunParameters - Invalid parameters for Run %d: "
+                            "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
+                                       run, startTime, endTime));              
+               
+               //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+               //fLogbookEntry = entry;        
+               //if (!UpdateShuttleLogbook("shuttle_done"))
+               //{
+               //      AliError(Form("Could not update logbook for run %d !", run));
+               //}
+               //fLogbookEntry = 0;
+                               
+               delete entry;
+               delete aRow;
+               delete aResult;
+               return 0;
+       }
+                       
+       if (startTime && endTime && (startTime > endTime)) 
+       {
+               Log("SHUTTLE",
+                       Form("QueryRunParameters - Invalid parameters for Run %d: "
+                               "startTime = %d, endTime = %d. Skipping!",
+                                       run, startTime, endTime));              
+               
+               Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+               fLogbookEntry = entry;  
+               if (!UpdateShuttleLogbook("shuttle_ignored"))
+               {
+                       AliError(Form("Could not update logbook for run %d !", run));
+               }
+               fLogbookEntry = 0;
+                               
+               delete entry;
+               delete aRow;
+               delete aResult;
+               return 0;
+       }
+                       
+       TString totEventsStr = entry->GetRunParameter("totalEvents");  
+       Int_t totEvents = totEventsStr.Atoi();
+       if (totEvents < 1) 
+       {
+               Log("SHUTTLE",
+                       Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));             
+               
+               Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));           
+               fLogbookEntry = entry;  
+               if (!UpdateShuttleLogbook("shuttle_ignored"))
+               {
+                       AliError(Form("Could not update logbook for run %d !", run));
+               }
+               fLogbookEntry = 0;
+                               
                delete entry;
                delete aRow;
                delete aResult;
@@ -2116,8 +2413,8 @@ const char* AliShuttle::GetFile(Int_t system, const char* detector,
                                filePath.Data(), fileSize.Data(), fileChecksum.Data()));
 
        // retrieved file is renamed to make it unique
-       TString localFileName = Form("%s/%s_process/%s_%s_%d_%s_%s.shuttle",
-                                       GetShuttleTempDir(), detector,
+       TString localFileName = Form("%s/%s_%d_process/%s_%s_%d_%s_%s.shuttle",
+                                       GetShuttleTempDir(), detector, GetCurrentRun(),
                                        GetSystemName(system), detector, GetCurrentRun(), 
                                        id, sourceName.Data());
 
@@ -2164,13 +2461,13 @@ const char* AliShuttle::GetFile(Int_t system, const char* detector,
        TObjString *fileParams = new TObjString(Form("%s#!?!#%s", id, sourceName.Data()));
        fFXSlist[system].Add(fileParams);
 
-       static TString staticLocalFileName = localFileName;
-       //fullLocalFileName.Form("%s/%s_process/%s", GetShuttleTempDir(), detector, localFileName.Data());
-
+       static TString staticLocalFileName;
+       staticLocalFileName.Form("%s", localFileName.Data());
+       
        Log(fCurrentDetector, Form("GetFile - Retrieved file with id %s and "
                        "source %s from %s to %s", id, source, 
                        GetSystemName(system), localFileName.Data()));
-
+                       
        return staticLocalFileName.Data();
 }
 
@@ -2237,7 +2534,12 @@ TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char
        // if id is NULL all sources are returned (distinct)
        //
 
-       Log(detector, Form("GetFileSources - Retrieving sources with id %s from %s", id, GetSystemName(system)));
+       if (id)
+       {
+               Log(detector, Form("GetFileSources - Querying %s FXS for files with id %s produced by %s", GetSystemName(system), id, detector));
+       } else {
+               Log(detector, Form("GetFileSources - Querying %s FXS for files produced by %s", GetSystemName(system), detector));
+       }
        
        // check if test mode should simulate a FXS error
        if (fTestMode & kErrorFXSSources)
@@ -2603,17 +2905,22 @@ Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status
 
        TString detName(detector);
        TString setClause;
-       if(detName == "shuttle_done")
+       if (detName == "shuttle_done" || detName == "shuttle_ignored")
        {
                setClause = "set shuttle_done=1";
 
-               // Send the information to ML
-               TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
-
-               TList mlList;
-               mlList.Add(&mlStatus);
+               if (detName == "shuttle_done")
+               {
+                       // Send the information to ML
+                       TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
 
-               fMonaLisa->SendParameters(&mlList);
+                       TList mlList;
+                       mlList.Add(&mlStatus);
+               
+                       TString mlID;
+                       mlID.Form("%d", GetCurrentRun());
+                       fMonaLisa->SendParameters(&mlList, mlID);
+               }
        } else {
                TString statusStr(status);
                if(statusStr.Contains("done", TString::kIgnoreCase) ||
@@ -2676,6 +2983,34 @@ UInt_t AliShuttle::GetCurrentEndTime() const
        return fLogbookEntry ? fLogbookEntry->GetEndTime() : 0;
 }
 
+//______________________________________________________________________________________________
+UInt_t AliShuttle::GetCurrentYear() const
+{
+       //
+       // Get the year of the current run's start time from the logbook entry.
+       // Returns 0 when no logbook entry is set.
+       //
+
+       if (!fLogbookEntry) return 0;
+
+       TTimeStamp startTime(GetCurrentStartTime());
+       // GetDate() encodes the date as the integer YYYYMMDD: dividing by 10000
+       // keeps the year and avoids formatting the date into a string first
+       return startTime.GetDate() / 10000;
+}
+
+//______________________________________________________________________________________________
+const char* AliShuttle::GetLHCPeriod() const
+{
+       //
+       // Return the "LHCperiod" run parameter of the current logbook entry,
+       // or a null pointer when no logbook entry is set
+       //
+
+       return fLogbookEntry
+               ? fLogbookEntry->GetRunParameter("LHCperiod") : 0;
+}
+
 //______________________________________________________________________________________________
 void AliShuttle::Log(const char* detector, const char* message)
 {
@@ -2683,9 +3018,13 @@ void AliShuttle::Log(const char* detector, const char* message)
        // Fill log string with a message
        //
 
-       void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
+       TString logRunDir = GetShuttleLogDir();
+       if (GetCurrentRun() >=0)
+               logRunDir += Form("/%d", GetCurrentRun());
+       
+       void* dir = gSystem->OpenDirectory(logRunDir.Data());
        if (dir == NULL) {
-               if (gSystem->mkdir(GetShuttleLogDir(), kTRUE)) {
+               if (gSystem->mkdir(logRunDir.Data(), kTRUE)) {
                        AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
                        return;
                }
@@ -2732,13 +3071,29 @@ TString AliShuttle::GetLogFileName(const char* detector) const
        TString fileName;
        
        if (GetCurrentRun() >= 0) 
-               fileName.Form("%s/%s_%d.log", GetShuttleLogDir(), detector, GetCurrentRun());
-       else
+       {
+               fileName.Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(), 
+                       detector, GetCurrentRun());
+       } else {
                fileName.Form("%s/%s.log", GetShuttleLogDir(), detector);
+       }
 
        return fileName;
 }
 
+//______________________________________________________________________________________________
+void AliShuttle::SendAlive()
+{
+       // Sends the "Alive" SHUTTLE_status message to MonaLisa (ML)
+
+       TMonaLisaText aliveStatus("SHUTTLE_status", "Alive");
+       TList parameters;
+       parameters.Add(&aliveStatus);
+
+       // posted under the "__PROCESSINGINFO__" identifier
+       fMonaLisa->SendParameters(&parameters, "__PROCESSINGINFO__");
+}
+
 //______________________________________________________________________________________________
 Bool_t AliShuttle::Collect(Int_t run)
 {
@@ -2756,6 +3111,13 @@ Bool_t AliShuttle::Collect(Int_t run)
 
        SetLastAction("Starting");
 
+       // create ML instance
+       if (!fMonaLisa)
+               fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable());
+               
+       SendAlive();
+       CountOpenRuns();
+
        TString whereClause("where shuttle_done=0");
        if (run != -1)
                whereClause += Form(" and run=%d", run);
@@ -2813,10 +3175,12 @@ Bool_t AliShuttle::Collect(Int_t run)
        if (!RetrieveConditionsData(shuttleLogbookEntries))
        {
                Log("SHUTTLE", "Collect - Process of at least one run failed");
+               CountOpenRuns();
                return kFALSE;
        }
 
        Log("SHUTTLE", "Collect - Requested run(s) successfully processed");
+       CountOpenRuns();
        return kTRUE;
 }
 
@@ -2944,12 +3308,28 @@ Bool_t AliShuttle::SendMail()
        if (fTestMode != kNone)
                return kTRUE;
 
+       TString to="";
+       TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
+       TObjString *anExpert=0;
+       while ((anExpert = (TObjString*) iterExperts.Next()))
+       {
+               to += Form("%s,", anExpert->GetName());
+       }
+       if (to.Length() > 0)
+         to.Remove(to.Length()-1);
+       AliDebug(2, Form("to: %s",to.Data()));
+
+       if (to.IsNull()) {
+               Log("SHUTTLE", "List of detector responsibles not yet set!");
+               return kFALSE;
+       }
+
        void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
        if (dir == NULL)
        {
                if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
                {
-                       AliError(Form("Can't open directory <%s>", GetShuttleLogDir()));
+                       Log("SHUTTLE", Form("SendMail - Can't open directory <%s>", GetShuttleLogDir()));
                        return kFALSE;
                }
 
@@ -2966,18 +3346,117 @@ Bool_t AliShuttle::SendMail()
 
        if (!mailBody.is_open())
        {
-               AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
+               Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
                return kFALSE;
        }
 
-       TString to="";
-       TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
-       TObjString *anExpert=0;
-       while ((anExpert = (TObjString*) iterExperts.Next()))
+       TString cc="alberto.colla@cern.ch";
+
+       TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
+                               fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
+       AliDebug(2, Form("subject: %s", subject.Data()));
+
+       TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
+       body += Form("SHUTTLE just detected that your preprocessor "
+                       "failed processing run %d (run type = %s)!!\n\n", 
+                                       GetCurrentRun(), GetRunType());
+       body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", 
+                               fCurrentDetector.Data());
+       if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
        {
-               to += Form("%s,", anExpert->GetName());
+               body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+       } else {
+               body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
+       }
+       
+       
+       TString logFolder = "logs";
+       if (fConfig->GetRunMode() == AliShuttleConfig::kProd) 
+               logFolder += "_PROD";
+       
+       
+       body += Form("Find the %s log for the current run on \n\n"
+               "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n", 
+               fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(), 
+                               fCurrentDetector.Data(), GetCurrentRun());
+       body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
+
+       AliDebug(2, Form("Body begin: %s", body.Data()));
+
+       mailBody << body.Data();
+       mailBody.close();
+       mailBody.open(bodyFileName, ofstream::out | ofstream::app);
+
+       TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), 
+               GetCurrentRun(), fCurrentDetector.Data(), GetCurrentRun());
+       TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
+       if (gSystem->Exec(tailCommand.Data()))
+       {
+               mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
+       }
+
+       TString endBody = Form("------------------------------------------------------\n\n");
+       endBody += Form("In case of problems please contact the SHUTTLE core team.\n\n");
+       endBody += "Please do not answer this message directly, it is automatically generated.\n\n";
+       endBody += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
+
+       AliDebug(2, Form("Body end: %s", endBody.Data()));
+
+       mailBody << endBody.Data();
+
+       mailBody.close();
+
+       // send mail!
+       TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
+                                               subject.Data(),
+                                               cc.Data(),
+                                               to.Data(),
+                                               bodyFileName.Data());
+       AliDebug(2, Form("mail command: %s", mailCommand.Data()));
+
+       Bool_t result = gSystem->Exec(mailCommand.Data());
+
+       return result == 0;
+}
+
+//______________________________________________________________________________________________
+Bool_t AliShuttle::SendMailToDCS()
+{
+       //
+       // sends a mail to the DCS experts in case of DCS error
+       //
+       
+       if (fTestMode != kNone)
+               return kTRUE;
+
+       void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
+       if (dir == NULL)
+       {
+               if (gSystem->mkdir(GetShuttleLogDir(), kTRUE))
+               {
+                       Log("SHUTTLE", Form("SendMailToDCS - Can't open directory <%s>", GetShuttleLogDir()));
+                       return kFALSE;
+               }
+
+       } else {
+               gSystem->FreeDirectory(dir);
        }
-       to.Remove(to.Length()-1);
+
+       TString bodyFileName;
+       bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
+       gSystem->ExpandPathName(bodyFileName);
+
+       ofstream mailBody;
+       mailBody.open(bodyFileName, ofstream::out);
+
+       if (!mailBody.is_open())
+       {
+               Log("SHUTTLE", Form("SendMailToDCS - Could not open mail body file %s", bodyFileName.Data()));
+               return kFALSE;
+       }
+
+       TString to="Vladimir.Fekete@cern.ch, Svetozar.Kapusta@cern.ch";
+       //TString to="alberto.colla@cern.ch";
        AliDebug(2, Form("to: %s",to.Data()));
 
        if (to.IsNull()) {
@@ -2987,19 +3466,32 @@ Bool_t AliShuttle::SendMail()
 
        TString cc="alberto.colla@cern.ch";
 
-       TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
+       TString subject = Form("Retrieval of data points for %s FAILED in run %d !",
                                fCurrentDetector.Data(), GetCurrentRun());
        AliDebug(2, Form("subject: %s", subject.Data()));
 
-       TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
-       body += Form("SHUTTLE just detected that your preprocessor "
-                       "failed processing run %d!!\n\n", GetCurrentRun());
-       body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", fCurrentDetector.Data());
-       body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+       TString body = Form("Dear DCS experts, \n\n");
+       body += Form("SHUTTLE couldn\'t retrieve the data points for detector %s "
+                       "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
+       body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", 
+                               fCurrentDetector.Data());
+       if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
+       {
+               body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+       } else {
+               body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
+       }
+
+       TString logFolder = "logs";
+       if (fConfig->GetRunMode() == AliShuttleConfig::kProd) 
+               logFolder += "_PROD";
+       
+       
        body += Form("Find the %s log for the current run on \n\n"
-               "\thttp://pcalishuttle01.cern.ch:8880/logs/%s_%d.log \n\n", 
-               fCurrentDetector.Data(), fCurrentDetector.Data(), GetCurrentRun());
-       body += Form("The last 10 lines of %s log file are following:\n\n");
+               "\thttp://pcalishuttle01.cern.ch:8880/%s/%d/%s_%d.log \n\n", 
+               fCurrentDetector.Data(), logFolder.Data(), GetCurrentRun(), 
+                               fCurrentDetector.Data(), GetCurrentRun());
+       body += Form("The last 10 lines of %s log file are following:\n\n", fCurrentDetector.Data());
 
        AliDebug(2, Form("Body begin: %s", body.Data()));
 
@@ -3007,7 +3499,8 @@ Bool_t AliShuttle::SendMail()
        mailBody.close();
        mailBody.open(bodyFileName, ofstream::out | ofstream::app);
 
-       TString logFileName = Form("%s/%s_%d.log", GetShuttleLogDir(), fCurrentDetector.Data(), GetCurrentRun());
+       TString logFileName = Form("%s/%d/%s_%d.log", GetShuttleLogDir(), GetCurrentRun(),
+               fCurrentDetector.Data(), GetCurrentRun());
        TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
        if (gSystem->Exec(tailCommand.Data()))
        {