X-Git-Url: http://git.uio.no/git/?a=blobdiff_plain;f=ANALYSIS%2FAliAnalysisAlien.cxx;h=f402fa43f0e50c5feddd3f8dafe0f498167cf24d;hb=b676e981d8c1c42875fda4dce60143608ea83c7a;hp=5b47028602b27090f7b3f984c7928ad359c422a3;hpb=3b0a01192325f5790423859a502189b531aa2191;p=u%2Fmrichter%2FAliRoot.git diff --git a/ANALYSIS/AliAnalysisAlien.cxx b/ANALYSIS/AliAnalysisAlien.cxx index 5b47028602b..f402fa43f0e 100644 --- a/ANALYSIS/AliAnalysisAlien.cxx +++ b/ANALYSIS/AliAnalysisAlien.cxx @@ -21,6 +21,8 @@ //============================================================================== #include "Riostream.h" +#include "TEnv.h" +#include "TError.h" #include "TROOT.h" #include "TSystem.h" #include "TFile.h" @@ -30,6 +32,8 @@ #include "TGridResult.h" #include "TGridCollection.h" #include "TGridJDL.h" +#include "TGridJobStatusList.h" +#include "TGridJobStatus.h" #include "TFileMerger.h" #include "AliAnalysisManager.h" #include "AliVEventHandler.h" @@ -42,22 +46,35 @@ ClassImp(AliAnalysisAlien) AliAnalysisAlien::AliAnalysisAlien() :AliAnalysisGrid(), fGridJDL(NULL), + fMergingJDL(NULL), fPrice(0), fTTL(0), fSplitMaxInputFileNumber(0), fMaxInitFailed(0), fMasterResubmitThreshold(0), fNtestFiles(0), + fNrunsPerMaster(0), + fMaxMergeFiles(0), + fNsubmitted(0), + fProductionMode(0), + fOutputToRunNo(0), + fMergeViaJDL(0), + fFastReadOption(0), + fOverwriteMode(0), fRunNumbers(), fExecutable(), + fExecutableCommand(), fArguments(), + fExecutableArgs(), fAnalysisMacro(), fAnalysisSource(), + fAdditionalRootLibs(), fAdditionalLibs(), fSplitMode(), fAPIVersion(), fROOTVersion(), fAliROOTVersion(), + fExternalPackages(), fUser(), fGridWorkingDir(), fGridDataDir(), @@ -71,6 +88,10 @@ AliAnalysisAlien::AliAnalysisAlien() fMergeExcludes(), fIncludePath(), fCloseSE(), + fFriendChainName(), + fJobTag(), + fOutputSingle(), + fRunPrefix(), fInputFiles(0), fPackages(0) { @@ -82,22 +103,35 @@ AliAnalysisAlien::AliAnalysisAlien() AliAnalysisAlien::AliAnalysisAlien(const char *name) :AliAnalysisGrid(name), fGridJDL(NULL), + fMergingJDL(NULL), fPrice(0), fTTL(0), fSplitMaxInputFileNumber(0), fMaxInitFailed(0), fMasterResubmitThreshold(0), fNtestFiles(0), + fNrunsPerMaster(0), + fMaxMergeFiles(0), + fNsubmitted(0), + fProductionMode(0), + fOutputToRunNo(0), + fMergeViaJDL(0), + fFastReadOption(0), + fOverwriteMode(0), fRunNumbers(), fExecutable(), + fExecutableCommand(), fArguments(), + fExecutableArgs(), fAnalysisMacro(), fAnalysisSource(), + fAdditionalRootLibs(), fAdditionalLibs(), fSplitMode(), fAPIVersion(), fROOTVersion(), fAliROOTVersion(), + fExternalPackages(), fUser(), fGridWorkingDir(), fGridDataDir(), @@ -111,6 +145,10 @@ AliAnalysisAlien::AliAnalysisAlien(const char *name) fMergeExcludes(), fIncludePath(), fCloseSE(), + fFriendChainName(), + fJobTag(), + fOutputSingle(), + fRunPrefix(), fInputFiles(0), fPackages(0) { @@ -122,22 +160,35 @@ AliAnalysisAlien::AliAnalysisAlien(const char *name) AliAnalysisAlien::AliAnalysisAlien(const AliAnalysisAlien& other) :AliAnalysisGrid(other), fGridJDL(NULL), + fMergingJDL(NULL), fPrice(other.fPrice), fTTL(other.fTTL), fSplitMaxInputFileNumber(other.fSplitMaxInputFileNumber), fMaxInitFailed(other.fMaxInitFailed), fMasterResubmitThreshold(other.fMasterResubmitThreshold), fNtestFiles(other.fNtestFiles), + fNrunsPerMaster(other.fNrunsPerMaster), + fMaxMergeFiles(other.fMaxMergeFiles), + fNsubmitted(other.fNsubmitted), + fProductionMode(other.fProductionMode), + fOutputToRunNo(other.fOutputToRunNo), + fMergeViaJDL(other.fMergeViaJDL), + fFastReadOption(other.fFastReadOption), + fOverwriteMode(other.fOverwriteMode), fRunNumbers(other.fRunNumbers), fExecutable(other.fExecutable), + fExecutableCommand(other.fExecutableCommand), fArguments(other.fArguments), + fExecutableArgs(other.fExecutableArgs), fAnalysisMacro(other.fAnalysisMacro), fAnalysisSource(other.fAnalysisSource), + fAdditionalRootLibs(other.fAdditionalRootLibs), fAdditionalLibs(other.fAdditionalLibs), fSplitMode(other.fSplitMode), fAPIVersion(other.fAPIVersion), fROOTVersion(other.fROOTVersion), fAliROOTVersion(other.fAliROOTVersion), + fExternalPackages(other.fExternalPackages), fUser(other.fUser), fGridWorkingDir(other.fGridWorkingDir), fGridDataDir(other.fGridDataDir), @@ -151,11 +202,18 @@ AliAnalysisAlien::AliAnalysisAlien(const AliAnalysisAlien& other) fMergeExcludes(other.fMergeExcludes), fIncludePath(other.fIncludePath), fCloseSE(other.fCloseSE), + fFriendChainName(other.fFriendChainName), + fJobTag(other.fJobTag), + fOutputSingle(other.fOutputSingle), + fRunPrefix(other.fRunPrefix), fInputFiles(0), fPackages(0) { // Copy ctor. fGridJDL = (TGridJDL*)gROOT->ProcessLine("new TAlienJDL()"); + fMergingJDL = (TGridJDL*)gROOT->ProcessLine("new TAlienJDL()"); + fRunRange[0] = other.fRunRange[0]; + fRunRange[1] = other.fRunRange[1]; if (other.fInputFiles) { fInputFiles = new TObjArray(); TIter next(other.fInputFiles); @@ -177,6 +235,7 @@ AliAnalysisAlien::~AliAnalysisAlien() { // Destructor. if (fGridJDL) delete fGridJDL; + if (fMergingJDL) delete fMergingJDL; if (fInputFiles) delete fInputFiles; if (fPackages) delete fPackages; } @@ -188,22 +247,35 @@ AliAnalysisAlien &AliAnalysisAlien::operator=(const AliAnalysisAlien& other) if (this != &other) { AliAnalysisGrid::operator=(other); fGridJDL = (TGridJDL*)gROOT->ProcessLine("new TAlienJDL()"); + fMergingJDL = (TGridJDL*)gROOT->ProcessLine("new TAlienJDL()"); fPrice = other.fPrice; fTTL = other.fTTL; fSplitMaxInputFileNumber = other.fSplitMaxInputFileNumber; fMaxInitFailed = other.fMaxInitFailed; fMasterResubmitThreshold = other.fMasterResubmitThreshold; fNtestFiles = other.fNtestFiles; + fNrunsPerMaster = other.fNrunsPerMaster; + fMaxMergeFiles = other.fMaxMergeFiles; + fNsubmitted = other.fNsubmitted; + fProductionMode = other.fProductionMode; + fOutputToRunNo = other.fOutputToRunNo; + fMergeViaJDL = other.fMergeViaJDL; + fFastReadOption = other.fFastReadOption; + fOverwriteMode = other.fOverwriteMode; fRunNumbers = other.fRunNumbers; fExecutable = other.fExecutable; + fExecutableCommand = other.fExecutableCommand; fArguments = other.fArguments; + fExecutableArgs = other.fExecutableArgs; fAnalysisMacro = other.fAnalysisMacro; fAnalysisSource = other.fAnalysisSource; + fAdditionalRootLibs = other.fAdditionalRootLibs; fAdditionalLibs = other.fAdditionalLibs; fSplitMode = other.fSplitMode; fAPIVersion = other.fAPIVersion; fROOTVersion = other.fROOTVersion; fAliROOTVersion = other.fAliROOTVersion; + fExternalPackages = other.fExternalPackages; fUser = other.fUser; fGridWorkingDir = other.fGridWorkingDir; fGridDataDir = other.fGridDataDir; @@ -217,6 +289,10 @@ AliAnalysisAlien &AliAnalysisAlien::operator=(const AliAnalysisAlien& other) fMergeExcludes = other.fMergeExcludes; fIncludePath = other.fIncludePath; fCloseSE = other.fCloseSE; + fFriendChainName = other.fFriendChainName; + fJobTag = other.fJobTag; + fOutputSingle = other.fOutputSingle; + fRunPrefix = other.fRunPrefix; if (other.fInputFiles) { fInputFiles = new TObjArray(); TIter next(other.fInputFiles); @@ -249,7 +325,15 @@ void AliAnalysisAlien::AddRunNumber(Int_t run) { // Add a run number to the list of runs to be processed. if (fRunNumbers.Length()) fRunNumbers += " "; - fRunNumbers += Form("%d", run); + fRunNumbers += Form("%s%d", fRunPrefix.Data(), run); +} + +//______________________________________________________________________________ +void AliAnalysisAlien::AddRunNumber(const char* run) +{ +// Add a run number to the list of runs to be processed. + if (fRunNumbers.Length()) fRunNumbers += " "; + fRunNumbers += run; } //______________________________________________________________________________ @@ -260,17 +344,20 @@ void AliAnalysisAlien::AddDataFile(const char *lfn) if (!fInputFiles) fInputFiles = new TObjArray(); fInputFiles->Add(new TObjString(lfn)); } - + +//______________________________________________________________________________ +void AliAnalysisAlien::AddExternalPackage(const char *package) +{ +// Adds external packages w.r.t to the default ones (root,aliroot and gapi) + if (fExternalPackages) fExternalPackages += " "; + fExternalPackages += package; +} + //______________________________________________________________________________ Bool_t AliAnalysisAlien::Connect() { // Try to connect to AliEn. User needs a valid token and /tmp/gclient_env_$UID sourced. if (gGrid && gGrid->IsConnected()) return kTRUE; - if (!gSystem->Getenv("alien_API_USER")) { - Error("Connect", "Make sure you:\n 1. Have called: alien-token-init today\n 2. Have sourced /tmp/gclient_env_%s", - gSystem->Getenv("UID")); - return kFALSE; - } if (!gGrid) { Info("Connect", "Trying to connect to AliEn ..."); TGrid::Connect("alien://"); @@ -294,26 +381,81 @@ void AliAnalysisAlien::CdWork() } TString homedir = gGrid->GetHomeDirectory(); TString workdir = homedir + fGridWorkingDir; - if (!gGrid->Cd(workdir)) { - gGrid->Cd(homedir); - if (gGrid->Mkdir(workdir)) { - gGrid->Cd(fGridWorkingDir); - Info("CreateJDL", "\n##### Created alien working directory %s", fGridWorkingDir.Data()); - } else { - Warning("CreateJDL", "Working directory %s cannot be created.\n Using %s instead.", - workdir.Data(), homedir.Data()); - fGridWorkingDir = ""; - } - } + if (DirectoryExists(workdir)) { + gGrid->Cd(workdir); + return; + } + // Work directory not existing - create it + gGrid->Cd(homedir); + if (gGrid->Mkdir(workdir)) { + gGrid->Cd(fGridWorkingDir); + Info("CreateJDL", "\n##### Created alien working directory %s", fGridWorkingDir.Data()); + } else { + Warning("CreateJDL", "Working directory %s cannot be created.\n Using %s instead.", + workdir.Data(), homedir.Data()); + fGridWorkingDir = ""; + } } +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::CheckFileCopy(const char *alienpath) +{ +// Check if file copying is possible. + if (!Connect()) { + Error("CheckFileCopy", "Not connected to AliEn. File copying cannot be tested."); + return kFALSE; + } + Info("CheckFileCopy", "Checking possibility to copy files to your AliEn home directory... \ + \n +++ NOTE: You can disable this via: plugin->SetCheckCopy(kFALSE);"); + // Check if alien_CLOSE_SE is defined + TString closeSE = gSystem->Getenv("alien_CLOSE_SE"); + if (!closeSE.IsNull()) { + Info("CheckFileCopy", "Your current close storage is pointing to: \ + \n alien_CLOSE_SE = \"%s\"", closeSE.Data()); + } else { + Warning("CheckFileCopy", "Your current close storage is empty ! Depending on your location, file copying may fail."); + } + // Check if grid directory exists. + if (!DirectoryExists(alienpath)) { + Error("CheckFileCopy", "Alien path %s does not seem to exist", alienpath); + return kFALSE; + } + TFile f("plugin_test_copy", "RECREATE"); + // User may not have write permissions to current directory + if (f.IsZombie()) { + Error("CheckFileCopy", "Cannot create local test file. Do you have write access to current directory: <%s> ?", + gSystem->WorkingDirectory()); + return kFALSE; + } + f.Close(); + if (FileExists(Form("alien://%s/%s",alienpath, f.GetName()))) gGrid->Rm(Form("alien://%s/%s",alienpath, f.GetName())); + if (!TFile::Cp(f.GetName(), Form("alien://%s/%s",alienpath, f.GetName()))) { + Error("CheckFileCopy", "Cannot copy files to Alien destination: <%s> This may be temporary, or: \ + \n# 1. Make sure you have write permissions there. If this is the case: \ + \n# 2. Check the storage availability at: http://alimonitor.cern.ch/stats?page=SE/table \ + \n# Do: export alien_CLOSE_SE=\"working_disk_SE\" \ + \n# To make this permanent put in in your .bashrc (in .alienshrc is not enough) \ + \n# Redo token: rm /tmp/x509up_u$UID then: alien-token-init ", alienpath); + gSystem->Unlink(f.GetName()); + return kFALSE; + } + gSystem->Unlink(f.GetName()); + gGrid->Rm(Form("%s%s",alienpath,f.GetName())); + Info("CheckFileCopy", "### ...SUCCESS ###"); + return kTRUE; +} + //______________________________________________________________________________ Bool_t AliAnalysisAlien::CheckInputData() { // Check validity of input data. If necessary, create xml files. - if (!fInputFiles && !fRunNumbers.Length()) { - Error("CheckInputData", "You have to specify either a set of run numbers or some existing grid files. Use AddRunNumber()/AddDataFile()."); - return kFALSE; + if (!fInputFiles && !fRunNumbers.Length() && !fRunRange[0]) { + if (!fGridDataDir.Length()) { + Error("CkeckInputData", "AliEn path to base data directory must be set.\n = Use: SetGridDataDir()"); + return kFALSE; + } + Info("CheckInputData", "Analysis will make a single xml for base data directory %s",fGridDataDir.Data()); + return kTRUE; } // Process declared files Bool_t is_collection = kFALSE; @@ -358,13 +500,13 @@ Bool_t AliAnalysisAlien::CheckInputData() } } // Process requested run numbers - if (!fRunNumbers.Length()) return kTRUE; + if (!fRunNumbers.Length() && !fRunRange[0]) return kTRUE; // Check validity of alien data directory if (!fGridDataDir.Length()) { Error("CkeckInputData", "AliEn path to base data directory must be set.\n = Use: SetGridDataDir()"); return kFALSE; } - if (!gGrid->Cd(fGridDataDir)) { + if (!DirectoryExists(fGridDataDir)) { Error("CheckInputData", "Data directory %s not existing.", fGridDataDir.Data()); return kFALSE; } @@ -380,6 +522,8 @@ Bool_t AliAnalysisAlien::CheckInputData() // Check validity of run number(s) TObjArray *arr; TObjString *os; + Int_t nruns = 0; + TString schunk, schunk2; TString path; if (!checked) { checked = kTRUE; @@ -391,13 +535,14 @@ Bool_t AliAnalysisAlien::CheckInputData() return kFALSE; } if (fRunNumbers.Length()) { + Info("CheckDataType", "Using supplied run numbers (run ranges are ignored)"); arr = fRunNumbers.Tokenize(" "); TIter next(arr); while ((os=(TObjString*)next())) { path = Form("%s/%s ", fGridDataDir.Data(), os->GetString().Data()); - if (!gGrid->Cd(path)) { - Error("CheckInputData", "Run number %s not found in path: %s", os->GetString().Data(), path.Data()); - return kFALSE; + if (!DirectoryExists(path)) { + Warning("CheckInputData", "Run number %s not found in path: <%s>", os->GetString().Data(), path.Data()); + continue; } path = Form("%s/%s.xml", workdir.Data(),os->GetString().Data()); TString msg = "\n##### file: "; @@ -406,9 +551,51 @@ Bool_t AliAnalysisAlien::CheckInputData() if (use_tags) msg += " using_tags: Yes"; else msg += " using_tags: No"; Info("CheckDataType", msg.Data()); - AddDataFile(path); + if (fNrunsPerMaster<2) { + AddDataFile(Form("%s.xml", os->GetString().Data())); + } else { + nruns++; + if (((nruns-1)%fNrunsPerMaster) == 0) { + schunk = os->GetString(); + } + if ((nruns%fNrunsPerMaster)!=0 && os!=arr->Last()) continue; + schunk += Form("_%s.xml", os->GetString().Data()); + AddDataFile(schunk); + } } delete arr; + } else { + Info("CheckDataType", "Using run range [%d, %d]", fRunRange[0], fRunRange[1]); + for (Int_t irun=fRunRange[0]; irun<=fRunRange[1]; irun++) { + path = Form("%s/%s%d ", fGridDataDir.Data(), fRunPrefix.Data(), irun); + if (!DirectoryExists(path)) { +// Warning("CheckInputData", "Run number %d not found in path: <%s>", irun, path.Data()); + continue; + } + path = Form("%s/%s%d.xml", workdir.Data(),fRunPrefix.Data(),irun); + TString msg = "\n##### file: "; + msg += path; + msg += " type: xml_collection;"; + if (use_tags) msg += " using_tags: Yes"; + else msg += " using_tags: No"; + Info("CheckDataType", msg.Data()); + if (fNrunsPerMaster<2) { + AddDataFile(Form("%s%d.xml",fRunPrefix.Data(),irun)); + } else { + nruns++; + if (((nruns-1)%fNrunsPerMaster) == 0) { + schunk = Form("%s%d", fRunPrefix.Data(),irun); + } + schunk2 = Form("_%s%d.xml", fRunPrefix.Data(), irun); + if ((nruns%fNrunsPerMaster)!=0 && irun != fRunRange[1]) continue; + schunk += schunk2; + AddDataFile(schunk); + } + } + if (!fInputFiles) { + schunk += schunk2; + AddDataFile(schunk); + } } return kTRUE; } @@ -417,7 +604,7 @@ Bool_t AliAnalysisAlien::CheckInputData() Bool_t AliAnalysisAlien::CreateDataset(const char *pattern) { // Create dataset for the grid data directory + run number. - if (TestBit(AliAnalysisGrid::kOffline)) return kFALSE; + if (TestBit(AliAnalysisGrid::kOffline)) return kTRUE; if (!Connect()) { Error("CreateDataset", "Cannot create dataset with no grid connection"); return kFALSE; @@ -436,39 +623,248 @@ Bool_t AliAnalysisAlien::CreateDataset(const char *pattern) TString file; TString path; - if (!fRunNumbers.Length()) return kTRUE; - // Several runs - TObjArray *arr = fRunNumbers.Tokenize(" "); - TObjString *os; - TIter next(arr); - while ((os=(TObjString*)next())) { - path = Form("%s/%s ", fGridDataDir.Data(), os->GetString().Data()); + Int_t nruns = 0; + TString schunk, schunk2; + TGridCollection *cbase=0, *cadd=0; + if (!fRunNumbers.Length() && !fRunRange[0]) { + if (fInputFiles && fInputFiles->GetEntries()) return kTRUE; + // Make a single data collection from data directory. + path = fGridDataDir; + if (!DirectoryExists(path)) { + Error("CreateDataset", "Path to data directory %s not valid",fGridDataDir.Data()); + return kFALSE; + } +// CdWork(); if (TestBit(AliAnalysisGrid::kTest)) file = "wn.xml"; - else file = Form("%s.xml", os->GetString().Data()); - if (FileExists(file) && !TestBit(AliAnalysisGrid::kTest)) { - Info("CreateDataset", "\n##### Removing previous dataset %s", file.Data()); - gGrid->Rm(file); + else file = Form("%s.xml", gSystem->BaseName(path)); + if (gSystem->AccessPathName(file) || TestBit(AliAnalysisGrid::kTest) || fOverwriteMode) { + command = "find "; + command += options; + command += path; + command += " "; + command += pattern; + command += conditions; + printf("command: %s\n", command.Data()); + TGridResult *res = gGrid->Command(command); + if (res) delete res; + // Write standard output to file + gROOT->ProcessLine(Form("gGrid->Stdout(); > %s", file.Data())); + Bool_t has_grep = (gSystem->Exec("grep --version 2>/dev/null > /dev/null")==0)?kTRUE:kFALSE; + Bool_t null_file = kFALSE; + if (!has_grep) { + Warning("CreateDataset", "'grep' command not available on this system - cannot validate the result of the grid 'find' command"); + } else { + null_file = (gSystem->Exec(Form("grep /event %s 2>/dev/null > /dev/null",file.Data()))==0)?kFALSE:kTRUE; + Error("CreateDataset","Dataset %s produced by the previous find command is empty !", file.Data()); + return kFALSE; + } } - command = "find "; - command += options; - command += path; - command += pattern; -// conditions = Form(" > %s", file.Data()); - command += conditions; - TGridResult *res = gGrid->Command(command); - if (res) delete res; - // Write standard output to file - gROOT->ProcessLine(Form("gGrid->Stdout(); > %s", file.Data())); - if (TestBit(AliAnalysisGrid::kTest)) break; - // Copy xml file to alien space - TFile::Cp(Form("file:%s",file.Data()), Form("alien://%s/%s",workdir.Data(), file.Data())); - if (!FileExists(file)) { - Error("CreateDataset", "Command %s did NOT succeed", command.Data()); - delete arr; - return kFALSE; + Bool_t fileExists = FileExists(file); + if (!TestBit(AliAnalysisGrid::kTest) && (!fileExists || fOverwriteMode)) { + // Copy xml file to alien space + if (fileExists) gGrid->Rm(file); + TFile::Cp(Form("file:%s",file.Data()), Form("alien://%s/%s",workdir.Data(), file.Data())); + if (!FileExists(file)) { + Error("CreateDataset", "Command %s did NOT succeed", command.Data()); + return kFALSE; + } + // Update list of files to be processed. } + AddDataFile(Form("%s/%s", workdir.Data(), file.Data())); + return kTRUE; } - delete arr; + // Several runs + Bool_t null_result = kTRUE; + if (fRunNumbers.Length()) { + TObjArray *arr = fRunNumbers.Tokenize(" "); + TObjString *os; + TIter next(arr); + while ((os=(TObjString*)next())) { + path = Form("%s/%s ", fGridDataDir.Data(), os->GetString().Data()); + if (!DirectoryExists(path)) continue; +// CdWork(); + if (TestBit(AliAnalysisGrid::kTest)) file = "wn.xml"; + else file = Form("%s.xml", os->GetString().Data()); + // If local collection file does not exist, create it via 'find' command. + if (gSystem->AccessPathName(file) || TestBit(AliAnalysisGrid::kTest) || fOverwriteMode) { + command = "find "; + command += options; + command += path; + command += pattern; + command += conditions; + TGridResult *res = gGrid->Command(command); + if (res) delete res; + // Write standard output to file + gROOT->ProcessLine(Form("gGrid->Stdout(); > %s", file.Data())); + Bool_t has_grep = (gSystem->Exec("grep --version 2>/dev/null > /dev/null")==0)?kTRUE:kFALSE; + Bool_t null_file = kFALSE; + if (!has_grep) { + Warning("CreateDataset", "'grep' command not available on this system - cannot validate the result of the grid 'find' command"); + } else { + null_file = (gSystem->Exec(Form("grep /event %s 2>/dev/null > /dev/null",file.Data()))==0)?kFALSE:kTRUE; + Warning("CreateDataset","Dataset %s produced by: <%s> is empty !", file.Data(), command.Data()); + fRunNumbers.ReplaceAll(os->GetString().Data(), ""); + continue; + } + null_result = kFALSE; + } + if (TestBit(AliAnalysisGrid::kTest)) break; + // Check if there is one run per master job. + if (fNrunsPerMaster<2) { + if (FileExists(file)) { + if (fOverwriteMode) gGrid->Rm(file); + else { + Info("CreateDataset", "\n##### Dataset %s exist. Skipping creation...", file.Data()); + continue; + } + } + // Copy xml file to alien space + TFile::Cp(Form("file:%s",file.Data()), Form("alien://%s/%s",workdir.Data(), file.Data())); + if (!FileExists(file)) { + Error("CreateDataset", "Command %s did NOT succeed", command.Data()); + delete arr; + return kFALSE; + } + } else { + nruns++; + if (((nruns-1)%fNrunsPerMaster) == 0) { + schunk = os->GetString(); + cbase = (TGridCollection*)gROOT->ProcessLine(Form("new TAlienCollection(\"%s\", 1000000);",file.Data())); + } else { + cadd = (TGridCollection*)gROOT->ProcessLine(Form("new TAlienCollection(\"%s\", 1000000);",file.Data())); + printf(" Merging collection <%s> into masterjob input...\n", file.Data()); + cbase->Add(cadd); + delete cadd; + } + if ((nruns%fNrunsPerMaster)!=0 && os!=arr->Last()) { + continue; + } + schunk += Form("_%s.xml", os->GetString().Data()); + if (FileExists(schunk)) { + if (fOverwriteMode) gGrid->Rm(file); + else { + Info("CreateDataset", "\n##### Dataset %s exist. Skipping creation...", schunk.Data()); + continue; + } + } + printf("Exporting merged collection <%s> and copying to AliEn\n", schunk.Data()); + cbase->ExportXML(Form("file://%s", schunk.Data()),kFALSE,kFALSE, schunk, "Merged runs"); + TFile::Cp(Form("file:%s",schunk.Data()), Form("alien://%s/%s",workdir.Data(), schunk.Data())); + if (!FileExists(schunk)) { + Error("CreateDataset", "Copy command did NOT succeed for %s", schunk.Data()); + delete arr; + return kFALSE; + } + } + } + delete arr; + if (null_result) { + Error("CreateDataset", "No valid dataset corresponding to the query!"); + return kFALSE; + } + } else { + // Process a full run range. + for (Int_t irun=fRunRange[0]; irun<=fRunRange[1]; irun++) { + path = Form("%s/%s%d ", fGridDataDir.Data(), fRunPrefix.Data(), irun); + if (!DirectoryExists(path)) continue; +// CdWork(); + if (TestBit(AliAnalysisGrid::kTest)) file = "wn.xml"; + else file = Form("%s%d.xml", fRunPrefix.Data(), irun); + if (FileExists(file) && fNrunsPerMaster<2 && !TestBit(AliAnalysisGrid::kTest)) { + if (fOverwriteMode) gGrid->Rm(file); + else { + Info("CreateDataset", "\n##### Dataset %s exist. Skipping creation...", file.Data()); + continue; + } + } + // If local collection file does not exist, create it via 'find' command. + if (gSystem->AccessPathName(file) || TestBit(AliAnalysisGrid::kTest) || fOverwriteMode) { + command = "find "; + command += options; + command += path; + command += pattern; + command += conditions; + TGridResult *res = gGrid->Command(command); + if (res) delete res; + // Write standard output to file + gROOT->ProcessLine(Form("gGrid->Stdout(); > %s", file.Data())); + Bool_t has_grep = (gSystem->Exec("grep --version 2>/dev/null > /dev/null")==0)?kTRUE:kFALSE; + Bool_t null_file = kFALSE; + if (!has_grep) { + Warning("CreateDataset", "'grep' command not available on this system - cannot validate the result of the grid 'find' command"); + } else { + null_file = (gSystem->Exec(Form("grep /event %s 2>/dev/null > /dev/null",file.Data()))==0)?kFALSE:kTRUE; + Warning("CreateDataset","Dataset %s produced by: <%s> is empty !", file.Data(), command.Data()); + continue; + } + null_result = kFALSE; + } + if (TestBit(AliAnalysisGrid::kTest)) break; + // Check if there is one run per master job. + if (fNrunsPerMaster<2) { + if (FileExists(file)) { + if (fOverwriteMode) gGrid->Rm(file); + else { + Info("CreateDataset", "\n##### Dataset %s exist. Skipping creation...", file.Data()); + continue; + } + } + // Copy xml file to alien space + TFile::Cp(Form("file:%s",file.Data()), Form("alien://%s/%s",workdir.Data(), file.Data())); + if (!FileExists(file)) { + Error("CreateDataset", "Command %s did NOT succeed", command.Data()); + return kFALSE; + } + } else { + nruns++; + // Check if the collection for the chunk exist locally. + Int_t nchunk = (nruns-1)/fNrunsPerMaster; + if (FileExists(fInputFiles->At(nchunk)->GetName())) { + if (fOverwriteMode) gGrid->Rm(fInputFiles->At(nchunk)->GetName()); + else continue; + } + printf(" Merging collection <%s> into %d runs chunk...\n",file.Data(),fNrunsPerMaster); + if (((nruns-1)%fNrunsPerMaster) == 0) { + schunk = Form("%s%d", fRunPrefix.Data(), irun); + cbase = (TGridCollection*)gROOT->ProcessLine(Form("new TAlienCollection(\"%s\", 1000000);",file.Data())); + } else { + cadd = (TGridCollection*)gROOT->ProcessLine(Form("new TAlienCollection(\"%s\", 1000000);",file.Data())); + cbase->Add(cadd); + delete cadd; + } + schunk2 = Form("%s_%s%d.xml", schunk.Data(), fRunPrefix.Data(), irun); + if ((nruns%fNrunsPerMaster)!=0 && irun!=fRunRange[1] && schunk2 != fInputFiles->Last()->GetName()) { + continue; + } + schunk = schunk2; + if (FileExists(schunk)) { + if (fOverwriteMode) gGrid->Rm(schunk); + else { + Info("CreateDataset", "\n##### Dataset %s exist. Skipping creation...", schunk.Data()); + continue; + } + } + printf("Exporting merged collection <%s> and copying to AliEn.\n", schunk.Data()); + cbase->ExportXML(Form("file://%s", schunk.Data()),kFALSE,kFALSE, schunk, "Merged runs"); + if (FileExists(schunk)) { + if (fOverwriteMode) gGrid->Rm(schunk); + else { + Info("CreateDataset", "\n##### Dataset %s exist. Skipping copy...", schunk.Data()); + continue; + } + } + TFile::Cp(Form("file:%s",schunk.Data()), Form("alien://%s/%s",workdir.Data(), schunk.Data())); + if (!FileExists(schunk)) { + Error("CreateDataset", "Copy command did NOT succeed for %s", schunk.Data()); + return kFALSE; + } + } + } + if (null_result) { + Error("CreateDataset", "No valid dataset corresponding to the query!"); + return kFALSE; + } + } return kTRUE; } @@ -508,12 +904,13 @@ Bool_t AliAnalysisAlien::CreateJDL() Error("CreateJDL", "You must define AliEn output directory"); error = kTRUE; } else { - if (!gGrid->Cd(fGridOutputDir)) { + if (!fGridOutputDir.Contains("/")) fGridOutputDir = Form("%s/%s", workdir.Data(), fGridOutputDir.Data()); + if (!DirectoryExists(fGridOutputDir)) { if (gGrid->Mkdir(fGridOutputDir)) { Info("CreateJDL", "\n##### Created alien output directory %s", fGridOutputDir.Data()); } else { Error("CreateJDL", "Could not create alien output directory %s", fGridOutputDir.Data()); - error = kTRUE; + // error = kTRUE; } } gGrid->Cd(workdir); @@ -521,32 +918,68 @@ Bool_t AliAnalysisAlien::CreateJDL() // Exit if any error up to now if (error) return kFALSE; // Set JDL fields - fGridJDL->SetValue("User", Form("\"%s\"", fUser.Data())); - fGridJDL->SetExecutable(fExecutable); -// fGridJDL->SetTTL((UInt_t)fTTL); - fGridJDL->SetValue("TTL", Form("\"%d\"", fTTL)); - if (fMaxInitFailed > 0) + if (!fUser.IsNull()) { + fGridJDL->SetValue("User", Form("\"%s\"", fUser.Data())); + fMergingJDL->SetValue("User", Form("\"%s\"", fUser.Data())); + } + fGridJDL->SetExecutable(fExecutable, "This is the startup script"); + TString mergeExec = fExecutable; + mergeExec.ReplaceAll(".sh", "_merge.sh"); + fMergingJDL->SetExecutable(mergeExec, "This is the startup script"); + mergeExec.ReplaceAll(".sh", ".C"); + fMergingJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(),mergeExec.Data()), "List of input files to be uploaded to workers"); + if (!fArguments.IsNull()) + fGridJDL->SetArguments(fArguments, "Arguments for the executable command"); + fMergingJDL->SetArguments("$1"); + fGridJDL->SetValue("TTL", Form("\"%d\"",fTTL)); + fGridJDL->SetDescription("TTL", Form("Time after which the job is killed (%d min.)", fTTL/60)); + fMergingJDL->SetValue("TTL", Form("\"%d\"",fTTL)); + fMergingJDL->SetDescription("TTL", Form("Time after which the job is killed (%d min.)", fTTL/60)); + + if (fMaxInitFailed > 0) { fGridJDL->SetValue("MaxInitFailed", Form("\"%d\"",fMaxInitFailed)); - if (fSplitMaxInputFileNumber > 0) + fGridJDL->SetDescription("MaxInitFailed", "Maximum number of first failing jobs to abort the master job"); + } + if (fSplitMaxInputFileNumber > 0) { fGridJDL->SetValue("SplitMaxInputFileNumber", Form("\"%d\"", fSplitMaxInputFileNumber)); - if (fSplitMode.Length()) + fGridJDL->SetDescription("SplitMaxInputFileNumber", "Maximum number of input files to be processed per subjob"); + } + if (fSplitMode.Length()) { fGridJDL->SetValue("Split", Form("\"%s\"", fSplitMode.Data())); -// fGridJDL->SetSplitMode(fSplitMode, (UInt_t)fSplitMaxInputFileNumber); - if (fAliROOTVersion.Length()) - fGridJDL->AddToPackages("AliRoot", fAliROOTVersion); - if (fROOTVersion.Length()) + fGridJDL->SetDescription("Split", "We split per SE or file"); + } + if (!fAliROOTVersion.IsNull()) { + fGridJDL->AddToPackages("AliRoot", fAliROOTVersion,"VO_ALICE", "List of requested packages"); + fMergingJDL->AddToPackages("AliRoot", fAliROOTVersion, "VO_ALICE", "List of requested packages"); + } + if (!fROOTVersion.IsNull()) { fGridJDL->AddToPackages("ROOT", fROOTVersion); - if (fAPIVersion.Length()) + fMergingJDL->AddToPackages("ROOT", fROOTVersion); + } + if (!fAPIVersion.IsNull()) { fGridJDL->AddToPackages("APISCONFIG", fAPIVersion); - fGridJDL->SetInputDataListFormat(fInputFormat); - fGridJDL->SetInputDataList("wn.xml"); - if (fInputFiles) { - TIter next(fInputFiles); - while ((os=(TObjString*)next())) - fGridJDL->AddToInputDataCollection(Form("LF:%s,nodownload", os->GetString().Data())); - } - fGridJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(), fAnalysisMacro.Data())); - fGridJDL->AddToInputSandbox(Form("LF:%s/analysis.root", workdir.Data())); + fMergingJDL->AddToPackages("APISCONFIG", fAPIVersion); + } + if (!fExternalPackages.IsNull()) { + arr = fExternalPackages.Tokenize(" "); + TIter next(arr); + while ((os=(TObjString*)next())) { + TString pkgname = os->GetString(); + Int_t index = pkgname.Index("::"); + TString pkgversion = pkgname(index+2, pkgname.Length()); + pkgname.Remove(index); + fGridJDL->AddToPackages(pkgname, pkgversion); + fMergingJDL->AddToPackages(pkgname, pkgversion); + } + delete arr; + } + fGridJDL->SetInputDataListFormat(fInputFormat, "Format of input data"); + fGridJDL->SetInputDataList("wn.xml", "Collection name to be processed on each worker node"); + fGridJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(), fAnalysisMacro.Data()), "List of input files to be uploaded to workers"); + TString analysisFile = fExecutable; + analysisFile.ReplaceAll(".sh", ".root"); + fGridJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(),analysisFile.Data())); + fMergingJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(),analysisFile.Data())); if (IsUsingTags() && !gSystem->AccessPathName("ConfigureCuts.C")) fGridJDL->AddToInputSandbox(Form("LF:%s/ConfigureCuts.C", workdir.Data())); if (fAdditionalLibs.Length()) { @@ -555,107 +988,154 @@ Bool_t AliAnalysisAlien::CreateJDL() while ((os=(TObjString*)next())) { if (os->GetString().Contains(".so")) continue; fGridJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(), os->GetString().Data())); + fMergingJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(), os->GetString().Data())); } delete arr; } if (fPackages) { TIter next(fPackages); TObject *obj; - while ((obj=next())) + while ((obj=next())) { fGridJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(), obj->GetName())); + fMergingJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(), obj->GetName())); + } } if (fOutputArchive.Length()) { arr = fOutputArchive.Tokenize(" "); TIter next(arr); - while ((os=(TObjString*)next())) - if (!os->GetString().Contains("@") && fCloseSE.Length()) - fGridJDL->AddToOutputArchive(Form("%s@%s",os->GetString().Data(), fCloseSE.Data())); - else - fGridJDL->AddToOutputArchive(os->GetString()); + Bool_t first = kTRUE; + const char *comment = "Files to be archived"; + const char *comment1 = comment; + while ((os=(TObjString*)next())) { + if (!first) comment = NULL; + if (!os->GetString().Contains("@") && fCloseSE.Length()) + fGridJDL->AddToOutputArchive(Form("%s@%s",os->GetString().Data(), fCloseSE.Data()), comment); + else + fGridJDL->AddToOutputArchive(os->GetString(), comment); + first = kFALSE; + } delete arr; + TString outputArchive = fOutputArchive; + if (!fMergeExcludes.IsNull()) { + arr = fMergeExcludes.Tokenize(" "); + TIter next1(arr); + while ((os=(TObjString*)next1())) { + outputArchive.ReplaceAll(Form("%s,",os->GetString().Data()),""); + outputArchive.ReplaceAll(os->GetString(),""); + } + delete arr; + } + arr = outputArchive.Tokenize(" "); + TIter next2(arr); + comment = comment1; + first = kTRUE; + while ((os=(TObjString*)next2())) { + if (!first) comment = NULL; + if (!os->GetString().Contains("@") && fCloseSE.Length()) + fMergingJDL->AddToOutputArchive(Form("%s@%s",os->GetString().Data(), fCloseSE.Data()), comment); + else + fMergingJDL->AddToOutputArchive(os->GetString(), comment); + first = kFALSE; + } + delete arr; } - fGridJDL->SetOutputDirectory(Form("%s/%s/#alien_counter_03i#", workdir.Data(), fGridOutputDir.Data())); arr = fOutputFiles.Tokenize(" "); TIter next(arr); + Bool_t first = kTRUE; + const char *comment = "Files to be archived"; + const char *comment1 = comment; while ((os=(TObjString*)next())) { + // Ignore ouputs in jdl that are also in outputarchive + TString sout = os->GetString(); + if (sout.Index("@")>0) sout.Remove(sout.Index("@")); + if (fOutputArchive.Contains(sout)) continue; + if (!first) comment = NULL; if (!os->GetString().Contains("@") && fCloseSE.Length()) - fGridJDL->AddToOutputSandbox(Form("%s@%s",os->GetString().Data(), fCloseSE.Data())); + fGridJDL->AddToOutputSandbox(Form("%s@%s",os->GetString().Data(), fCloseSE.Data()), comment); else - fGridJDL->AddToOutputSandbox(os->GetString()); + fGridJDL->AddToOutputSandbox(os->GetString(), comment); + first = kFALSE; } delete arr; -// fGridJDL->SetPrice((UInt_t)fPrice); - fGridJDL->SetValue("Price", Form("\"%d\"", fPrice)); - fGridJDL->SetValidationCommand(Form("%s/validate.sh", workdir.Data())); - if (fMasterResubmitThreshold) fGridJDL->SetValue("MasterResubmitThreshold", Form("\"%d%%\"", fMasterResubmitThreshold)); - // Generate the JDL as a string - TString sjdl = fGridJDL->Generate(); - Int_t index; - index = sjdl.Index("Executable"); - if (index >= 0) sjdl.Insert(index, "\n# This is the startup script\n"); - index = sjdl.Index("Split "); - if (index >= 0) sjdl.Insert(index, "\n# We split per storage element\n"); - index = sjdl.Index("SplitMaxInputFileNumber"); - if (index >= 0) sjdl.Insert(index, "\n# We want each subjob to get maximum this number of input files\n"); - index = sjdl.Index("InputDataCollection"); - if (index >= 0) sjdl.Insert(index, "# Input xml collections\n"); - index = sjdl.Index("InputFile"); - if (index >= 0) sjdl.Insert(index, "\n# List of input files to be uploaded to wn's\n"); - index = sjdl.Index("InputDataList "); - if (index >= 0) sjdl.Insert(index, "\n# Collection to be processed on wn\n"); - index = sjdl.Index("InputDataListFormat"); - if (index >= 0) sjdl.Insert(index, "\n# Format of input data\n"); - index = sjdl.Index("Price"); - if (index >= 0) sjdl.Insert(index, "\n# AliEn price for this job\n"); - index = sjdl.Index("Requirements"); - if (index >= 0) sjdl.Insert(index, "\n# Additional requirements for the computing element\n"); - index = sjdl.Index("Packages"); - if (index >= 0) sjdl.Insert(index, "\n# Packages to be used\n"); - index = sjdl.Index("User ="); - if (index >= 0) sjdl.Insert(index, "\n# AliEn user\n"); - index = sjdl.Index("TTL"); - if (index >= 0) sjdl.Insert(index, "\n# Time to live for the job\n"); - index = sjdl.Index("OutputFile"); - if (index >= 0) sjdl.Insert(index, "\n# List of output files to be registered\n"); - index = sjdl.Index("OutputDir"); - if (index >= 0) sjdl.Insert(index, "\n# Output directory\n"); - index = sjdl.Index("OutputArchive"); - if (index >= 0) sjdl.Insert(index, "\n# Files to be archived\n"); - index = sjdl.Index("MaxInitFailed"); - if (index >= 0) sjdl.Insert(index, "\n# Maximum number of first failing jobs to abort the master job\n"); - index = sjdl.Index("MasterResubmitThreshold"); - if (index >= 0) sjdl.Insert(index, "\n# Resubmit failed jobs until DONE rate reaches this percentage\n"); - sjdl.ReplaceAll("ValidationCommand", "Validationcommand"); - index = sjdl.Index("Validationcommand"); - if (index >= 0) sjdl.Insert(index, "\n# Validation script to be run for each subjob\n"); - sjdl.ReplaceAll("\"LF:", "\n \"LF:"); - sjdl.ReplaceAll("(member", "\n (member"); - sjdl.ReplaceAll("\",\"VO_", "\",\n \"VO_"); - sjdl.ReplaceAll("{", "{\n "); - sjdl.ReplaceAll("};", "\n};"); - sjdl.ReplaceAll("{\n \n", "{\n"); - sjdl.ReplaceAll("\n\n", "\n"); - sjdl.ReplaceAll("OutputDirectory", "OutputDir"); - sjdl += "JDLVariables = \n{\n \"Packages\",\n \"OutputDir\"\n};\n"; - sjdl.Prepend("JobTag = \"Automatically generated analysis JDL\";\n"); - index = sjdl.Index("JDLVariables"); - if (index >= 0) sjdl.Insert(index, "\n# JDL variables\n"); - // Write jdl to file - ofstream out; - out.open(fJDLName.Data(), ios::out); - if (out.bad()) { - Error("CreateJDL", "Bad file name: %s", fJDLName.Data()); - return kFALSE; + if (fOutputFiles.Length()) { + TString outputFiles = fOutputFiles; + if (!fMergeExcludes.IsNull()) { + arr = fMergeExcludes.Tokenize(" "); + TIter next1(arr); + while ((os=(TObjString*)next1())) { + outputFiles.ReplaceAll(Form("%s,",os->GetString().Data()),""); + outputFiles.ReplaceAll(os->GetString(),""); + } + delete arr; + } + arr = outputFiles.Tokenize(" "); + TIter next2(arr); + comment = comment1; + first = kTRUE; + while ((os=(TObjString*)next2())) { + // Ignore ouputs in jdl that are also in outputarchive + TString sout = os->GetString(); + if (sout.Index("@")>0) sout.Remove(sout.Index("@")); + if (fOutputArchive.Contains(sout)) continue; + if (!first) comment = NULL; + if (!os->GetString().Contains("@") && fCloseSE.Length()) + fMergingJDL->AddToOutputSandbox(Form("%s@%s",os->GetString().Data(), fCloseSE.Data()), comment); + else + fMergingJDL->AddToOutputSandbox(os->GetString(), comment); + } + delete arr; } - out << sjdl << endl; + fGridJDL->SetPrice((UInt_t)fPrice, "AliEn price for this job"); + fMergingJDL->SetPrice((UInt_t)fPrice, "AliEn price for this job"); + TString validationScript = fExecutable; + validationScript.ReplaceAll(".sh", "_validation.sh"); + fGridJDL->SetValidationCommand(Form("%s/%s", workdir.Data(),validationScript.Data()), "Validation script to be run for each subjob"); + validationScript = fExecutable; + validationScript.ReplaceAll(".sh", "_mergevalidation.sh"); + fMergingJDL->SetValidationCommand(Form("%s/%s", workdir.Data(),validationScript.Data()), "Validation script to be run for each subjob"); + if (fMasterResubmitThreshold) { + fGridJDL->SetValue("MasterResubmitThreshold", Form("\"%d%%\"", fMasterResubmitThreshold)); + fGridJDL->SetDescription("MasterResubmitThreshold", "Resubmit failed jobs until DONE rate reaches this percentage"); + } + // Write a jdl with 2 input parameters: collection name and output dir name. + WriteJDL(copy); } // Copy jdl to grid workspace - if (!copy) { - Info("CreateJDL", "\n##### You may want to review jdl:%s and analysis macro:%s before running in mode", fJDLName.Data(), fAnalysisMacro.Data()); - } else { - Info("CreateJDL", "\n##### Copying JDL file <%s> to your AliEn working space", fJDLName.Data()); - if (FileExists(fJDLName)) gGrid->Rm(fJDLName); - TFile::Cp(Form("file:%s",fJDLName.Data()), Form("alien://%s/%s", workdir.Data(), fJDLName.Data())); + if (copy) { + // Check if an output directory was defined and valid + if (!fGridOutputDir.Length()) { + Error("CreateJDL", "You must define AliEn output directory"); + return kFALSE; + } else { + if (!fGridOutputDir.Contains("/")) fGridOutputDir = Form("%s/%s", workdir.Data(), fGridOutputDir.Data()); + if (!fProductionMode && !DirectoryExists(fGridOutputDir)) { + if (gGrid->Mkdir(fGridOutputDir)) { + Info("CreateJDL", "\n##### Created alien output directory %s", fGridOutputDir.Data()); + } else { + Error("CreateJDL", "Could not create alien output directory %s", fGridOutputDir.Data()); + return kFALSE; + } + } + gGrid->Cd(workdir); + } + if (TestBit(AliAnalysisGrid::kSubmit)) { + TString mergeJDLName = fExecutable; + mergeJDLName.ReplaceAll(".sh", "_merge.jdl"); + TString locjdl = Form("%s/%s", fGridOutputDir.Data(),fJDLName.Data()); + TString locjdl1 = Form("%s/%s", fGridOutputDir.Data(),mergeJDLName.Data()); + if (fProductionMode) { + locjdl = Form("%s/%s", workdir.Data(),fJDLName.Data()); + locjdl1 = Form("%s/%s", workdir.Data(),mergeJDLName.Data()); + } + if (FileExists(locjdl)) gGrid->Rm(locjdl); + if (FileExists(locjdl1)) gGrid->Rm(locjdl1); + Info("CreateJDL", "\n##### Copying JDL file <%s> to your AliEn output directory", fJDLName.Data()); + TFile::Cp(Form("file:%s",fJDLName.Data()), Form("alien://%s", locjdl.Data())); + if (fMergeViaJDL) { + Info("CreateJDL", "\n##### Copying merging JDL file <%s> to your AliEn output directory", mergeJDLName.Data()); + TFile::Cp(Form("file:%s",mergeJDLName.Data()), Form("alien://%s", locjdl1.Data())); + } + } if (fAdditionalLibs.Length()) { arr = fAdditionalLibs.Tokenize(" "); TObjString *os; @@ -672,6 +1152,7 @@ Bool_t AliAnalysisAlien::CreateJDL() TIter next(fPackages); TObject *obj; while ((obj=next())) { + if (FileExists(obj->GetName())) gGrid->Rm(obj->GetName()); Info("CreateJDL", "\n##### Copying dependency: <%s> to your alien workspace", obj->GetName()); TFile::Cp(Form("file:%s",obj->GetName()), Form("alien://%s/%s", workdir.Data(), obj->GetName())); } @@ -681,13 +1162,115 @@ Bool_t AliAnalysisAlien::CreateJDL() } //______________________________________________________________________________ -Bool_t AliAnalysisAlien::FileExists(const char *lfn) const +Bool_t AliAnalysisAlien::WriteJDL(Bool_t copy) { -// Returns true if file exists. - if (!gGrid) { - Error("FileExists", "No connection to grid"); +// Writes one or more JDL's corresponding to findex. If findex is negative, +// all run numbers are considered in one go (jdl). For non-negative indices +// they correspond to the indices in the array fInputFiles. + if (!fInputFiles) return kFALSE; + TObjString *os; + TString workdir = gGrid->GetHomeDirectory(); + workdir += fGridWorkingDir; + + if (!fRunNumbers.Length() && !fRunRange[0]) { + // One jdl with no parameters in case input data is specified by name. + TIter next(fInputFiles); + while ((os=(TObjString*)next())) + fGridJDL->AddToInputDataCollection(Form("LF:%s,nodownload", os->GetString().Data()), "Input xml collections"); + if (!fOutputSingle.IsNull()) + fGridJDL->SetOutputDirectory(Form("#alienfulldir#/../%s",fOutputSingle.Data()), "Output directory"); + else { + fGridJDL->SetOutputDirectory(Form("%s/#alien_counter_03i#", fGridOutputDir.Data()), "Output directory"); + fMergingJDL->SetOutputDirectory(fGridOutputDir); + } + } else { + // One jdl to be submitted with 2 input parameters: data collection name and output dir prefix + fGridJDL->AddToInputDataCollection(Form("LF:%s/$1,nodownload", workdir.Data()), "Input xml collections"); + if (!fOutputSingle.IsNull()) { + if (!fOutputToRunNo) fGridJDL->SetOutputDirectory(Form("#alienfulldir#/%s",fOutputSingle.Data()), "Output directory"); + else fGridJDL->SetOutputDirectory(Form("%s/$2",fGridOutputDir.Data()), "Output directory"); + } else { + fGridJDL->SetOutputDirectory(Form("%s/$2/#alien_counter_03i#", fGridOutputDir.Data()), "Output directory"); + fMergingJDL->SetOutputDirectory(Form("%s/$1", fGridOutputDir.Data()), "Output directory"); + } + } + + + // Generate the JDL as a string + TString sjdl = fGridJDL->Generate(); + TString sjdl1 = fMergingJDL->Generate(); + Int_t index; + sjdl.ReplaceAll("\"LF:", "\n \"LF:"); + sjdl.ReplaceAll("(member", "\n (member"); + sjdl.ReplaceAll("\",\"VO_", "\",\n \"VO_"); + sjdl.ReplaceAll("{", "{\n "); + sjdl.ReplaceAll("};", "\n};"); + sjdl.ReplaceAll("{\n \n", "{\n"); + sjdl.ReplaceAll("\n\n", "\n"); + sjdl.ReplaceAll("OutputDirectory", "OutputDir"); + sjdl1.ReplaceAll("\"LF:", "\n \"LF:"); + sjdl1.ReplaceAll("(member", "\n (member"); + sjdl1.ReplaceAll("\",\"VO_", "\",\n \"VO_"); + sjdl1.ReplaceAll("{", "{\n "); + sjdl1.ReplaceAll("};", "\n};"); + sjdl1.ReplaceAll("{\n \n", "{\n"); + sjdl1.ReplaceAll("\n\n", "\n"); + sjdl1.ReplaceAll("OutputDirectory", "OutputDir"); + sjdl += "JDLVariables = \n{\n \"Packages\",\n \"OutputDir\"\n};\n"; + sjdl.Prepend(Form("Jobtag = {\n \"comment:%s\"\n};\n", fJobTag.Data())); + index = sjdl.Index("JDLVariables"); + if (index >= 0) sjdl.Insert(index, "\n# JDL variables\n"); + sjdl1 += "JDLVariables = \n{\n \"Packages\",\n \"OutputDir\"\n};\n"; + sjdl1.Prepend(Form("Jobtag = {\n \"comment:Merging_%s\"\n};\n", fJobTag.Data())); + index = sjdl1.Index("JDLVariables"); + if (index >= 0) sjdl1.Insert(index, "\n# JDL variables\n"); + // Write jdl to file + ofstream out; + out.open(fJDLName.Data(), ios::out); + if (out.bad()) { + Error("CreateJDL", "Bad file name: %s", fJDLName.Data()); return kFALSE; } + out << sjdl << endl; + TString mergeJDLName = fExecutable; + mergeJDLName.ReplaceAll(".sh", "_merge.jdl"); + if (fMergeViaJDL) { + ofstream out1; + out1.open(mergeJDLName.Data(), ios::out); + if (out.bad()) { + Error("CreateJDL", "Bad file name: %s", mergeJDLName.Data()); + return kFALSE; + } + out1 << sjdl1 << endl; + } + + // Copy jdl to grid workspace + if (!copy) { + Info("CreateJDL", "\n##### You may want to review jdl:%s and analysis macro:%s before running in mode", fJDLName.Data(), fAnalysisMacro.Data()); + } else { + TString locjdl = Form("%s/%s", fGridOutputDir.Data(),fJDLName.Data()); + TString locjdl1 = Form("%s/%s", fGridOutputDir.Data(),mergeJDLName.Data()); + if (fProductionMode) { + locjdl = Form("%s/%s", workdir.Data(),fJDLName.Data()); + locjdl1 = Form("%s/%s", workdir.Data(),mergeJDLName.Data()); + } + if (FileExists(locjdl)) gGrid->Rm(locjdl); + if (FileExists(locjdl1)) gGrid->Rm(locjdl1); + Info("CreateJDL", "\n##### Copying JDL file <%s> to your AliEn output directory", fJDLName.Data()); + TFile::Cp(Form("file:%s",fJDLName.Data()), Form("alien://%s", locjdl.Data())); + if (fMergeViaJDL) { + Info("CreateJDL", "\n##### Copying merging JDL file <%s> to your AliEn output directory", mergeJDLName.Data()); + TFile::Cp(Form("file:%s",mergeJDLName.Data()), Form("alien://%s", locjdl1.Data())); + } + } + return kTRUE; +} + +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::FileExists(const char *lfn) +{ +// Returns true if file exists. + if (!gGrid) return kFALSE; TGridResult *res = gGrid->Ls(lfn); if (!res) return kFALSE; TMap *map = dynamic_cast(res->At(0)); @@ -704,6 +1287,35 @@ Bool_t AliAnalysisAlien::FileExists(const char *lfn) const return kTRUE; } +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::DirectoryExists(const char *dirname) +{ +// Returns true if directory exists. Can be also a path. + if (!gGrid) return kFALSE; + // Check if dirname is a path + TString dirstripped = dirname; + dirstripped = dirstripped.Strip(); + dirstripped = dirstripped.Strip(TString::kTrailing, '/'); + TString dir = gSystem->BaseName(dirstripped); + dir += "/"; + TString path = gSystem->DirName(dirstripped); + TGridResult *res = gGrid->Ls(path, "-F"); + if (!res) return kFALSE; + TIter next(res); + TMap *map; + TObject *obj; + while ((map=dynamic_cast(next()))) { + obj = map->GetValue("name"); + if (!obj) break; + if (dir == obj->GetName()) { + delete res; + return kTRUE; + } + } + delete res; + return kFALSE; +} + //______________________________________________________________________________ void AliAnalysisAlien::CheckDataType(const char *lfn, Bool_t &is_collection, Bool_t &is_xml, Bool_t &use_tags) { @@ -772,7 +1384,7 @@ void AliAnalysisAlien::CheckDataType(const char *lfn, Bool_t &is_collection, Boo } use_tags = slfn.Contains(".tag"); if (slfn.Contains(".root")) msg += " type: root file;"; - else msg += " type: unhnown file;"; + else msg += " type: unknown file;"; if (use_tags) msg += " using_tags: Yes"; else msg += " using_tags: No"; Info("CheckDataType", msg.Data()); @@ -786,7 +1398,7 @@ void AliAnalysisAlien::EnablePackage(const char *package) pkg.ReplaceAll(".par", ""); pkg += ".par"; if (gSystem->AccessPathName(pkg)) { - Error("EnablePackage", "Package %s not found", pkg.Data()); + Fatal("EnablePackage", "Package %s not found", pkg.Data()); return; } if (!TObject::TestBit(AliAnalysisGrid::kUsePars)) @@ -799,6 +1411,46 @@ void AliAnalysisAlien::EnablePackage(const char *package) fPackages->Add(new TObjString(pkg)); } +//______________________________________________________________________________ +const char *AliAnalysisAlien::GetJobStatus(Int_t jobidstart, Int_t lastid, Int_t &nrunning, Int_t &nwaiting, Int_t &nerror, Int_t &ndone) +{ +// Get job status for all jobs with jobid>jobidstart. + static char mstatus[20]; + mstatus[0] = '\0'; + nrunning = 0; + nwaiting = 0; + nerror = 0; + ndone = 0; + TGridJobStatusList *list = gGrid->Ps(""); + if (!list) return mstatus; + Int_t nentries = list->GetSize(); + TGridJobStatus *status; + Int_t pid; + for (Int_t ijob=0; ijobAt(ijob); + pid = gROOT->ProcessLine(Form("atoi(((TAlienJobStatus*)0x%lx)->GetKey(\"queueId\"));", (ULong_t)status)); + if (pidProcessLine(Form("sprintf((char*)0x%lx,((TAlienJobStatus*)0x%lx)->GetKey(\"status\"));",(ULong_t)mstatus, (ULong_t)status)); + } + switch (status->GetStatus()) { + case TGridJobStatus::kWAITING: + nwaiting++; break; + case TGridJobStatus::kRUNNING: + nrunning++; break; + case TGridJobStatus::kABORTED: + case TGridJobStatus::kFAIL: + case TGridJobStatus::kUNKNOWN: + nerror++; break; + case TGridJobStatus::kDONE: + ndone++; + } + } + list->Delete(); + delete list; + return mstatus; +} + //______________________________________________________________________________ Bool_t AliAnalysisAlien::IsCollection(const char *lfn) const { @@ -817,21 +1469,108 @@ Bool_t AliAnalysisAlien::IsCollection(const char *lfn) const return kFALSE; } +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::IsSingleOutput() const +{ +// Check if single-ouput option is on. + return (!fOutputSingle.IsNull()); +} + +//______________________________________________________________________________ +void AliAnalysisAlien::Print(Option_t *) const +{ +// Print current plugin settings. + printf("### AliEn analysis plugin current settings ###\n"); + printf("= Copy files to grid: __________________________ %s\n", (IsUseCopy())?"YES":"NO"); + printf("= Check if files can be copied to grid: ________ %s\n", (IsCheckCopy())?"YES":"NO"); + printf("= Production mode:______________________________ %d\n", fProductionMode); + printf("= Version of API requested: ____________________ %s\n", fAPIVersion.Data()); + printf("= Version of ROOT requested: ___________________ %s\n", fROOTVersion.Data()); + printf("= Version of AliRoot requested: ________________ %s\n", fAliROOTVersion.Data()); + if (fUser.Length()) + printf("= User running the plugin: _____________________ %s\n", fUser.Data()); + printf("= Grid workdir relative to user $HOME: _________ %s\n", fGridWorkingDir.Data()); + printf("= Grid output directory relative to workdir: ___ %s\n", fGridOutputDir.Data()); + printf("= Data base directory path requested: __________ %s\n", fGridDataDir.Data()); + printf("= Data search pattern: _________________________ %s\n", fDataPattern.Data()); + printf("= Input data format: ___________________________ %s\n", fInputFormat.Data()); + if (fRunNumbers.Length()) + printf("= Run numbers to be processed: _________________ %s\n", fRunNumbers.Data()); + if (fRunRange[0]) + printf("= Run range to be processed: ___________________ %s%d-%s%d\n", fRunPrefix.Data(), fRunRange[0], fRunPrefix.Data(), fRunRange[1]); + if (!fRunRange[0] && !fRunNumbers.Length()) { + TIter next(fInputFiles); + TObject *obj; + TString list; + while ((obj=next())) list += obj->GetName(); + printf("= Input files to be processed: _________________ %s\n", list.Data()); + } + if (TestBit(AliAnalysisGrid::kTest)) + printf("= Number of input files used in test mode: _____ %d\n", fNtestFiles); + printf("= List of output files to be registered: _______ %s\n", fOutputFiles.Data()); + printf("= List of outputs going to be archived: ________ %s\n", fOutputArchive.Data()); + printf("= List of outputs that should not be merged: ___ %s\n", fMergeExcludes.Data()); + printf("=====================================================================\n"); + printf("= Job price: ___________________________________ %d\n", fPrice); + printf("= Time to live (TTL): __________________________ %d\n", fTTL); + printf("= Max files per subjob: ________________________ %d\n", fSplitMaxInputFileNumber); + if (fMaxInitFailed>0) + printf("= Max number of subjob fails to kill: __________ %d\n", fMaxInitFailed); + if (fMasterResubmitThreshold>0) + printf("= Resubmit master job if failed subjobs >_______ %d\n", fMasterResubmitThreshold); + if (fNrunsPerMaster>0) + printf("= Number of runs per master job: _______________ %d\n", fNrunsPerMaster); + printf("= Number of files in one chunk to be merged: ___ %d\n", fMaxMergeFiles); + printf("= Name of the generated execution script: ______ %s\n", fExecutable.Data()); + printf("= Executable command: __________________________ %s\n", fExecutableCommand.Data()); + if (fArguments.Length()) + printf("= Arguments for the execution script: __________ %s\n",fArguments.Data()); + if (fExecutableArgs.Length()) + printf("= Arguments after macro name in executable______ %s\n",fExecutableArgs.Data()); + printf("= Name of the generated analysis macro: ________ %s\n",fAnalysisMacro.Data()); + printf("= User analysis files to be deployed: __________ %s\n",fAnalysisSource.Data()); + printf("= Additional libs to be loaded or souces to be compiled runtime: <%s>\n",fAdditionalLibs.Data()); + printf("= Master jobs split mode: ______________________ %s\n",fSplitMode.Data()); + if (fDatasetName) + printf("= Custom name for the dataset to be created: ___ %s\n", fDatasetName.Data()); + printf("= Name of the generated JDL: ___________________ %s\n", fJDLName.Data()); + if (fIncludePath.Data()) + printf("= Include path for runtime task compilation: ___ %s\n", fIncludePath.Data()); + if (fCloseSE.Length()) + printf("= Force job outputs to storage element: ________ %s\n", fCloseSE.Data()); + if (fFriendChainName.Length()) + printf("= Open friend chain file on worker: ____________ %s\n", fFriendChainName.Data()); + if (fPackages) { + TIter next(fPackages); + TObject *obj; + TString list; + while ((obj=next())) list += obj->GetName(); + printf("= Par files to be used: ________________________ %s\n", list.Data()); + } +} + //______________________________________________________________________________ void AliAnalysisAlien::SetDefaults() { // Set default values for everything. What cannot be filled will be left empty. if (fGridJDL) delete fGridJDL; fGridJDL = (TGridJDL*)gROOT->ProcessLine("new TAlienJDL()"); + fMergingJDL = (TGridJDL*)gROOT->ProcessLine("new TAlienJDL()"); fPrice = 1; fTTL = 30000; fSplitMaxInputFileNumber = 100; fMaxInitFailed = 0; fMasterResubmitThreshold = 0; fNtestFiles = 10; + fRunRange[0] = 0; + fRunRange[1] = 0; + fNrunsPerMaster = 1; + fMaxMergeFiles = 100; fRunNumbers = ""; fExecutable = "analysis.sh"; + fExecutableCommand = "root -b -q"; fArguments = ""; + fExecutableArgs = ""; fAnalysisMacro = "myAnalysis.C"; fAnalysisSource = ""; fAdditionalLibs = ""; @@ -843,14 +1582,123 @@ void AliAnalysisAlien::SetDefaults() fGridWorkingDir = ""; fGridDataDir = ""; // Can be like: /alice/sim/PDC_08a/LHC08c9/ fDataPattern = "*AliESDs.root"; // Can be like: *AliESDs.root, */pass1/*AliESDs.root, ... + fFriendChainName = ""; fGridOutputDir = "output"; fOutputArchive = "log_archive.zip:stdout,stderr root_archive.zip:*.root"; fOutputFiles = ""; // Like "AliAODs.root histos.root" fInputFormat = "xml-single"; fJDLName = "analysis.jdl"; + fJobTag = "Automatically generated analysis JDL"; fMergeExcludes = ""; + fMergeViaJDL = 0; + SetUseCopy(kTRUE); + SetCheckCopy(kTRUE); } +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::MergeOutput(const char *output, const char *basedir, Int_t nmaxmerge) +{ +// Merge all registered outputs from basedir. + TString output_file = output; + TString command; + TString output_chunk; + TString previous_chunk = ""; + Int_t count_chunk = 0; + Int_t count_zero = nmaxmerge; + Bool_t merged = kTRUE; + Int_t index = output_file.Index("@"); + if (index > 0) output_file.Remove(index); + command = Form("find %s/ *%s", basedir, output_file.Data()); + printf("command: %s\n", command.Data()); + TGridResult *res = gGrid->Command(command); + if (!res) { + printf("Error: No result for the find command\n"); + return kFALSE; + } + + TFileMerger *fm = 0; + TIter nextmap(res); + TMap *map = 0; + // Check if there is a merge operation to resume + output_chunk = output_file; + output_chunk.ReplaceAll(".root", "_*.root"); + // Check for existent temporary merge files + if (!gSystem->Exec(Form("ls %s", output_chunk.Data()))) { + while (1) { + // Skip as many input files as in a chunk + for (Int_t counter=0; counter, nentries=%d", output_file.Data(), res->GetSize()); + delete res; + return kFALSE; + } + output_chunk = output_file; + output_chunk.ReplaceAll(".root", Form("_%04d.root", count_chunk)); + count_chunk++; + if (gSystem->AccessPathName(output_chunk)) continue; + // Merged file with chunks up to found + printf("Resume merging of <%s> from <%s>\n", output_file.Data(), output_chunk.Data()); + previous_chunk = output_chunk; + break; + } + } + count_zero = nmaxmerge; + + while ((map=(TMap*)nextmap())) { + // Loop 'find' results and get next LFN + if (count_zero == nmaxmerge) { + // First file in chunk - create file merger and add previous chunk if any. + fm = new TFileMerger(kFALSE); + fm->SetFastMethod(kTRUE); + if (previous_chunk.Length()) fm->AddFile(previous_chunk.Data()); + output_chunk = output_file; + output_chunk.ReplaceAll(".root", Form("_%04d.root", count_chunk)); + } + // If last file found, put merged results in the output file + if (map == res->Last()) output_chunk = output_file; + TObjString *objs = dynamic_cast(map->GetValue("turl")); + if (!objs || !objs->GetString().Length()) { + // Nothing found - skip this output + delete res; + delete fm; + return kFALSE; + } + // Add file to be merged and decrement chunk counter. + fm->AddFile(objs->GetString()); + count_zero--; + if (count_zero==0 || map == res->Last()) { + fm->OutputFile(output_chunk); + if (!fm->GetMergeList() || !fm->GetMergeList()->GetSize()) { + // Nothing found - skip this output + ::Warning("MergeOutputs", "No <%s> files found.", output_file.Data()); + delete res; + delete fm; + return kFALSE; + } + // Merge the outputs, then go to next chunk + if (!fm->Merge()) { + ::Error("MergeOutputs", "Could not merge all <%s> files", output_file.Data()); + delete res; + delete fm; + merged = kFALSE; + return kFALSE; + } else { + ::Info("MergeOutputs", "\n##### Merged %d output files to <%s>", fm->GetMergeList()->GetSize(), output_chunk.Data()); + gSystem->Unlink(previous_chunk); + } + if (map == res->Last()) { + delete res; + delete fm; + break; + } + count_chunk++; + count_zero = nmaxmerge; + previous_chunk = output_chunk; + } + } + return merged; +} + //______________________________________________________________________________ Bool_t AliAnalysisAlien::MergeOutputs() { @@ -860,69 +1708,66 @@ Bool_t AliAnalysisAlien::MergeOutputs() if (!Connect()) { Error("MergeOutputs", "Cannot merge outputs without grid connection. Terminate will NOT be executed"); return kFALSE; + } + if (fMergeViaJDL) { + if (!TestBit(AliAnalysisGrid::kMerge)) { + Info("MergeOutputs", "### Re-run with option in terminate mode of the plugin to submit merging jobs ###"); + return kFALSE; + } + if (fProductionMode) { + Info("MergeOutputs", "### Merging will be submitted by LPM manager... ###"); + return kFALSE; + } + Info("MergeOutputs", "Submitting merging JDL"); + if (!SubmitMerging()) return kFALSE; + Info("MergeOutputs", "### Re-run with off to collect results after merging jobs are done ###"); + Info("MergeOutputs", "### The Terminate() method is executed by the merging jobs"); + return kFALSE; } // Get the output path - TString output = Form("/%s/%s/%s", gGrid->GetHomeDirectory(), fGridWorkingDir.Data(), fGridOutputDir.Data()); - if (!gGrid->Cd(output)) output = Form("/%s/%s", gGrid->GetHomeDirectory(), fGridOutputDir.Data()); - if (!gGrid->Cd(output)) { + if (!fGridOutputDir.Contains("/")) fGridOutputDir = Form("/%s/%s/%s", gGrid->GetHomeDirectory(), fGridWorkingDir.Data(), fGridOutputDir.Data()); + if (!DirectoryExists(fGridOutputDir)) { Error("MergeOutputs", "Grid output directory %s not found. Terminate() will NOT be executed", fGridOutputDir.Data()); return kFALSE; } if (!fOutputFiles.Length()) { Error("MergeOutputs", "No output file names defined. Are you running the right AliAnalysisAlien configuration ?"); return kFALSE; + } + // Check if fast read option was requested + if (fFastReadOption) { + Warning("MergeOutputs", "You requested FastRead option. Using xrootd flags to reduce timeouts. This may skip some files that could be accessed ! \ + \n+++ NOTE: To disable this option, use: plugin->SetFastReadOption(kFALSE)"); + gEnv->SetValue("XNet.ConnectTimeout",10); + gEnv->SetValue("XNet.RequestTimeout",10); + gEnv->SetValue("XNet.MaxRedirectCount",2); + gEnv->SetValue("XNet.ReconnectTimeout",10); + gEnv->SetValue("XNet.FirstConnectMaxCnt",1); } TObjArray *list = fOutputFiles.Tokenize(" "); TIter next(list); TObjString *str; - TString command; TString output_file; Bool_t merged = kTRUE; while((str=(TObjString*)next())) { output_file = str->GetString(); Int_t index = output_file.Index("@"); if (index > 0) output_file.Remove(index); + // Skip already merged outputs + if (!gSystem->AccessPathName(output_file)) { + Info("MergeOutputs", "Output file <%s> found. Not merging again.", output_file.Data()); + continue; + } if (fMergeExcludes.Length() && fMergeExcludes.Contains(output_file.Data())) continue; - command = Form("find %s/ *%s", output.Data(), output_file.Data()); - printf("command: %s\n", command.Data()); - TGridResult *res = gGrid->Command(command); - if (!res) continue; - TFileMerger *fm = 0; - TIter nextmap(res); - TMap *map; - while ((map=(TMap*)nextmap())) { - TObjString *objs = dynamic_cast(map->GetValue("turl")); - if (!objs || !objs->GetString().Length()) { - delete res; - continue; - } - if (!fm) { - fm = new TFileMerger(kFALSE); - fm->SetFastMethod(kTRUE); - fm->OutputFile(output_file); - } - fm->AddFile(objs->GetString()); - } - if (!fm || !fm->GetMergeList() || !fm->GetMergeList()->GetSize()) { - Warning("MergeOutputs", "No <%s> files found.", output_file.Data()); - merged = kFALSE; - delete res; - continue; - } - if (!fm->Merge()) { - Error("MergeOutputs", "Could not merge all <%s> files", output_file.Data()); - merged = kFALSE; - } else { - Info("MergeOutputs", "\n##### Merged %d output files <%s>", fm->GetMergeList()->GetSize(), output_file.Data()); + // Perform a 'find' command in the output directory, looking for registered outputs + merged = MergeOutput(output_file, fGridOutputDir, fMaxMergeFiles); + if (!merged) { + Error("MergeOutputs", "Terminate() will NOT be executed"); + return kFALSE; } - delete fm; - delete res; } - if (!merged) { - Error("MergeOutputs", "Terminate() will NOT be executed"); - } - return merged; + return kTRUE; } //______________________________________________________________________________ @@ -937,7 +1782,7 @@ void AliAnalysisAlien::SetDefaultOutputs(Bool_t flag) } //______________________________________________________________________________ -void AliAnalysisAlien::StartAnalysis(Long64_t /*nentries*/, Long64_t /*firstEntry*/) +Bool_t AliAnalysisAlien::StartAnalysis(Long64_t /*nentries*/, Long64_t /*firstEntry*/) { // Start remote grid analysis. @@ -946,7 +1791,7 @@ void AliAnalysisAlien::StartAnalysis(Long64_t /*nentries*/, Long64_t /*firstEntr AliAnalysisManager *mgr = AliAnalysisManager::GetAnalysisManager(); if (!mgr || !mgr->IsInitialized()) { Error("StartAnalysis", "You need an initialized analysis manager for this"); - return; + return kFALSE; } fOutputFiles = ""; TIter next(mgr->GetOutputs()); @@ -957,6 +1802,7 @@ void AliAnalysisAlien::StartAnalysis(Long64_t /*nentries*/, Long64_t /*firstEntr if (!mgr->GetOutputEventHandler()) continue; filename = mgr->GetOutputEventHandler()->GetOutputFileName(); } + if (fOutputFiles.Contains(filename)) continue; if (fOutputFiles.Length()) fOutputFiles += " "; fOutputFiles += filename; } @@ -964,7 +1810,7 @@ void AliAnalysisAlien::StartAnalysis(Long64_t /*nentries*/, Long64_t /*firstEntr if (mgr->GetExtraFiles().Length()) { if (fOutputFiles.Length()) fOutputFiles += " "; fOutputFiles += mgr->GetExtraFiles(); - } + } } // if (!fCloseSE.Length()) fCloseSE = gSystem->Getenv("alien_CLOSE_SE"); if (TestBit(AliAnalysisGrid::kOffline)) { @@ -979,26 +1825,42 @@ void AliAnalysisAlien::StartAnalysis(Long64_t /*nentries*/, Long64_t /*firstEntr \n space and job submitted."); } else if (TestBit(AliAnalysisGrid::kMerge)) { Info("StartAnalysis","\n##### MERGE MODE ##### The registered outputs of the analysis will be merged"); - return; + if (fMergeViaJDL) CheckInputData(); + return kTRUE; } else { Info("StartAnalysis","\n##### FULL ANALYSIS MODE ##### Producing needed files and submitting your analysis job..."); } + Print(); if (!Connect()) { Error("StartAnalysis", "Cannot start grid analysis without grid connection"); - return; - } + return kFALSE; + } + if (IsCheckCopy()) CheckFileCopy(gGrid->GetHomeDirectory()); if (!CheckInputData()) { Error("StartAnalysis", "There was an error in preprocessing your requested input data"); - return; + return kFALSE; + } + if (!CreateDataset(fDataPattern)) { + TString serror; + if (!fRunNumbers.Length() && !fRunRange[0]) serror = Form("path to data directory: <%s>", fGridDataDir.Data()); + if (fRunNumbers.Length()) serror = "run numbers"; + if (fRunRange[0]) serror = Form("run range [%d, %d]", fRunRange[0], fRunRange[1]); + serror += Form("\n or data pattern <%s>", fDataPattern.Data()); + Error("StartAnalysis", "No data to process. Please fix %s in your plugin configuration.", serror.Data()); + return kFALSE; } - CreateDataset(fDataPattern); WriteAnalysisFile(); WriteAnalysisMacro(); WriteExecutable(); WriteValidationScript(); - if (!CreateJDL()) return; - if (TestBit(AliAnalysisGrid::kOffline)) return; + if (fMergeViaJDL) { + WriteMergingMacro(); + WriteMergeExecutable(); + WriteValidationScript(kTRUE); + } + if (!CreateJDL()) return kFALSE; + if (TestBit(AliAnalysisGrid::kOffline)) return kFALSE; if (TestBit(AliAnalysisGrid::kTest)) { // Locally testing the analysis Info("StartAnalysis", "\n_______________________________________________________________________ \ @@ -1016,39 +1878,199 @@ void AliAnalysisAlien::StartAnalysis(Long64_t /*nentries*/, Long64_t /*firstEntr } delete list; gSystem->Exec(Form("bash %s 2>stderr", fExecutable.Data())); - gSystem->Exec("bash validate.sh"); + TString validationScript = fExecutable; + validationScript.ReplaceAll(".sh", "_validation.sh"); + gSystem->Exec(Form("bash %s",validationScript.Data())); // gSystem->Exec("cat stdout"); - return; + return kFALSE; } - // Submit AliEn job - CdWork(); - TGridResult *res = gGrid->Command(Form("submit %s", fJDLName.Data())); + // Check if submitting is managed by LPM manager + if (fProductionMode) { + TString prodfile = fJDLName; + prodfile.ReplaceAll(".jdl", ".prod"); + WriteProductionFile(prodfile); + Info("StartAnalysis", "Job submitting is managed by LPM. Rerun in terminate mode after jobs finished."); + return kFALSE; + } + // Submit AliEn job(s) + gGrid->Cd(fGridOutputDir); + TGridResult *res; TString jobID = ""; - if (res) { - const char *cjobId = res->GetKey(0,"jobId"); - if (!cjobId) { - Error("StartAnalysis", "Your JDL %s could not be submitted", fJDLName.Data()); - return; + if (!fRunNumbers.Length() && !fRunRange[0]) { + // Submit a given xml or a set of runs + res = gGrid->Command(Form("submit %s", fJDLName.Data())); + printf("*************************** %s\n",Form("submit %s", fJDLName.Data())); + if (res) { + const char *cjobId = res->GetKey(0,"jobId"); + if (!cjobId) { + gGrid->Stdout(); + gGrid->Stderr(); + Error("StartAnalysis", "Your JDL %s could not be submitted", fJDLName.Data()); + return kFALSE; + } else { + Info("StartAnalysis", "\n_______________________________________________________________________ \ + \n##### Your JDL %s was successfully submitted. \nTHE JOB ID IS: %s \ + \n_______________________________________________________________________", + fJDLName.Data(), cjobId); + jobID = cjobId; + } + delete res; } else { - Info("StartAnalysis", "\n_______________________________________________________________________ \ - \n##### Your JDL %s was successfully submitted. \nTHE JOB ID IS: %s \ - \n_______________________________________________________________________", - fJDLName.Data(), cjobId); - jobID = cjobId; - } - delete res; + Error("StartAnalysis", "No grid result after submission !!! Bailing out..."); + return kFALSE; + } + } else { + // Submit for a range of enumeration of runs. + if (!Submit()) return kFALSE; } + Info("StartAnalysis", "\n#### STARTING AN ALIEN SHELL FOR YOU. EXIT WHEN YOUR JOB %s HAS FINISHED. #### \ \n You may exit at any time and terminate the job later using the option \ \n ##################################################################################", jobID.Data()); - //gGrid->Shell(); gSystem->Exec("aliensh"); + return kTRUE; +} + +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::Submit() +{ +// Submit all master jobs. + Int_t nmasterjobs = fInputFiles->GetEntries(); + Long_t tshoot = gSystem->Now(); + if (!fNsubmitted && !SubmitNext()) return kFALSE; + while (fNsubmitted < nmasterjobs) { + Long_t now = gSystem->Now(); + if ((now-tshoot)>30000) { + tshoot = now; + if (!SubmitNext()) return kFALSE; + } + } + return kTRUE; +} + +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::SubmitMerging() +{ +// Submit all merging jobs. + if (!fGridOutputDir.Contains("/")) fGridOutputDir = Form("/%s/%s/%s", gGrid->GetHomeDirectory(), fGridWorkingDir.Data(), fGridOutputDir.Data()); + gGrid->Cd(fGridOutputDir); + TString mergeJDLName = fExecutable; + mergeJDLName.ReplaceAll(".sh", "_merge.jdl"); + Int_t ntosubmit = fInputFiles->GetEntries(); + printf("### Submitting %d merging jobs...\n", ntosubmit); + for (Int_t i=0; iBaseName(fInputFiles->At(i)->GetName()); + runOutDir.ReplaceAll(".xml", ""); + if (fOutputToRunNo) + query = Form("submit %s %s", mergeJDLName.Data(), runOutDir.Data()); + else + query = Form("submit %s %03d", mergeJDLName.Data(), i); + printf("********* %s\n",query.Data()); + TGridResult *res = gGrid->Command(query); + if (res) { + const char *cjobId = res->GetKey(0,"jobId"); + if (!cjobId) { + gGrid->Stdout(); + gGrid->Stderr(); + Error("StartAnalysis", "Your JDL %s could not be submitted", mergeJDLName.Data()); + return kFALSE; + } else { + Info("StartAnalysis", "\n_______________________________________________________________________ \ + \n##### Your JDL %s was successfully submitted. \nTHE JOB ID IS: %s \ + \n_______________________________________________________________________", + mergeJDLName.Data(), cjobId); + } + delete res; + } else { + Error("SubmitMerging", "No grid result after submission !!! Bailing out..."); + return kFALSE; + } + } + if (!ntosubmit) return kTRUE; + Info("StartAnalysis", "\n#### STARTING AN ALIEN SHELL FOR YOU. EXIT WHEN YOUR MERGING JOBS HAVE FINISHED. #### \ + \n You may exit at any time and terminate the job later using the option but disabling SetMergeViaJDL\ + \n ##################################################################################"); + gSystem->Exec("aliensh"); + return kTRUE; +} + +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::SubmitNext() +{ +// Submit next bunch of master jobs if the queue is free. + static Bool_t iscalled = kFALSE; + static Int_t firstmaster = 0; + static Int_t lastmaster = 0; + static Int_t npermaster = 0; + if (iscalled) return kTRUE; + iscalled = kTRUE; + Int_t nrunning=0, nwaiting=0, nerror=0, ndone=0; + Int_t ntosubmit = 0; + TGridResult *res; + TString jobID = ""; + if (!fNsubmitted) ntosubmit = 1; + else { + TString status = GetJobStatus(firstmaster, lastmaster, nrunning, nwaiting, nerror, ndone); + printf("=== master %d: %s\n", lastmaster, status.Data()); + // If last master not split, just return + if (status != "SPLIT") {iscalled = kFALSE; return kTRUE;} + // No more than 100 waiting jobs + if (nwaiting>100) {iscalled = kFALSE; return kTRUE;} + npermaster = (nrunning+nwaiting+nerror+ndone)/fNsubmitted; + if (npermaster) ntosubmit = (100-nwaiting)/npermaster; + if (!ntosubmit) ntosubmit = 1; + printf("=== WAITING(%d) RUNNING(%d) DONE(%d) OTHER(%d) NperMaster=%d => to submit %d jobs\n", + nwaiting, nrunning, ndone, nerror, npermaster, ntosubmit); + } + Int_t nmasterjobs = fInputFiles->GetEntries(); + for (Int_t i=0; i=nmasterjobs) {iscalled = kFALSE; return kTRUE;} + TString query; + TString runOutDir = gSystem->BaseName(fInputFiles->At(fNsubmitted)->GetName()); + runOutDir.ReplaceAll(".xml", ""); + if (fOutputToRunNo) + query = Form("submit %s %s %s", fJDLName.Data(), fInputFiles->At(fNsubmitted)->GetName(), runOutDir.Data()); + else + query = Form("submit %s %s %03d", fJDLName.Data(), fInputFiles->At(fNsubmitted)->GetName(), fNsubmitted); + printf("********* %s\n",query.Data()); + res = gGrid->Command(query); + if (res) { + TString cjobId1 = res->GetKey(0,"jobId"); + if (!cjobId1.Length()) { + iscalled = kFALSE; + gGrid->Stdout(); + gGrid->Stderr(); + Error("StartAnalysis", "Your JDL %s could not be submitted. The message was:", fJDLName.Data()); + return kFALSE; + } else { + Info("StartAnalysis", "\n_______________________________________________________________________ \ + \n##### Your JDL %s submitted (%d to go). \nTHE JOB ID IS: %s \ + \n_______________________________________________________________________", + fJDLName.Data(), nmasterjobs-fNsubmitted-1, cjobId1.Data()); + jobID += cjobId1; + jobID += " "; + lastmaster = cjobId1.Atoi(); + if (!firstmaster) firstmaster = lastmaster; + fNsubmitted++; + } + delete res; + } else { + Error("StartAnalysis", "No grid result after submission !!! Bailing out..."); + return kFALSE; + } + } + iscalled = kFALSE; + return kTRUE; } //______________________________________________________________________________ void AliAnalysisAlien::WriteAnalysisFile() { -// Write current analysis manager into the file analysis.root +// Write current analysis manager into the file + TString analysisFile = fExecutable; + analysisFile.ReplaceAll(".sh", ".root"); if (!TestBit(AliAnalysisGrid::kSubmit)) { AliAnalysisManager *mgr = AliAnalysisManager::GetAnalysisManager(); if (!mgr || !mgr->IsInitialized()) { @@ -1064,13 +2086,19 @@ void AliAnalysisAlien::WriteAnalysisFile() if (handler->InheritsFrom("AliAODInputHandler")) TObject::SetBit(AliAnalysisGrid::kUseAOD); } TDirectory *cdir = gDirectory; - TFile *file = TFile::Open("analysis.root", "RECREATE"); + TFile *file = TFile::Open(analysisFile, "RECREATE"); if (file) { + // Skip task Terminate calls for the grid job + mgr->SetSkipTerminate(kTRUE); + // Unless merging makes no sense + if (IsSingleOutput()) mgr->SetSkipTerminate(kFALSE); mgr->Write(); delete file; + // Enable termination for local jobs + mgr->SetSkipTerminate(kFALSE); } if (cdir) cdir->cd(); - Info("WriteAnalysisFile", "\n##### Analysis manager: %s wrote to file \n", mgr->GetName()); + Info("WriteAnalysisFile", "\n##### Analysis manager: %s wrote to file <%s>\n", mgr->GetName(),analysisFile.Data()); } Bool_t copy = kTRUE; if (TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; @@ -1078,9 +2106,9 @@ void AliAnalysisAlien::WriteAnalysisFile() CdWork(); TString workdir = gGrid->GetHomeDirectory(); workdir += fGridWorkingDir; - Info("CreateJDL", "\n##### Copying file containing your initialized analysis manager to your alien workspace"); - if (FileExists("analysis.root")) gGrid->Rm("analysis.root"); - TFile::Cp("file:analysis.root", Form("alien://%s/analysis.root", workdir.Data())); + Info("CreateJDL", "\n##### Copying file <%s> containing your initialized analysis manager to your alien workspace", analysisFile.Data()); + if (FileExists(analysisFile)) gGrid->Rm(analysisFile); + TFile::Cp(Form("file:%s",analysisFile.Data()), Form("alien://%s/%s", workdir.Data(),analysisFile.Data())); } } @@ -1095,6 +2123,12 @@ void AliAnalysisAlien::WriteAnalysisMacro() Error("WriteAnalysisMacro", "could not open file %s for writing", fAnalysisMacro.Data()); return; } + Bool_t hasSTEERBase = kFALSE; + Bool_t hasESD = kFALSE; + Bool_t hasAOD = kFALSE; + Bool_t hasANALYSIS = kFALSE; + Bool_t hasANALYSISalice = kFALSE; + Bool_t hasCORRFW = kFALSE; TString func = fAnalysisMacro; TString type = "ESD"; TString comment = "// Analysis using "; @@ -1103,6 +2137,10 @@ void AliAnalysisAlien::WriteAnalysisMacro() type = "AOD"; comment += "AOD"; } + if (type!="AOD" && fFriendChainName!="") { + Error("WriteAnalysisMacro", "Friend chain can be attached only to AOD"); + return; + } if (TObject::TestBit(AliAnalysisGrid::kUseMC)) comment += "/MC"; else comment += " data"; out << "const char *anatype = \"" << type.Data() << "\";" << endl << endl; @@ -1118,6 +2156,22 @@ void AliAnalysisAlien::WriteAnalysisMacro() out << " gSystem->Load(\"libGeom\");" << endl; out << " gSystem->Load(\"libVMC\");" << endl; out << " gSystem->Load(\"libPhysics\");" << endl << endl; + out << " gSystem->Load(\"libMinuit\");" << endl << endl; + if (fAdditionalRootLibs.Length()) { + // in principle libtree /lib geom libvmc etc. can go into this list, too + out << "// Add aditional libraries" << endl; + TObjArray *list = fAdditionalRootLibs.Tokenize(" "); + TIter next(list); + TObjString *str; + while((str=(TObjString*)next())) { + if (str->GetString().Contains(".so")) + out << " gSystem->Load(\"" << str->GetString().Data() << "\");" << endl; + } + if (list) delete list; + } + out << "// include path" << endl; + if (fIncludePath.Length()) out << " gSystem->AddIncludePath(\"" << fIncludePath.Data() << "\");" << endl; + out << " gSystem->AddIncludePath(\"-I$ALICE_ROOT/include\");" << endl << endl; out << "// Load analysis framework libraries" << endl; if (!fPackages) { out << " gSystem->Load(\"libSTEERBase\");" << endl; @@ -1130,48 +2184,54 @@ void AliAnalysisAlien::WriteAnalysisMacro() TIter next(fPackages); TObject *obj; TString pkgname; - Bool_t hasSTEERBase = kFALSE; - Bool_t hasESD = kFALSE; - Bool_t hasAOD = kFALSE; - Bool_t hasANALYSIS = kFALSE; - Bool_t hasANALYSISalice = kFALSE; - Bool_t hasCORRFW = kFALSE; + TString setupPar = "AliAnalysisAlien::SetupPar"; while ((obj=next())) { pkgname = obj->GetName(); - if (pkgname.Contains("STEERBase")) hasSTEERBase = kTRUE; - if (pkgname.Contains("ESD")) hasESD = kTRUE; - if (pkgname.Contains("AOD")) hasAOD = kTRUE; - if (pkgname.Contains("ANALYSIS") && !pkgname.Contains("ANALYSISalice")) hasANALYSIS = kTRUE; - if (pkgname.Contains("ANALYSISalice")) hasANALYSISalice = kTRUE; - if (pkgname.Contains("CORRFW")) hasCORRFW = kTRUE; - } + if (pkgname == "STEERBase" || + pkgname == "STEERBase.par") hasSTEERBase = kTRUE; + if (pkgname == "ESD" || + pkgname == "ESD.par") hasESD = kTRUE; + if (pkgname == "AOD" || + pkgname == "AOD.par") hasAOD = kTRUE; + if (pkgname == "ANALYSIS" || + pkgname == "ANALYSIS.par") hasANALYSIS = kTRUE; + if (pkgname == "ANALYSISalice" || + pkgname == "ANALYSISalice.par") hasANALYSISalice = kTRUE; + if (pkgname == "CORRFW" || + pkgname == "CORRFW.par") hasCORRFW = kTRUE; + } + if (hasANALYSISalice) setupPar = "SetupPar"; if (!hasSTEERBase) out << " gSystem->Load(\"libSTEERBase\");" << endl; - else out << " if (!SetupPar(\"STEERBase\")) return;" << endl; + else out << " if (!" << setupPar << "(\"STEERBase\")) return;" << endl; if (!hasESD) out << " gSystem->Load(\"libESD\");" << endl; - else out << " if (!SetupPar(\"ESD\")) return;" << endl; + else out << " if (!" << setupPar << "(\"ESD\")) return;" << endl; if (!hasAOD) out << " gSystem->Load(\"libAOD\");" << endl; - else out << " if (!SetupPar(\"AOD\")) return;" << endl; + else out << " if (!" << setupPar << "(\"AOD\")) return;" << endl; if (!hasANALYSIS) out << " gSystem->Load(\"libANALYSIS\");" << endl; - else out << " if (!SetupPar(\"ANALYSIS\")) return;" << endl; + else out << " if (!" << setupPar << "(\"ANALYSIS\")) return;" << endl; if (!hasANALYSISalice) out << " gSystem->Load(\"libANALYSISalice\");" << endl; - else out << " if (!SetupPar(\"ANALYSISalice\")) return;" << endl; + else out << " if (!" << setupPar << "(\"ANALYSISalice\")) return;" << endl; if (!hasCORRFW) out << " gSystem->Load(\"libCORRFW\");" << endl << endl; - else out << " if (!SetupPar(\"CORRFW\")) return;" << endl << endl; + else out << " if (!" << setupPar << "(\"CORRFW\")) return;" << endl << endl; out << "// Compile other par packages" << endl; next.Reset(); while ((obj=next())) { pkgname = obj->GetName(); - if (pkgname.Contains("STEERBase") || - pkgname.Contains("ESD") || - pkgname.Contains("AOD") || - pkgname.Contains("ANALYSIS") || - pkgname.Contains("CORRFW")) continue; - out << " if (!SetupPar(\"" << obj->GetName() << "\")) return;" << endl; + if (pkgname == "STEERBase" || + pkgname == "STEERBase.par" || + pkgname == "ESD" || + pkgname == "ESD.par" || + pkgname == "AOD" || + pkgname == "AOD.par" || + pkgname == "ANALYSIS" || + pkgname == "ANALYSIS.par" || + pkgname == "ANALYSISalice" || + pkgname == "ANALYSISalice.par" || + pkgname == "CORRFW" || + pkgname == "CORRFW.par") continue; + out << " if (!" << setupPar << "(\"" << obj->GetName() << "\")) return;" << endl; } } - out << "// include path" << endl; - if (fIncludePath.Length()) out << " gSystem->AddIncludePath(\"" << fIncludePath.Data() << "\");" << endl; - out << " gSystem->AddIncludePath(\"-I$ALICE_ROOT/include\");" << endl << endl; if (fAdditionalLibs.Length()) { out << "// Add aditional AliRoot libraries" << endl; TObjArray *list = fAdditionalLibs.Tokenize(" "); @@ -1195,15 +2255,38 @@ void AliAnalysisAlien::WriteAnalysisMacro() if (list) delete list; } out << endl; + if (fFastReadOption) { + Warning("WriteAnalysisMacro", "!!! You requested FastRead option. Using xrootd flags to reduce timeouts in the grid jobs. This may skip some files that could be accessed !!! \ + \n+++ NOTE: To disable this option, use: plugin->SetFastReadOption(kFALSE)"); + out << "// fast xrootd reading enabled" << endl; + out << " printf(\"!!! You requested FastRead option. Using xrootd flags to reduce timeouts. Note that this may skip some files that could be accessed !!!\");" << endl; + out << " gEnv->SetValue(\"XNet.ConnectTimeout\",10);" << endl; + out << " gEnv->SetValue(\"XNet.RequestTimeout\",10);" << endl; + out << " gEnv->SetValue(\"XNet.MaxRedirectCount\",2);" << endl; + out << " gEnv->SetValue(\"XNet.ReconnectTimeout\",10);" << endl; + out << " gEnv->SetValue(\"XNet.FirstConnectMaxCnt\",1);" << endl << endl; + } out << "// connect to AliEn and make the chain" << endl; out << " if (!TGrid::Connect(\"alien://\")) return;" << endl; if (IsUsingTags()) { out << " TChain *chain = CreateChainFromTags(\"wn.xml\", anatype);" << endl << endl; } else { - out << " TChain *chain = CreateChain(\"wn.xml\", anatype);" << endl << endl; + if(fFriendChainName!="AliAOD.VertexingHF.root") { + out << " TChain *chain = CreateChain(\"wn.xml\", anatype);" << endl << endl; + } else { + out << " // Check if the macro to create the chain was provided" << endl; + out << " if (gSystem->AccessPathName(\"MakeAODInputChain.C\")) {" << endl; + out << " ::Error(\"" << func.Data() << "\", \"File MakeAODInputChain.C not provided. Aborting.\");" << endl; + out << " return;" << endl; + out << " }" << endl; + out << " gROOT->LoadMacro(\"MakeAODInputChain.C\");" << endl; + out << " TChain *chain = MakeAODInputChain(\"wn.xml\",\"none\");" << endl << endl; + } } out << "// read the analysis manager from file" << endl; - out << " TFile *file = TFile::Open(\"analysis.root\");" << endl; + TString analysisFile = fExecutable; + analysisFile.ReplaceAll(".sh", ".root"); + out << " TFile *file = TFile::Open(\"" << analysisFile << "\");" << endl; out << " if (!file) return;" << endl; out << " TIter nextkey(file->GetListOfKeys());" << endl; out << " AliAnalysisManager *mgr = 0;" << endl; @@ -1213,10 +2296,17 @@ void AliAnalysisAlien::WriteAnalysisMacro() out << " mgr = (AliAnalysisManager*)file->Get(key->GetName());" << endl; out << " };" << endl; out << " if (!mgr) {" << endl; - out << " ::Error(\"" << func.Data() << "\", \"No analysis manager found in file analysis.root\");" << endl; + out << " ::Error(\"" << func.Data() << "\", \"No analysis manager found in file" << analysisFile <<"\");" << endl; out << " return;" << endl; out << " }" << endl << endl; out << " mgr->PrintStatus();" << endl; + if (AliAnalysisManager::GetAnalysisManager()) { + if (AliAnalysisManager::GetAnalysisManager()->GetDebugLevel()>3) { + out << " gEnv->SetValue(\"XNet.Debug\", \"1\");" << endl; + } else { + out << " AliLog::SetGlobalLogLevel(AliLog::kError);" << endl; + } + } out << " mgr->StartAnalysis(\"localfile\", chain);" << endl; out << " timer.Stop();" << endl; out << " timer.Print();" << endl; @@ -1242,7 +2332,13 @@ void AliAnalysisAlien::WriteAnalysisMacro() out << " gROOT->LoadMacro(\"ConfigureCuts.C\");" << endl; out << " ConfigureCuts(runCuts, lhcCuts, detCuts, evCuts);" << endl; out << " }" << endl; - out << " TChain *chain = tagAna->QueryTags(runCuts, lhcCuts, detCuts, evCuts);" << endl; + if (fFriendChainName=="") { + out << " TChain *chain = tagAna->QueryTags(runCuts, lhcCuts, detCuts, evCuts);" << endl; + } else { + out << " TString tmpColl=\"tmpCollection.xml\";" << endl; + out << " tagAna->CreateXMLCollection(tmpColl.Data(),runCuts, lhcCuts, detCuts, evCuts);" << endl; + out << " TChain *chain = CreateChain(tmpColl.Data(),type);" << endl; + } out << " if (!chain || !chain->GetNtrees()) return NULL;" << endl; out << " chain->ls();" << endl; out << " return chain;" << endl; @@ -1255,7 +2351,8 @@ void AliAnalysisAlien::WriteAnalysisMacro() msg += " AliEventTagCuts *evCuts)"; Info("WriteAnalysisMacro", msg.Data()); } - } else { + } + if (!IsUsingTags() || fFriendChainName!="") { out <<"//________________________________________________________________________________" << endl; out << "TChain* CreateChain(const char *xmlfile, const char *type=\"ESD\")" << endl; out << "{" << endl; @@ -1272,16 +2369,30 @@ void AliAnalysisAlien::WriteAnalysisMacro() out << " return NULL;" << endl; out << " }" << endl; out << " TChain *chain = new TChain(treename);" << endl; + if(fFriendChainName!="") { + out << " TChain *chainFriend = new TChain(treename);" << endl; + } out << " coll->Reset();" << endl; - out << " while (coll->Next()) chain->Add(coll->GetTURL(\"\"));" << endl; + out << " while (coll->Next()) {" << endl; + out << " chain->Add(coll->GetTURL(\"\"));" << endl; + if(fFriendChainName!="") { + out << " TString fileFriend=coll->GetTURL(\"\");" << endl; + out << " fileFriend.ReplaceAll(\"AliAOD.root\",\""<Add(fileFriend.Data());" << endl; + } + out << " }" << endl; out << " if (!chain->GetNtrees()) {" << endl; out << " ::Error(\"CreateChain\", \"No tree found from collection %s\", xmlfile);" << endl; out << " return NULL;" << endl; out << " }" << endl; + if(fFriendChainName!="") { + out << " chain->AddFriend(chainFriend);" << endl; + } out << " return chain;" << endl; out << "}" << endl << endl; } - if (fPackages) { + if (hasANALYSISalice) { out <<"//________________________________________________________________________________" << endl; out << "Bool_t SetupPar(const char *package) {" << endl; out << "// Compile the package and set it up." << endl; @@ -1340,6 +2451,303 @@ void AliAnalysisAlien::WriteAnalysisMacro() } } +//______________________________________________________________________________ +void AliAnalysisAlien::WriteMergingMacro() +{ +// Write a macro to merge the outputs per master job. + if (!fMergeViaJDL) return; + if (!fOutputFiles.Length()) { + Error("WriteMergingMacro", "No output file names defined. Are you running the right AliAnalysisAlien configuration ?"); + return; + } + TString mergingMacro = fExecutable; + mergingMacro.ReplaceAll(".sh","_merge.C"); + if (!fGridOutputDir.Contains("/")) fGridOutputDir = Form("/%s/%s/%s", gGrid->GetHomeDirectory(), fGridWorkingDir.Data(), fGridOutputDir.Data()); + if (!TestBit(AliAnalysisGrid::kSubmit)) { + ofstream out; + out.open(mergingMacro.Data(), ios::out); + if (!out.good()) { + Error("WriteMergingMacro", "could not open file %s for writing", fAnalysisMacro.Data()); + return; + } + Bool_t hasSTEERBase = kFALSE; + Bool_t hasESD = kFALSE; + Bool_t hasAOD = kFALSE; + Bool_t hasANALYSIS = kFALSE; + Bool_t hasANALYSISalice = kFALSE; + Bool_t hasCORRFW = kFALSE; + TString func = mergingMacro; + TString comment; + func.ReplaceAll(".C", ""); + out << "void " << func.Data() << "(const char *dir)" << endl; + out << "{" << endl; + out << "// Automatically generated merging macro executed in grid subjobs" << endl << endl; + out << " TStopwatch timer;" << endl; + out << " timer.Start();" << endl << endl; + out << "// load base root libraries" << endl; + out << " gSystem->Load(\"libTree\");" << endl; + out << " gSystem->Load(\"libGeom\");" << endl; + out << " gSystem->Load(\"libVMC\");" << endl; + out << " gSystem->Load(\"libPhysics\");" << endl << endl; + out << " gSystem->Load(\"libMinuit\");" << endl << endl; + if (fAdditionalRootLibs.Length()) { + // in principle libtree /lib geom libvmc etc. can go into this list, too + out << "// Add aditional libraries" << endl; + TObjArray *list = fAdditionalRootLibs.Tokenize(" "); + TIter next(list); + TObjString *str; + while((str=(TObjString*)next())) { + if (str->GetString().Contains(".so")) + out << " gSystem->Load(\"" << str->GetString().Data() << "\");" << endl; + } + if (list) delete list; + } + out << "// include path" << endl; + if (fIncludePath.Length()) out << " gSystem->AddIncludePath(\"" << fIncludePath.Data() << "\");" << endl; + out << " gSystem->AddIncludePath(\"-I$ALICE_ROOT/include\");" << endl << endl; + out << "// Load analysis framework libraries" << endl; + if (!fPackages) { + out << " gSystem->Load(\"libSTEERBase\");" << endl; + out << " gSystem->Load(\"libESD\");" << endl; + out << " gSystem->Load(\"libAOD\");" << endl; + out << " gSystem->Load(\"libANALYSIS\");" << endl; + out << " gSystem->Load(\"libANALYSISalice\");" << endl; + out << " gSystem->Load(\"libCORRFW\");" << endl << endl; + } else { + TIter next(fPackages); + TObject *obj; + TString pkgname; + TString setupPar = "AliAnalysisAlien::SetupPar"; + while ((obj=next())) { + pkgname = obj->GetName(); + if (pkgname == "STEERBase" || + pkgname == "STEERBase.par") hasSTEERBase = kTRUE; + if (pkgname == "ESD" || + pkgname == "ESD.par") hasESD = kTRUE; + if (pkgname == "AOD" || + pkgname == "AOD.par") hasAOD = kTRUE; + if (pkgname == "ANALYSIS" || + pkgname == "ANALYSIS.par") hasANALYSIS = kTRUE; + if (pkgname == "ANALYSISalice" || + pkgname == "ANALYSISalice.par") hasANALYSISalice = kTRUE; + if (pkgname == "CORRFW" || + pkgname == "CORRFW.par") hasCORRFW = kTRUE; + } + if (hasANALYSISalice) setupPar = "SetupPar"; + if (!hasSTEERBase) out << " gSystem->Load(\"libSTEERBase\");" << endl; + else out << " if (!" << setupPar << "(\"STEERBase\")) return;" << endl; + if (!hasESD) out << " gSystem->Load(\"libESD\");" << endl; + else out << " if (!" << setupPar << "(\"ESD\")) return;" << endl; + if (!hasAOD) out << " gSystem->Load(\"libAOD\");" << endl; + else out << " if (!" << setupPar << "(\"AOD\")) return;" << endl; + if (!hasANALYSIS) out << " gSystem->Load(\"libANALYSIS\");" << endl; + else out << " if (!" << setupPar << "(\"ANALYSIS\")) return;" << endl; + if (!hasANALYSISalice) out << " gSystem->Load(\"libANALYSISalice\");" << endl; + else out << " if (!" << setupPar << "(\"ANALYSISalice\")) return;" << endl; + if (!hasCORRFW) out << " gSystem->Load(\"libCORRFW\");" << endl << endl; + else out << " if (!" << setupPar << "(\"CORRFW\")) return;" << endl << endl; + out << "// Compile other par packages" << endl; + next.Reset(); + while ((obj=next())) { + pkgname = obj->GetName(); + if (pkgname == "STEERBase" || + pkgname == "STEERBase.par" || + pkgname == "ESD" || + pkgname == "ESD.par" || + pkgname == "AOD" || + pkgname == "AOD.par" || + pkgname == "ANALYSIS" || + pkgname == "ANALYSIS.par" || + pkgname == "ANALYSISalice" || + pkgname == "ANALYSISalice.par" || + pkgname == "CORRFW" || + pkgname == "CORRFW.par") continue; + out << " if (!" << setupPar << "(\"" << obj->GetName() << "\")) return;" << endl; + } + } + if (fAdditionalLibs.Length()) { + out << "// Add aditional AliRoot libraries" << endl; + TObjArray *list = fAdditionalLibs.Tokenize(" "); + TIter next(list); + TObjString *str; + while((str=(TObjString*)next())) { + if (str->GetString().Contains(".so")) + out << " gSystem->Load(\"" << str->GetString().Data() << "\");" << endl; + } + if (list) delete list; + } + out << endl; + out << "// Analysis source to be compiled at runtime (if any)" << endl; + if (fAnalysisSource.Length()) { + TObjArray *list = fAnalysisSource.Tokenize(" "); + TIter next(list); + TObjString *str; + while((str=(TObjString*)next())) { + out << " gROOT->ProcessLine(\".L " << str->GetString().Data() << "+g\");" << endl; + } + if (list) delete list; + } + out << endl; + if (fFastReadOption) { + Warning("WriteMergingMacro", "!!! You requested FastRead option. Using xrootd flags to reduce timeouts in the grid merging jobs. Note that this may skip some files that could be accessed !!!"); + out << "// fast xrootd reading enabled" << endl; + out << " printf(\"!!! You requested FastRead option. Using xrootd flags to reduce timeouts. Note that this may skip some files that could be accessed !!!\");" << endl; + out << " gEnv->SetValue(\"XNet.ConnectTimeout\",10);" << endl; + out << " gEnv->SetValue(\"XNet.RequestTimeout\",10);" << endl; + out << " gEnv->SetValue(\"XNet.MaxRedirectCount\",2);" << endl; + out << " gEnv->SetValue(\"XNet.ReconnectTimeout\",10);" << endl; + out << " gEnv->SetValue(\"XNet.FirstConnectMaxCnt\",1);" << endl << endl; + } + out << "// Connect to AliEn" << endl; + out << " if (!TGrid::Connect(\"alien://\")) return;" << endl; + out << " TString outputDir = \"" << fGridOutputDir << "/\";" << endl; + out << " outputDir += dir;" << endl; + out << " TString outputFiles = \"" << fOutputFiles << "\";" << endl; + out << " TString mergeExcludes = \"" << fMergeExcludes << "\";" << endl; + out << " mergeExcludes += \"" << AliAnalysisManager::GetAnalysisManager()->GetExtraFiles() << "\";" << endl; + out << " TObjArray *list = outputFiles.Tokenize(\" \");" << endl; + out << " TIter *iter = new TIter(list);" << endl; + out << " TObjString *str;" << endl; + out << " TString output_file;" << endl; + out << " Bool_t merged = kTRUE;" << endl; + out << " while((str=(TObjString*)iter->Next())) {" << endl; + out << " output_file = str->GetString();" << endl; + out << " Int_t index = output_file.Index(\"@\");" << endl; + out << " if (index > 0) output_file.Remove(index);" << endl; + out << " // Skip already merged outputs" << endl; + out << " if (!gSystem->AccessPathName(output_file)) {" << endl; + out << " printf(\"Output file <%s> found. Not merging again.\",output_file.Data());" << endl; + out << " continue;" << endl; + out << " }" << endl; + out << " if (mergeExcludes.Contains(output_file.Data())) continue;" << endl; + out << " merged = AliAnalysisAlien::MergeOutput(output_file, outputDir, " << fMaxMergeFiles << ");" << endl; + out << " if (!merged) {" << endl; + out << " printf(\"ERROR: Cannot merge %s\\n\", output_file.Data());" << endl; + out << " }" << endl; + out << " }" << endl; + out << "// read the analysis manager from file" << endl; + TString analysisFile = fExecutable; + analysisFile.ReplaceAll(".sh", ".root"); + out << " TFile *file = TFile::Open(\"" << analysisFile << "\");" << endl; + out << " if (!file) return;" << endl; + out << " TIter nextkey(file->GetListOfKeys());" << endl; + out << " AliAnalysisManager *mgr = 0;" << endl; + out << " TKey *key;" << endl; + out << " while ((key=(TKey*)nextkey())) {" << endl; + out << " if (!strcmp(key->GetClassName(), \"AliAnalysisManager\"))" << endl; + out << " mgr = (AliAnalysisManager*)file->Get(key->GetName());" << endl; + out << " };" << endl; + out << " if (!mgr) {" << endl; + out << " ::Error(\"" << func.Data() << "\", \"No analysis manager found in file" << analysisFile <<"\");" << endl; + out << " return;" << endl; + out << " }" << endl << endl; + out << " mgr->SetSkipTerminate(kFALSE);" << endl; + out << " mgr->PrintStatus();" << endl; + if (AliAnalysisManager::GetAnalysisManager()) { + if (AliAnalysisManager::GetAnalysisManager()->GetDebugLevel()>3) { + out << " gEnv->SetValue(\"XNet.Debug\", \"1\");" << endl; + } else { + out << " AliLog::SetGlobalLogLevel(AliLog::kError);" << endl; + } + } + out << " mgr->StartAnalysis(\"gridterminate\");" << endl; + out << "}" << endl << endl; + if (hasANALYSISalice) { + out <<"//________________________________________________________________________________" << endl; + out << "Bool_t SetupPar(const char *package) {" << endl; + out << "// Compile the package and set it up." << endl; + out << " TString pkgdir = package;" << endl; + out << " pkgdir.ReplaceAll(\".par\",\"\");" << endl; + out << " gSystem->Exec(Form(\"tar xvzf %s.par\", pkgdir.Data()));" << endl; + out << " TString cdir = gSystem->WorkingDirectory();" << endl; + out << " gSystem->ChangeDirectory(pkgdir);" << endl; + out << " // Check for BUILD.sh and execute" << endl; + out << " if (!gSystem->AccessPathName(\"PROOF-INF/BUILD.sh\")) {" << endl; + out << " printf(\"*******************************\\n\");" << endl; + out << " printf(\"*** Building PAR archive ***\\n\");" << endl; + out << " printf(\"*******************************\\n\");" << endl; + out << " if (gSystem->Exec(\"PROOF-INF/BUILD.sh\")) {" << endl; + out << " ::Error(\"SetupPar\", \"Cannot build par archive %s\", pkgdir.Data());" << endl; + out << " gSystem->ChangeDirectory(cdir);" << endl; + out << " return kFALSE;" << endl; + out << " }" << endl; + out << " } else {" << endl; + out << " ::Error(\"SetupPar\",\"Cannot access PROOF-INF/BUILD.sh for package %s\", pkgdir.Data());" << endl; + out << " gSystem->ChangeDirectory(cdir);" << endl; + out << " return kFALSE;" << endl; + out << " }" << endl; + out << " // Check for SETUP.C and execute" << endl; + out << " if (!gSystem->AccessPathName(\"PROOF-INF/SETUP.C\")) {" << endl; + out << " printf(\"*******************************\\n\");" << endl; + out << " printf(\"*** Setup PAR archive ***\\n\");" << endl; + out << " printf(\"*******************************\\n\");" << endl; + out << " gROOT->Macro(\"PROOF-INF/SETUP.C\");" << endl; + out << " } else {" << endl; + out << " ::Error(\"SetupPar\",\"Cannot access PROOF-INF/SETUP.C for package %s\", pkgdir.Data());" << endl; + out << " gSystem->ChangeDirectory(cdir);" << endl; + out << " return kFALSE;" << endl; + out << " }" << endl; + out << " // Restore original workdir" << endl; + out << " gSystem->ChangeDirectory(cdir);" << endl; + out << " return kTRUE;" << endl; + out << "}" << endl; + } + } + Bool_t copy = kTRUE; + if (TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; + if (copy) { + CdWork(); + TString workdir = gGrid->GetHomeDirectory(); + workdir += fGridWorkingDir; + if (FileExists(mergingMacro)) gGrid->Rm(mergingMacro); + Info("WriteMergingMacro", "\n##### Copying merging macro: <%s> to your alien workspace", mergingMacro.Data()); + TFile::Cp(Form("file:%s",mergingMacro.Data()), Form("alien://%s/%s", workdir.Data(), mergingMacro.Data())); + } +} + +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::SetupPar(const char *package) +{ +// Compile the par file archive pointed by . This must be present in the current durectory. +// Note that for loading the compiled library. The current directory should have precedence in +// LD_LIBRARY_PATH + TString pkgdir = package; + pkgdir.ReplaceAll(".par",""); + gSystem->Exec(Form("tar xvzf %s.par", pkgdir.Data())); + TString cdir = gSystem->WorkingDirectory(); + gSystem->ChangeDirectory(pkgdir); + // Check for BUILD.sh and execute + if (!gSystem->AccessPathName("PROOF-INF/BUILD.sh")) { + printf("**************************************************\n"); + printf("*** Building PAR archive %s\n", package); + printf("**************************************************\n"); + if (gSystem->Exec("PROOF-INF/BUILD.sh")) { + ::Error("SetupPar", "Cannot build par archive %s", pkgdir.Data()); + gSystem->ChangeDirectory(cdir); + return kFALSE; + } + } else { + ::Error("SetupPar","Cannot access PROOF-INF/BUILD.sh for package %s", pkgdir.Data()); + gSystem->ChangeDirectory(cdir); + return kFALSE; + } + // Check for SETUP.C and execute + if (!gSystem->AccessPathName("PROOF-INF/SETUP.C")) { + printf("**************************************************\n"); + printf("*** Setup PAR archive %s\n", package); + printf("**************************************************\n"); + gROOT->Macro("PROOF-INF/SETUP.C"); + printf("*** Loaded library: %s\n", gSystem->GetLibraries(pkgdir,"",kFALSE)); + } else { + ::Error("SetupPar","Cannot access PROOF-INF/SETUP.C for package %s", pkgdir.Data()); + gSystem->ChangeDirectory(cdir); + return kFALSE; + } + // Restore original workdir + gSystem->ChangeDirectory(cdir); + return kTRUE; +} + //______________________________________________________________________________ void AliAnalysisAlien::WriteExecutable() { @@ -1348,11 +2756,10 @@ void AliAnalysisAlien::WriteExecutable() ofstream out; out.open(fExecutable.Data(), ios::out); if (out.bad()) { - Error("CreateJDL", "Bad file name for executable: %s", fExecutable.Data()); + Error("WriteExecutable", "Bad file name for executable: %s", fExecutable.Data()); return; } out << "#!/bin/bash" << endl; - out << "export GCLIENT_SERVER_LIST=\"pcapiserv04.cern.ch:10000|pcapiserv05.cern.ch:10000|pcapiserv06.cern.ch:10000|pcapiserv07.cern.ch:10000\"" << endl; out << "echo \"=========================================\"" << endl; out << "echo \"############## PATH : ##############\"" << endl; out << "echo $PATH" << endl; @@ -1366,17 +2773,28 @@ void AliAnalysisAlien::WriteExecutable() out << "echo $ALICE_ROOT" << endl; out << "echo \"############## which aliroot : ##############\"" << endl; out << "which aliroot" << endl; + out << "echo \"############## system limits : ##############\"" << endl; + out << "ulimit -a" << endl; + out << "echo \"############## memory : ##############\"" << endl; + out << "free -m" << endl; out << "echo \"=========================================\"" << endl << endl; -// if (TestBit(AliAnalysisGrid::kTest)) out << "root "; - out << "root -b -q "; - out << fAnalysisMacro.Data() << endl << endl; - out << "echo \"======== " << fAnalysisMacro.Data() << " finished ========\"" << endl; + // Make sure we can properly compile par files + if (TObject::TestBit(AliAnalysisGrid::kUsePars)) out << "export LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH" << endl; + out << fExecutableCommand << " "; + out << fAnalysisMacro.Data() << " " << fExecutableArgs.Data() << endl << endl; + out << "echo \"======== " << fAnalysisMacro.Data() << " finished with exit code: $? ========\"" << endl; + out << "echo \"############## memory after: ##############\"" << endl; + out << "free -m" << endl; + out << "echo \"############## Last 10 lines from dmesg : ##############\"" << endl; + out << "dmesg | tail -n 10" << endl; } Bool_t copy = kTRUE; if (TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; if (copy) { CdWork(); TString workdir = gGrid->GetHomeDirectory(); + TString bindir = Form("%s/bin", workdir.Data()); + if (!DirectoryExists(bindir)) gGrid->Mkdir(bindir); workdir += fGridWorkingDir; TString executable = Form("%s/bin/%s", gGrid->GetHomeDirectory(), fExecutable.Data()); if (FileExists(executable)) gGrid->Rm(executable); @@ -1386,11 +2804,106 @@ void AliAnalysisAlien::WriteExecutable() } //______________________________________________________________________________ -void AliAnalysisAlien::WriteValidationScript() +void AliAnalysisAlien::WriteMergeExecutable() +{ +// Generate the alien executable script for the merging job. + if (!fMergeViaJDL) return; + TString mergeExec = fExecutable; + mergeExec.ReplaceAll(".sh", "_merge.sh"); + if (!TestBit(AliAnalysisGrid::kSubmit)) { + ofstream out; + out.open(mergeExec.Data(), ios::out); + if (out.bad()) { + Error("WriteMergingExecutable", "Bad file name for executable: %s", mergeExec.Data()); + return; + } + out << "#!/bin/bash" << endl; + out << "echo \"=========================================\"" << endl; + out << "echo \"############## PATH : ##############\"" << endl; + out << "echo $PATH" << endl; + out << "echo \"############## LD_LIBRARY_PATH : ##############\"" << endl; + out << "echo $LD_LIBRARY_PATH" << endl; + out << "echo \"############## ROOTSYS : ##############\"" << endl; + out << "echo $ROOTSYS" << endl; + out << "echo \"############## which root : ##############\"" << endl; + out << "which root" << endl; + out << "echo \"############## ALICE_ROOT : ##############\"" << endl; + out << "echo $ALICE_ROOT" << endl; + out << "echo \"############## which aliroot : ##############\"" << endl; + out << "which aliroot" << endl; + out << "echo \"############## system limits : ##############\"" << endl; + out << "ulimit -a" << endl; + out << "echo \"############## memory : ##############\"" << endl; + out << "free -m" << endl; + out << "echo \"=========================================\"" << endl << endl; + // Make sure we can properly compile par files + if (TObject::TestBit(AliAnalysisGrid::kUsePars)) out << "export LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH" << endl; + TString mergeMacro = fExecutable; + mergeMacro.ReplaceAll(".sh", "_merge.C"); + out << "export ARG=\"" << mergeMacro << "(\\\"$1\\\")\"" << endl; + out << fExecutableCommand << " " << "$ARG" << endl; + out << "echo \"======== " << mergeMacro.Data() << " finished with exit code: $? ========\"" << endl; + out << "echo \"############## memory after: ##############\"" << endl; + out << "free -m" << endl; + out << "echo \"############## Last 10 lines from dmesg : ##############\"" << endl; + out << "dmesg | tail -n 10" << endl; + } + Bool_t copy = kTRUE; + if (TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; + if (copy) { + CdWork(); + TString workdir = gGrid->GetHomeDirectory(); + TString bindir = Form("%s/bin", workdir.Data()); + if (!DirectoryExists(bindir)) gGrid->Mkdir(bindir); + workdir += fGridWorkingDir; + TString executable = Form("%s/bin/%s", gGrid->GetHomeDirectory(), mergeExec.Data()); + if (FileExists(executable)) gGrid->Rm(executable); + Info("CreateJDL", "\n##### Copying executable file <%s> to your AliEn bin directory", mergeExec.Data()); + TFile::Cp(Form("file:%s",mergeExec.Data()), Form("alien://%s", executable.Data())); + } +} + +//______________________________________________________________________________ +void AliAnalysisAlien::WriteProductionFile(const char *filename) const +{ +// Write the production file to be submitted by LPM manager. The format is: +// First line: full_path_to_jdl estimated_no_subjobs_per_master +// Next lines: full_path_to_dataset XXX (XXX is a string) +// To submit, one has to: submit jdl XXX for all lines + ofstream out; + out.open(filename, ios::out); + if (out.bad()) { + Error("WriteProductionFile", "Bad file name: %s", filename); + return; + } + TString workdir = gGrid->GetHomeDirectory(); + workdir += fGridWorkingDir; + Int_t njobspermaster = 1000*fNrunsPerMaster/fSplitMaxInputFileNumber; + TString locjdl = Form("%s/%s", workdir.Data(),fJDLName.Data()); + out << locjdl << " " << njobspermaster << endl; + Int_t nmasterjobs = fInputFiles->GetEntries(); + for (Int_t i=0; iBaseName(fInputFiles->At(i)->GetName()); + runOutDir.ReplaceAll(".xml", ""); + if (fOutputToRunNo) + out << Form("%s", fInputFiles->At(i)->GetName()) << " " << runOutDir << endl; + else + out << Form("%s", fInputFiles->At(i)->GetName()) << " " << Form("%03d", i) << endl; + } + Info("WriteProductionFile", "\n##### Copying production file <%s> to your work directory", filename); + if (FileExists(filename)) gGrid->Rm(filename); + TFile::Cp(Form("file:%s",filename), Form("alien://%s/%s", workdir.Data(),filename)); +} + +//______________________________________________________________________________ +void AliAnalysisAlien::WriteValidationScript(Bool_t merge) { // Generate the alien validation script. // Generate the validation script TObjString *os; + TString validationScript = fExecutable; + if (merge) validationScript.ReplaceAll(".sh", "_mergevalidation.sh"); + else validationScript.ReplaceAll(".sh", "_validation.sh"); if (!Connect()) { Error("WriteValidationScript", "Alien connection required"); return; @@ -1399,7 +2912,7 @@ void AliAnalysisAlien::WriteValidationScript() if (!TestBit(AliAnalysisGrid::kTest)) out_stream = " >> stdout"; if (!TestBit(AliAnalysisGrid::kSubmit)) { ofstream out; - out.open("validate.sh", ios::out); + out.open(validationScript, ios::out); out << "#!/bin/bash" << endl; out << "##################################################" << endl; out << "validateout=`dirname $0`" << endl; @@ -1461,10 +2974,14 @@ void AliAnalysisAlien::WriteValidationScript() TObjArray *arr = fOutputFiles.Tokenize(" "); TIter next1(arr); TString output_file; + AliAnalysisManager *mgr = AliAnalysisManager::GetAnalysisManager(); + TString extra = mgr->GetExtraFiles(); while ((os=(TObjString*)next1())) { output_file = os->GetString(); Int_t index = output_file.Index("@"); if (index > 0) output_file.Remove(index); + if (merge && fMergeExcludes.Contains(output_file)) continue; + if (extra.Contains(output_file)) continue; out << "if ! [ -f " << output_file.Data() << " ] ; then" << endl; out << " error=1" << endl; out << " echo \"Output file(s) not found. Job FAILED !\"" << out_stream << endl; @@ -1472,6 +2989,14 @@ void AliAnalysisAlien::WriteValidationScript() out << "fi" << endl; } delete arr; + if (!merge) { + out << "if ! [ -f outputs_valid ] ; then" << endl; + out << " error=1" << endl; + out << " echo \"Output files were not validated by the analysis manager\" >> stdout" << endl; + out << " echo \"Output files were not validated by the analysis manager\" >> stderr" << endl; + out << "fi" << endl; + } + out << "if [ $error = 0 ] ; then" << endl; out << " echo \"* ---------------- Job Validated ------------------*\"" << out_stream << endl; out << "fi" << endl; @@ -1487,8 +3012,8 @@ void AliAnalysisAlien::WriteValidationScript() CdWork(); TString workdir = gGrid->GetHomeDirectory(); workdir += fGridWorkingDir; - Info("CreateJDL", "\n##### Copying validation script to your AliEn working space"); - if (FileExists("validate.sh")) gGrid->Rm("validate.sh"); - TFile::Cp("file:validate.sh", Form("alien://%s/validate.sh", workdir.Data())); + Info("CreateJDL", "\n##### Copying validation script <%s> to your AliEn working space", validationScript.Data()); + if (FileExists(validationScript)) gGrid->Rm(validationScript); + TFile::Cp(Form("file:%s",validationScript.Data()), Form("alien://%s/%s", workdir.Data(),validationScript.Data())); } }