X-Git-Url: http://git.uio.no/git/?a=blobdiff_plain;f=ANALYSIS%2FAliAnalysisAlien.cxx;h=82f6d977ee81f4c04e59766f607701d25287b4ef;hb=ea7b474069195c842b2122d9aeb6e4715f4a341f;hp=c34875086c24b663fe1cfb49724671af08e85e3f;hpb=d5c6455a778ca649fb996840b1935874efae12e6;p=u%2Fmrichter%2FAliRoot.git diff --git a/ANALYSIS/AliAnalysisAlien.cxx b/ANALYSIS/AliAnalysisAlien.cxx index c34875086c2..82f6d977ee8 100644 --- a/ANALYSIS/AliAnalysisAlien.cxx +++ b/ANALYSIS/AliAnalysisAlien.cxx @@ -21,9 +21,14 @@ //============================================================================== #include "Riostream.h" +#include "TEnv.h" +#include "TBits.h" +#include "TError.h" #include "TROOT.h" #include "TSystem.h" #include "TFile.h" +#include "TFileCollection.h" +#include "TChain.h" #include "TObjString.h" #include "TObjArray.h" #include "TGrid.h" @@ -44,6 +49,7 @@ ClassImp(AliAnalysisAlien) AliAnalysisAlien::AliAnalysisAlien() :AliAnalysisGrid(), fGridJDL(NULL), + fMergingJDL(NULL), fPrice(0), fTTL(0), fSplitMaxInputFileNumber(0), @@ -52,9 +58,17 @@ AliAnalysisAlien::AliAnalysisAlien() fNtestFiles(0), fNrunsPerMaster(0), fMaxMergeFiles(0), + fMaxMergeStages(0), fNsubmitted(0), fProductionMode(0), fOutputToRunNo(0), + fMergeViaJDL(0), + fFastReadOption(0), + fOverwriteMode(1), + fNreplicas(2), + fNproofWorkers(0), + fNproofWorkersPerSlave(0), + fProofReset(0), fRunNumbers(), fExecutable(), fExecutableCommand(), @@ -62,6 +76,7 @@ AliAnalysisAlien::AliAnalysisAlien() fExecutableArgs(), fAnalysisMacro(), fAnalysisSource(), + fValidationScript(), fAdditionalRootLibs(), fAdditionalLibs(), fSplitMode(), @@ -79,6 +94,7 @@ AliAnalysisAlien::AliAnalysisAlien() fInputFormat(), fDatasetName(), fJDLName(), + fTerminateFiles(), fMergeExcludes(), fIncludePath(), fCloseSE(), @@ -86,6 +102,11 @@ AliAnalysisAlien::AliAnalysisAlien() fJobTag(), fOutputSingle(), fRunPrefix(), + fProofCluster(), + fProofDataSet(), + fFileForTestMode(), + fRootVersionForProof(), + fAliRootMode(), fInputFiles(0), fPackages(0) { @@ -97,6 +118,7 @@ AliAnalysisAlien::AliAnalysisAlien() AliAnalysisAlien::AliAnalysisAlien(const char *name) :AliAnalysisGrid(name), fGridJDL(NULL), + fMergingJDL(NULL), fPrice(0), fTTL(0), fSplitMaxInputFileNumber(0), @@ -105,9 +127,17 @@ AliAnalysisAlien::AliAnalysisAlien(const char *name) fNtestFiles(0), fNrunsPerMaster(0), fMaxMergeFiles(0), + fMaxMergeStages(0), fNsubmitted(0), fProductionMode(0), fOutputToRunNo(0), + fMergeViaJDL(0), + fFastReadOption(0), + fOverwriteMode(1), + fNreplicas(2), + fNproofWorkers(0), + fNproofWorkersPerSlave(0), + fProofReset(0), fRunNumbers(), fExecutable(), fExecutableCommand(), @@ -115,6 +145,7 @@ AliAnalysisAlien::AliAnalysisAlien(const char *name) fExecutableArgs(), fAnalysisMacro(), fAnalysisSource(), + fValidationScript(), fAdditionalRootLibs(), fAdditionalLibs(), fSplitMode(), @@ -132,6 +163,7 @@ AliAnalysisAlien::AliAnalysisAlien(const char *name) fInputFormat(), fDatasetName(), fJDLName(), + fTerminateFiles(), fMergeExcludes(), fIncludePath(), fCloseSE(), @@ -139,6 +171,11 @@ AliAnalysisAlien::AliAnalysisAlien(const char *name) fJobTag(), fOutputSingle(), fRunPrefix(), + fProofCluster(), + fProofDataSet(), + fFileForTestMode(), + fRootVersionForProof(), + fAliRootMode(), fInputFiles(0), fPackages(0) { @@ -150,6 +187,7 @@ AliAnalysisAlien::AliAnalysisAlien(const char *name) AliAnalysisAlien::AliAnalysisAlien(const AliAnalysisAlien& other) :AliAnalysisGrid(other), fGridJDL(NULL), + fMergingJDL(NULL), fPrice(other.fPrice), fTTL(other.fTTL), fSplitMaxInputFileNumber(other.fSplitMaxInputFileNumber), @@ -158,9 +196,17 @@ AliAnalysisAlien::AliAnalysisAlien(const AliAnalysisAlien& other) fNtestFiles(other.fNtestFiles), fNrunsPerMaster(other.fNrunsPerMaster), fMaxMergeFiles(other.fMaxMergeFiles), + fMaxMergeStages(other.fMaxMergeStages), fNsubmitted(other.fNsubmitted), fProductionMode(other.fProductionMode), fOutputToRunNo(other.fOutputToRunNo), + fMergeViaJDL(other.fMergeViaJDL), + fFastReadOption(other.fFastReadOption), + fOverwriteMode(other.fOverwriteMode), + fNreplicas(other.fNreplicas), + fNproofWorkers(other.fNproofWorkers), + fNproofWorkersPerSlave(other.fNproofWorkersPerSlave), + fProofReset(other.fProofReset), fRunNumbers(other.fRunNumbers), fExecutable(other.fExecutable), fExecutableCommand(other.fExecutableCommand), @@ -168,6 +214,7 @@ AliAnalysisAlien::AliAnalysisAlien(const AliAnalysisAlien& other) fExecutableArgs(other.fExecutableArgs), fAnalysisMacro(other.fAnalysisMacro), fAnalysisSource(other.fAnalysisSource), + fValidationScript(other.fValidationScript), fAdditionalRootLibs(other.fAdditionalRootLibs), fAdditionalLibs(other.fAdditionalLibs), fSplitMode(other.fSplitMode), @@ -185,6 +232,7 @@ AliAnalysisAlien::AliAnalysisAlien(const AliAnalysisAlien& other) fInputFormat(other.fInputFormat), fDatasetName(other.fDatasetName), fJDLName(other.fJDLName), + fTerminateFiles(other.fTerminateFiles), fMergeExcludes(other.fMergeExcludes), fIncludePath(other.fIncludePath), fCloseSE(other.fCloseSE), @@ -192,11 +240,17 @@ AliAnalysisAlien::AliAnalysisAlien(const AliAnalysisAlien& other) fJobTag(other.fJobTag), fOutputSingle(other.fOutputSingle), fRunPrefix(other.fRunPrefix), + fProofCluster(other.fProofCluster), + fProofDataSet(other.fProofDataSet), + fFileForTestMode(other.fFileForTestMode), + fRootVersionForProof(other.fRootVersionForProof), + fAliRootMode(other.fAliRootMode), fInputFiles(0), fPackages(0) { // Copy ctor. fGridJDL = (TGridJDL*)gROOT->ProcessLine("new TAlienJDL()"); + fMergingJDL = (TGridJDL*)gROOT->ProcessLine("new TAlienJDL()"); fRunRange[0] = other.fRunRange[0]; fRunRange[1] = other.fRunRange[1]; if (other.fInputFiles) { @@ -220,6 +274,7 @@ AliAnalysisAlien::~AliAnalysisAlien() { // Destructor. if (fGridJDL) delete fGridJDL; + if (fMergingJDL) delete fMergingJDL; if (fInputFiles) delete fInputFiles; if (fPackages) delete fPackages; } @@ -231,6 +286,7 @@ AliAnalysisAlien &AliAnalysisAlien::operator=(const AliAnalysisAlien& other) if (this != &other) { AliAnalysisGrid::operator=(other); fGridJDL = (TGridJDL*)gROOT->ProcessLine("new TAlienJDL()"); + fMergingJDL = (TGridJDL*)gROOT->ProcessLine("new TAlienJDL()"); fPrice = other.fPrice; fTTL = other.fTTL; fSplitMaxInputFileNumber = other.fSplitMaxInputFileNumber; @@ -239,9 +295,17 @@ AliAnalysisAlien &AliAnalysisAlien::operator=(const AliAnalysisAlien& other) fNtestFiles = other.fNtestFiles; fNrunsPerMaster = other.fNrunsPerMaster; fMaxMergeFiles = other.fMaxMergeFiles; + fMaxMergeStages = other.fMaxMergeStages; fNsubmitted = other.fNsubmitted; fProductionMode = other.fProductionMode; fOutputToRunNo = other.fOutputToRunNo; + fMergeViaJDL = other.fMergeViaJDL; + fFastReadOption = other.fFastReadOption; + fOverwriteMode = other.fOverwriteMode; + fNreplicas = other.fNreplicas; + fNproofWorkers = other.fNproofWorkers; + fNproofWorkersPerSlave = other.fNproofWorkersPerSlave; + fProofReset = other.fProofReset; fRunNumbers = other.fRunNumbers; fExecutable = other.fExecutable; fExecutableCommand = other.fExecutableCommand; @@ -249,6 +313,7 @@ AliAnalysisAlien &AliAnalysisAlien::operator=(const AliAnalysisAlien& other) fExecutableArgs = other.fExecutableArgs; fAnalysisMacro = other.fAnalysisMacro; fAnalysisSource = other.fAnalysisSource; + fValidationScript = other.fValidationScript; fAdditionalRootLibs = other.fAdditionalRootLibs; fAdditionalLibs = other.fAdditionalLibs; fSplitMode = other.fSplitMode; @@ -266,6 +331,7 @@ AliAnalysisAlien &AliAnalysisAlien::operator=(const AliAnalysisAlien& other) fInputFormat = other.fInputFormat; fDatasetName = other.fDatasetName; fJDLName = other.fJDLName; + fTerminateFiles = other.fTerminateFiles; fMergeExcludes = other.fMergeExcludes; fIncludePath = other.fIncludePath; fCloseSE = other.fCloseSE; @@ -273,6 +339,11 @@ AliAnalysisAlien &AliAnalysisAlien::operator=(const AliAnalysisAlien& other) fJobTag = other.fJobTag; fOutputSingle = other.fOutputSingle; fRunPrefix = other.fRunPrefix; + fProofCluster = other.fProofCluster; + fProofDataSet = other.fProofDataSet; + fFileForTestMode = other.fFileForTestMode; + fRootVersionForProof = other.fRootVersionForProof; + fAliRootMode = other.fAliRootMode; if (other.fInputFiles) { fInputFiles = new TObjArray(); TIter next(other.fInputFiles); @@ -338,11 +409,7 @@ Bool_t AliAnalysisAlien::Connect() { // Try to connect to AliEn. User needs a valid token and /tmp/gclient_env_$UID sourced. if (gGrid && gGrid->IsConnected()) return kTRUE; - if (!gSystem->Getenv("alien_API_USER")) { - Error("Connect", "Make sure you:\n 1. Have called: alien-token-init today\n 2. Have sourced /tmp/gclient_env_%s", - gSystem->Getenv("UID")); - return kFALSE; - } + if (fProductionMode) return kTRUE; if (!gGrid) { Info("Connect", "Trying to connect to AliEn ..."); TGrid::Connect("alien://"); @@ -372,34 +439,86 @@ void AliAnalysisAlien::CdWork() } // Work directory not existing - create it gGrid->Cd(homedir); - if (gGrid->Mkdir(workdir)) { + if (gGrid->Mkdir(workdir, "-p")) { gGrid->Cd(fGridWorkingDir); - Info("CreateJDL", "\n##### Created alien working directory %s", fGridWorkingDir.Data()); + Info("CdWork", "\n##### Created alien working directory %s", fGridWorkingDir.Data()); } else { - Warning("CreateJDL", "Working directory %s cannot be created.\n Using %s instead.", + Warning("CdWork", "Working directory %s cannot be created.\n Using %s instead.", workdir.Data(), homedir.Data()); fGridWorkingDir = ""; } } +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::CheckFileCopy(const char *alienpath) +{ +// Check if file copying is possible. + if (fProductionMode) return kTRUE; + if (!Connect()) { + Error("CheckFileCopy", "Not connected to AliEn. File copying cannot be tested."); + return kFALSE; + } + Info("CheckFileCopy", "Checking possibility to copy files to your AliEn home directory... \ + \n +++ NOTE: You can disable this via: plugin->SetCheckCopy(kFALSE);"); + // Check if alien_CLOSE_SE is defined + TString closeSE = gSystem->Getenv("alien_CLOSE_SE"); + if (!closeSE.IsNull()) { + Info("CheckFileCopy", "Your current close storage is pointing to: \ + \n alien_CLOSE_SE = \"%s\"", closeSE.Data()); + } else { + Warning("CheckFileCopy", "Your current close storage is empty ! Depending on your location, file copying may fail."); + } + // Check if grid directory exists. + if (!DirectoryExists(alienpath)) { + Error("CheckFileCopy", "Alien path %s does not seem to exist", alienpath); + return kFALSE; + } + TFile f("plugin_test_copy", "RECREATE"); + // User may not have write permissions to current directory + if (f.IsZombie()) { + Error("CheckFileCopy", "Cannot create local test file. Do you have write access to current directory: <%s> ?", + gSystem->WorkingDirectory()); + return kFALSE; + } + f.Close(); + if (FileExists(Form("alien://%s/%s",alienpath, f.GetName()))) gGrid->Rm(Form("alien://%s/%s",alienpath, f.GetName())); + if (!TFile::Cp(f.GetName(), Form("alien://%s/%s",alienpath, f.GetName()))) { + Error("CheckFileCopy", "Cannot copy files to Alien destination: <%s> This may be temporary, or: \ + \n# 1. Make sure you have write permissions there. If this is the case: \ + \n# 2. Check the storage availability at: http://alimonitor.cern.ch/stats?page=SE/table \ + \n# Do: export alien_CLOSE_SE=\"working_disk_SE\" \ + \n# To make this permanent put in in your .bashrc (in .alienshrc is not enough) \ + \n# Redo token: rm /tmp/x509up_u$UID then: alien-token-init ", alienpath); + gSystem->Unlink(f.GetName()); + return kFALSE; + } + gSystem->Unlink(f.GetName()); + gGrid->Rm(Form("%s%s",alienpath,f.GetName())); + Info("CheckFileCopy", "### ...SUCCESS ###"); + return kTRUE; +} + //______________________________________________________________________________ Bool_t AliAnalysisAlien::CheckInputData() { // Check validity of input data. If necessary, create xml files. + if (fProductionMode) return kTRUE; if (!fInputFiles && !fRunNumbers.Length() && !fRunRange[0]) { if (!fGridDataDir.Length()) { Error("CkeckInputData", "AliEn path to base data directory must be set.\n = Use: SetGridDataDir()"); return kFALSE; } Info("CheckInputData", "Analysis will make a single xml for base data directory %s",fGridDataDir.Data()); + if (fDataPattern.Contains("tag") && TestBit(AliAnalysisGrid::kTest)) + TObject::SetBit(AliAnalysisGrid::kUseTags, kTRUE); // ADDED (fix problem in determining the tag usage in test mode) return kTRUE; } // Process declared files - Bool_t is_collection = kFALSE; - Bool_t is_xml = kFALSE; - Bool_t use_tags = kFALSE; + Bool_t isCollection = kFALSE; + Bool_t isXml = kFALSE; + Bool_t useTags = kFALSE; Bool_t checked = kFALSE; - CdWork(); + if (!TestBit(AliAnalysisGrid::kTest)) CdWork(); TString file; TString workdir = gGrid->GetHomeDirectory(); workdir += fGridWorkingDir; @@ -424,12 +543,12 @@ Bool_t AliAnalysisAlien::CheckInputData() CheckDataType(file, iscoll, isxml, usetags); if (!checked) { checked = kTRUE; - is_collection = iscoll; - is_xml = isxml; - use_tags = usetags; - TObject::SetBit(AliAnalysisGrid::kUseTags, use_tags); + isCollection = iscoll; + isXml = isxml; + useTags = usetags; + TObject::SetBit(AliAnalysisGrid::kUseTags, useTags); } else { - if ((iscoll != is_collection) || (isxml != is_xml) || (usetags != use_tags)) { + if ((iscoll != isCollection) || (isxml != isXml) || (usetags != useTags)) { Error("CheckInputData", "Some conflict was found in the types of inputs"); return kFALSE; } @@ -447,12 +566,12 @@ Bool_t AliAnalysisAlien::CheckInputData() Error("CheckInputData", "Data directory %s not existing.", fGridDataDir.Data()); return kFALSE; } - if (is_collection) { + if (isCollection) { Error("CheckInputData", "You are using raw AliEn collections as input. Cannot process run numbers."); return kFALSE; } - if (checked && !is_xml) { + if (checked && !isXml) { Error("CheckInputData", "Cannot mix processing of full runs with non-xml files"); return kFALSE; } @@ -464,10 +583,10 @@ Bool_t AliAnalysisAlien::CheckInputData() TString path; if (!checked) { checked = kTRUE; - use_tags = fDataPattern.Contains("tag"); - TObject::SetBit(AliAnalysisGrid::kUseTags, use_tags); + useTags = fDataPattern.Contains("tag"); + TObject::SetBit(AliAnalysisGrid::kUseTags, useTags); } - if (use_tags != fDataPattern.Contains("tag")) { + if (useTags != fDataPattern.Contains("tag")) { Error("CheckInputData", "Cannot mix input files using/not using tags"); return kFALSE; } @@ -485,9 +604,9 @@ Bool_t AliAnalysisAlien::CheckInputData() TString msg = "\n##### file: "; msg += path; msg += " type: xml_collection;"; - if (use_tags) msg += " using_tags: Yes"; + if (useTags) msg += " using_tags: Yes"; else msg += " using_tags: No"; - Info("CheckDataType", msg.Data()); + Info("CheckDataType", "%s", msg.Data()); if (fNrunsPerMaster<2) { AddDataFile(Form("%s.xml", os->GetString().Data())); } else { @@ -513,9 +632,9 @@ Bool_t AliAnalysisAlien::CheckInputData() TString msg = "\n##### file: "; msg += path; msg += " type: xml_collection;"; - if (use_tags) msg += " using_tags: Yes"; + if (useTags) msg += " using_tags: Yes"; else msg += " using_tags: No"; - Info("CheckDataType", msg.Data()); + Info("CheckDataType", "%s", msg.Data()); if (fNrunsPerMaster<2) { AddDataFile(Form("%s%d.xml",fRunPrefix.Data(),irun)); } else { @@ -541,14 +660,14 @@ Bool_t AliAnalysisAlien::CheckInputData() Bool_t AliAnalysisAlien::CreateDataset(const char *pattern) { // Create dataset for the grid data directory + run number. - if (TestBit(AliAnalysisGrid::kOffline)) return kFALSE; + if (fProductionMode || TestBit(AliAnalysisGrid::kOffline)) return kTRUE; if (!Connect()) { Error("CreateDataset", "Cannot create dataset with no grid connection"); return kFALSE; } // Cd workspace - CdWork(); + if (!TestBit(AliAnalysisGrid::kTest)) CdWork(); TString workdir = gGrid->GetHomeDirectory(); workdir += fGridWorkingDir; @@ -574,7 +693,7 @@ Bool_t AliAnalysisAlien::CreateDataset(const char *pattern) // CdWork(); if (TestBit(AliAnalysisGrid::kTest)) file = "wn.xml"; else file = Form("%s.xml", gSystem->BaseName(path)); - if (gSystem->AccessPathName(file) || TestBit(AliAnalysisGrid::kTest)) { + if (gSystem->AccessPathName(file) || TestBit(AliAnalysisGrid::kTest) || fOverwriteMode) { command = "find "; command += options; command += path; @@ -586,9 +705,22 @@ Bool_t AliAnalysisAlien::CreateDataset(const char *pattern) if (res) delete res; // Write standard output to file gROOT->ProcessLine(Form("gGrid->Stdout(); > %s", file.Data())); - } - if (!TestBit(AliAnalysisGrid::kTest) && !FileExists(file)) { + Bool_t hasGrep = (gSystem->Exec("grep --version 2>/dev/null > /dev/null")==0)?kTRUE:kFALSE; + Bool_t nullFile = kFALSE; + if (!hasGrep) { + Warning("CreateDataset", "'grep' command not available on this system - cannot validate the result of the grid 'find' command"); + } else { + nullFile = (gSystem->Exec(Form("grep /event %s 2>/dev/null > /dev/null",file.Data()))==0)?kFALSE:kTRUE; + if (nullFile) { + Error("CreateDataset","Dataset %s produced by the previous find command is empty !", file.Data()); + return kFALSE; + } + } + } + Bool_t fileExists = FileExists(file); + if (!TestBit(AliAnalysisGrid::kTest) && (!fileExists || fOverwriteMode)) { // Copy xml file to alien space + if (fileExists) gGrid->Rm(file); TFile::Cp(Form("file:%s",file.Data()), Form("alien://%s/%s",workdir.Data(), file.Data())); if (!FileExists(file)) { Error("CreateDataset", "Command %s did NOT succeed", command.Data()); @@ -600,6 +732,7 @@ Bool_t AliAnalysisAlien::CreateDataset(const char *pattern) return kTRUE; } // Several runs + Bool_t nullResult = kTRUE; if (fRunNumbers.Length()) { TObjArray *arr = fRunNumbers.Tokenize(" "); TObjString *os; @@ -611,7 +744,7 @@ Bool_t AliAnalysisAlien::CreateDataset(const char *pattern) if (TestBit(AliAnalysisGrid::kTest)) file = "wn.xml"; else file = Form("%s.xml", os->GetString().Data()); // If local collection file does not exist, create it via 'find' command. - if (gSystem->AccessPathName(file)) { + if (gSystem->AccessPathName(file) || TestBit(AliAnalysisGrid::kTest) || fOverwriteMode) { command = "find "; command += options; command += path; @@ -621,13 +754,29 @@ Bool_t AliAnalysisAlien::CreateDataset(const char *pattern) if (res) delete res; // Write standard output to file gROOT->ProcessLine(Form("gGrid->Stdout(); > %s", file.Data())); - } + Bool_t hasGrep = (gSystem->Exec("grep --version 2>/dev/null > /dev/null")==0)?kTRUE:kFALSE; + Bool_t nullFile = kFALSE; + if (!hasGrep) { + Warning("CreateDataset", "'grep' command not available on this system - cannot validate the result of the grid 'find' command"); + } else { + nullFile = (gSystem->Exec(Form("grep /event %s 2>/dev/null > /dev/null",file.Data()))==0)?kFALSE:kTRUE; + if (nullFile) { + Warning("CreateDataset","Dataset %s produced by: <%s> is empty !", file.Data(), command.Data()); + fRunNumbers.ReplaceAll(os->GetString().Data(), ""); + continue; + } + } + nullResult = kFALSE; + } if (TestBit(AliAnalysisGrid::kTest)) break; // Check if there is one run per master job. if (fNrunsPerMaster<2) { if (FileExists(file)) { - Info("CreateDataset", "\n##### Dataset %s exist. Skipping creation...", file.Data()); - continue; + if (fOverwriteMode) gGrid->Rm(file); + else { + Info("CreateDataset", "\n##### Dataset %s exist. Skipping creation...", file.Data()); + continue; + } } // Copy xml file to alien space TFile::Cp(Form("file:%s",file.Data()), Form("alien://%s/%s",workdir.Data(), file.Data())); @@ -651,9 +800,12 @@ Bool_t AliAnalysisAlien::CreateDataset(const char *pattern) continue; } schunk += Form("_%s.xml", os->GetString().Data()); - if (FileExists(schunk)) { - Info("CreateDataset", "\n##### Dataset %s exist. Skipping creation...", schunk.Data()); - continue; + if (FileExists(schunk)) { + if (fOverwriteMode) gGrid->Rm(file); + else { + Info("CreateDataset", "\n##### Dataset %s exist. Skipping creation...", schunk.Data()); + continue; + } } printf("Exporting merged collection <%s> and copying to AliEn\n", schunk.Data()); cbase->ExportXML(Form("file://%s", schunk.Data()),kFALSE,kFALSE, schunk, "Merged runs"); @@ -663,9 +815,13 @@ Bool_t AliAnalysisAlien::CreateDataset(const char *pattern) delete arr; return kFALSE; } - } + } } delete arr; + if (nullResult) { + Error("CreateDataset", "No valid dataset corresponding to the query!"); + return kFALSE; + } } else { // Process a full run range. for (Int_t irun=fRunRange[0]; irun<=fRunRange[1]; irun++) { @@ -674,13 +830,15 @@ Bool_t AliAnalysisAlien::CreateDataset(const char *pattern) // CdWork(); if (TestBit(AliAnalysisGrid::kTest)) file = "wn.xml"; else file = Form("%s%d.xml", fRunPrefix.Data(), irun); - if (FileExists(file) && fNrunsPerMaster<2 && !TestBit(AliAnalysisGrid::kTest)) { - Info("CreateDataset", "\n##### Dataset %s exist. Skipping creation...", file.Data()); -// gGrid->Rm(file); - continue; + if (FileExists(file) && fNrunsPerMaster<2 && !TestBit(AliAnalysisGrid::kTest)) { + if (fOverwriteMode) gGrid->Rm(file); + else { + Info("CreateDataset", "\n##### Dataset %s exist. Skipping creation...", file.Data()); + continue; + } } // If local collection file does not exist, create it via 'find' command. - if (gSystem->AccessPathName(file)) { + if (gSystem->AccessPathName(file) || TestBit(AliAnalysisGrid::kTest) || fOverwriteMode) { command = "find "; command += options; command += path; @@ -690,13 +848,28 @@ Bool_t AliAnalysisAlien::CreateDataset(const char *pattern) if (res) delete res; // Write standard output to file gROOT->ProcessLine(Form("gGrid->Stdout(); > %s", file.Data())); + Bool_t hasGrep = (gSystem->Exec("grep --version 2>/dev/null > /dev/null")==0)?kTRUE:kFALSE; + Bool_t nullFile = kFALSE; + if (!hasGrep) { + Warning("CreateDataset", "'grep' command not available on this system - cannot validate the result of the grid 'find' command"); + } else { + nullFile = (gSystem->Exec(Form("grep /event %s 2>/dev/null > /dev/null",file.Data()))==0)?kFALSE:kTRUE; + if (nullFile) { + Warning("CreateDataset","Dataset %s produced by: <%s> is empty !", file.Data(), command.Data()); + continue; + } + } + nullResult = kFALSE; } if (TestBit(AliAnalysisGrid::kTest)) break; // Check if there is one run per master job. if (fNrunsPerMaster<2) { if (FileExists(file)) { - Info("CreateDataset", "\n##### Dataset %s exist. Skipping creation...", file.Data()); - continue; + if (fOverwriteMode) gGrid->Rm(file); + else { + Info("CreateDataset", "\n##### Dataset %s exist. Skipping creation...", file.Data()); + continue; + } } // Copy xml file to alien space TFile::Cp(Form("file:%s",file.Data()), Form("alien://%s/%s",workdir.Data(), file.Data())); @@ -708,7 +881,10 @@ Bool_t AliAnalysisAlien::CreateDataset(const char *pattern) nruns++; // Check if the collection for the chunk exist locally. Int_t nchunk = (nruns-1)/fNrunsPerMaster; - if (FileExists(fInputFiles->At(nchunk)->GetName())) continue; + if (FileExists(fInputFiles->At(nchunk)->GetName())) { + if (fOverwriteMode) gGrid->Rm(fInputFiles->At(nchunk)->GetName()); + else continue; + } printf(" Merging collection <%s> into %d runs chunk...\n",file.Data(),fNrunsPerMaster); if (((nruns-1)%fNrunsPerMaster) == 0) { schunk = Form("%s%d", fRunPrefix.Data(), irun); @@ -724,14 +900,20 @@ Bool_t AliAnalysisAlien::CreateDataset(const char *pattern) } schunk = schunk2; if (FileExists(schunk)) { - Info("CreateDataset", "\n##### Dataset %s exist. Skipping creation...", schunk.Data()); - continue; + if (fOverwriteMode) gGrid->Rm(schunk); + else { + Info("CreateDataset", "\n##### Dataset %s exist. Skipping creation...", schunk.Data()); + continue; + } } printf("Exporting merged collection <%s> and copying to AliEn.\n", schunk.Data()); cbase->ExportXML(Form("file://%s", schunk.Data()),kFALSE,kFALSE, schunk, "Merged runs"); if (FileExists(schunk)) { - Info("CreateDataset", "\n##### Dataset %s exist. Skipping copy...", schunk.Data()); - continue; + if (fOverwriteMode) gGrid->Rm(schunk); + else { + Info("CreateDataset", "\n##### Dataset %s exist. Skipping copy...", schunk.Data()); + continue; + } } TFile::Cp(Form("file:%s",schunk.Data()), Form("alien://%s/%s",workdir.Data(), schunk.Data())); if (!FileExists(schunk)) { @@ -740,6 +922,10 @@ Bool_t AliAnalysisAlien::CreateDataset(const char *pattern) } } } + if (nullResult) { + Error("CreateDataset", "No valid dataset corresponding to the query!"); + return kFALSE; + } } return kTRUE; } @@ -752,7 +938,7 @@ Bool_t AliAnalysisAlien::CreateJDL() Bool_t error = kFALSE; TObjArray *arr = 0; Bool_t copy = kTRUE; - if (TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; + if (fProductionMode || TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; Bool_t generate = kTRUE; if (TestBit(AliAnalysisGrid::kTest) || TestBit(AliAnalysisGrid::kSubmit)) generate = kFALSE; if (!Connect()) { @@ -760,8 +946,9 @@ Bool_t AliAnalysisAlien::CreateJDL() return kFALSE; } // Check validity of alien workspace - CdWork(); - TString workdir = gGrid->GetHomeDirectory(); + TString workdir; + if (!fProductionMode && !fGridWorkingDir.BeginsWith("/alice")) workdir = gGrid->GetHomeDirectory(); + if (!fProductionMode && !TestBit(AliAnalysisGrid::kTest)) CdWork(); workdir += fGridWorkingDir; if (generate) { TObjString *os; @@ -780,39 +967,72 @@ Bool_t AliAnalysisAlien::CreateJDL() Error("CreateJDL", "You must define AliEn output directory"); error = kTRUE; } else { - if (!fGridOutputDir.Contains("/")) fGridOutputDir = Form("%s/%s", workdir.Data(), fGridOutputDir.Data()); - if (!DirectoryExists(fGridOutputDir)) { - if (gGrid->Mkdir(fGridOutputDir)) { - Info("CreateJDL", "\n##### Created alien output directory %s", fGridOutputDir.Data()); - } else { - Error("CreateJDL", "Could not create alien output directory %s", fGridOutputDir.Data()); - // error = kTRUE; - } - } - gGrid->Cd(workdir); + if (!fProductionMode) { + if (!fGridOutputDir.Contains("/")) fGridOutputDir = Form("%s/%s", workdir.Data(), fGridOutputDir.Data()); + if (!DirectoryExists(fGridOutputDir)) { + if (gGrid->Mkdir(fGridOutputDir,"-p")) { + Info("CreateJDL", "\n##### Created alien output directory %s", fGridOutputDir.Data()); + } else { + Error("CreateJDL", "Could not create alien output directory %s", fGridOutputDir.Data()); + // error = kTRUE; + } + } + gGrid->Cd(workdir); + } } // Exit if any error up to now if (error) return kFALSE; // Set JDL fields - fGridJDL->SetValue("User", Form("\"%s\"", fUser.Data())); - fGridJDL->SetExecutable(fExecutable); + if (!fUser.IsNull()) { + fGridJDL->SetValue("User", Form("\"%s\"", fUser.Data())); + fMergingJDL->SetValue("User", Form("\"%s\"", fUser.Data())); + } + fGridJDL->SetExecutable(fExecutable, "This is the startup script"); + TString mergeExec = fExecutable; + mergeExec.ReplaceAll(".sh", "_merge.sh"); + fMergingJDL->SetExecutable(mergeExec, "This is the startup script"); + mergeExec.ReplaceAll(".sh", ".C"); + fMergingJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(),mergeExec.Data()), "List of input files to be uploaded to workers"); if (!fArguments.IsNull()) fGridJDL->SetArguments(fArguments, "Arguments for the executable command"); -// fGridJDL->SetTTL((UInt_t)fTTL); - fGridJDL->SetValue("TTL", Form("\"%d\"", fTTL)); - if (fMaxInitFailed > 0) + if (IsOneStageMerging()) fMergingJDL->SetArguments(fGridOutputDir); + else fMergingJDL->SetArguments("wn.xml $2 $3"); // xml, stage, laststage(0 or 1) + + fGridJDL->SetValue("TTL", Form("\"%d\"",fTTL)); + fGridJDL->SetDescription("TTL", Form("Time after which the job is killed (%d min.)", fTTL/60)); + fMergingJDL->SetValue("TTL", Form("\"%d\"",fTTL)); + fMergingJDL->SetDescription("TTL", Form("Time after which the job is killed (%d min.)", fTTL/60)); + + if (fMaxInitFailed > 0) { fGridJDL->SetValue("MaxInitFailed", Form("\"%d\"",fMaxInitFailed)); - if (fSplitMaxInputFileNumber > 0) + fGridJDL->SetDescription("MaxInitFailed", "Maximum number of first failing jobs to abort the master job"); + } + if (fSplitMaxInputFileNumber > 0) { fGridJDL->SetValue("SplitMaxInputFileNumber", Form("\"%d\"", fSplitMaxInputFileNumber)); - if (fSplitMode.Length()) + fGridJDL->SetDescription("SplitMaxInputFileNumber", "Maximum number of input files to be processed per subjob"); + } + if (!IsOneStageMerging()) { + fMergingJDL->SetValue("SplitMaxInputFileNumber", "\"$3\""); + fMergingJDL->SetDescription("SplitMaxInputFileNumber", "Maximum number of input files to be merged in one go"); + } + if (fSplitMode.Length()) { fGridJDL->SetValue("Split", Form("\"%s\"", fSplitMode.Data())); -// fGridJDL->SetSplitMode(fSplitMode, (UInt_t)fSplitMaxInputFileNumber); - if (fAliROOTVersion.Length()) - fGridJDL->AddToPackages("AliRoot", fAliROOTVersion); - if (fROOTVersion.Length()) + fGridJDL->SetDescription("Split", "We split per SE or file"); + } + fMergingJDL->SetValue("Split", "\"se\""); + fMergingJDL->SetDescription("Split", "We split per SE for merging in stages"); + if (!fAliROOTVersion.IsNull()) { + fGridJDL->AddToPackages("AliRoot", fAliROOTVersion,"VO_ALICE", "List of requested packages"); + fMergingJDL->AddToPackages("AliRoot", fAliROOTVersion, "VO_ALICE", "List of requested packages"); + } + if (!fROOTVersion.IsNull()) { fGridJDL->AddToPackages("ROOT", fROOTVersion); - if (fAPIVersion.Length()) + fMergingJDL->AddToPackages("ROOT", fROOTVersion); + } + if (!fAPIVersion.IsNull()) { fGridJDL->AddToPackages("APISCONFIG", fAPIVersion); + fMergingJDL->AddToPackages("APISCONFIG", fAPIVersion); + } if (!fExternalPackages.IsNull()) { arr = fExternalPackages.Tokenize(" "); TIter next(arr); @@ -822,15 +1042,19 @@ Bool_t AliAnalysisAlien::CreateJDL() TString pkgversion = pkgname(index+2, pkgname.Length()); pkgname.Remove(index); fGridJDL->AddToPackages(pkgname, pkgversion); + fMergingJDL->AddToPackages(pkgname, pkgversion); } delete arr; } - fGridJDL->SetInputDataListFormat(fInputFormat); - fGridJDL->SetInputDataList("wn.xml"); - fGridJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(), fAnalysisMacro.Data())); + fGridJDL->SetInputDataListFormat(fInputFormat, "Format of input data"); + fGridJDL->SetInputDataList("wn.xml", "Collection name to be processed on each worker node"); + fMergingJDL->SetInputDataListFormat(fInputFormat, "Format of input data"); + fMergingJDL->SetInputDataList("wn.xml", "Collection name to be processed on each worker node"); + fGridJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(), fAnalysisMacro.Data()), "List of input files to be uploaded to workers"); TString analysisFile = fExecutable; analysisFile.ReplaceAll(".sh", ".root"); fGridJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(),analysisFile.Data())); + fMergingJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(),analysisFile.Data())); if (IsUsingTags() && !gSystem->AccessPathName("ConfigureCuts.C")) fGridJDL->AddToInputSandbox(Form("LF:%s/ConfigureCuts.C", workdir.Data())); if (fAdditionalLibs.Length()) { @@ -839,44 +1063,105 @@ Bool_t AliAnalysisAlien::CreateJDL() while ((os=(TObjString*)next())) { if (os->GetString().Contains(".so")) continue; fGridJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(), os->GetString().Data())); + fMergingJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(), os->GetString().Data())); } delete arr; } if (fPackages) { TIter next(fPackages); TObject *obj; - while ((obj=next())) + while ((obj=next())) { fGridJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(), obj->GetName())); + fMergingJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(), obj->GetName())); + } } if (fOutputArchive.Length()) { arr = fOutputArchive.Tokenize(" "); TIter next(arr); - while ((os=(TObjString*)next())) - if (!os->GetString().Contains("@") && fCloseSE.Length()) - fGridJDL->AddToOutputArchive(Form("%s@%s",os->GetString().Data(), fCloseSE.Data())); - else - fGridJDL->AddToOutputArchive(os->GetString()); + Bool_t first = kTRUE; + const char *comment = "Files to be archived"; + const char *comment1 = comment; + while ((os=(TObjString*)next())) { + if (!first) comment = NULL; + if (!os->GetString().Contains("@") && fCloseSE.Length()) + fGridJDL->AddToOutputArchive(Form("%s@%s",os->GetString().Data(), fCloseSE.Data()), comment); + else + fGridJDL->AddToOutputArchive(os->GetString(), comment); + first = kFALSE; + } delete arr; + // Output archive for the merging jdl + TString outputArchive; + if (TestBit(AliAnalysisGrid::kDefaultOutputs)) { + outputArchive = "log_archive.zip:std*,*.stat@disk=1 "; + // Add normal output files, extra files + terminate files + TString files = GetListOfFiles("outextter"); + // Do not register merge excludes + if (!fMergeExcludes.IsNull()) { + arr = fMergeExcludes.Tokenize(" "); + TIter next1(arr); + while ((os=(TObjString*)next1())) { + files.ReplaceAll(Form("%s,",os->GetString().Data()),""); + files.ReplaceAll(os->GetString(),""); + } + delete arr; + } + files.ReplaceAll(".root", "*.root"); + outputArchive += Form("root_archive.zip:%s@disk=%d",files.Data(),fNreplicas); + } else { + TString files = fOutputArchive; + files.ReplaceAll(".root", "*.root"); // nreplicas etc should be already atttached by use + outputArchive = files; + } + arr = outputArchive.Tokenize(" "); + TIter next2(arr); + comment = comment1; + first = kTRUE; + while ((os=(TObjString*)next2())) { + if (!first) comment = NULL; + TString currentfile = os->GetString(); + if (!currentfile.Contains("@") && fCloseSE.Length()) + fMergingJDL->AddToOutputArchive(Form("%s@%s",currentfile.Data(), fCloseSE.Data()), comment); + else + fMergingJDL->AddToOutputArchive(currentfile, comment); + first = kFALSE; + } + delete arr; } - arr = fOutputFiles.Tokenize(" "); + arr = fOutputFiles.Tokenize(","); TIter next(arr); + Bool_t first = kTRUE; + const char *comment = "Files to be saved"; while ((os=(TObjString*)next())) { // Ignore ouputs in jdl that are also in outputarchive TString sout = os->GetString(); + sout.ReplaceAll("*", ""); + sout.ReplaceAll(".root", ""); if (sout.Index("@")>0) sout.Remove(sout.Index("@")); if (fOutputArchive.Contains(sout)) continue; + if (!first) comment = NULL; + if (!os->GetString().Contains("@") && fCloseSE.Length()) + fGridJDL->AddToOutputSandbox(Form("%s@%s",os->GetString().Data(), fCloseSE.Data()), comment); + else + fGridJDL->AddToOutputSandbox(os->GetString(), comment); + first = kFALSE; + if (fMergeExcludes.Contains(sout)) continue; if (!os->GetString().Contains("@") && fCloseSE.Length()) - fGridJDL->AddToOutputSandbox(Form("%s@%s",os->GetString().Data(), fCloseSE.Data())); + fMergingJDL->AddToOutputSandbox(Form("%s@%s",os->GetString().Data(), fCloseSE.Data()), comment); else - fGridJDL->AddToOutputSandbox(os->GetString()); + fMergingJDL->AddToOutputSandbox(os->GetString(), comment); } delete arr; -// fGridJDL->SetPrice((UInt_t)fPrice); - fGridJDL->SetValue("Price", Form("\"%d\"", fPrice)); - TString validationScript = fExecutable; - validationScript.ReplaceAll(".sh", "_validation.sh"); - fGridJDL->SetValidationCommand(Form("%s/%s", workdir.Data(),validationScript.Data())); - if (fMasterResubmitThreshold) fGridJDL->SetValue("MasterResubmitThreshold", Form("\"%d%%\"", fMasterResubmitThreshold)); + fGridJDL->SetPrice((UInt_t)fPrice, "AliEn price for this job"); + fMergingJDL->SetPrice((UInt_t)fPrice, "AliEn price for this job"); + TString validationScript = fValidationScript; + fGridJDL->SetValidationCommand(Form("%s/%s", workdir.Data(),validationScript.Data()), "Validation script to be run for each subjob"); + validationScript.ReplaceAll(".sh", "_merge.sh"); + fMergingJDL->SetValidationCommand(Form("%s/%s", workdir.Data(),validationScript.Data()), "Validation script to be run for each subjob"); + if (fMasterResubmitThreshold) { + fGridJDL->SetValue("MasterResubmitThreshold", Form("\"%d%%\"", fMasterResubmitThreshold)); + fGridJDL->SetDescription("MasterResubmitThreshold", "Resubmit failed jobs until DONE rate reaches this percentage"); + } // Write a jdl with 2 input parameters: collection name and output dir name. WriteJDL(copy); } @@ -888,8 +1173,8 @@ Bool_t AliAnalysisAlien::CreateJDL() return kFALSE; } else { if (!fGridOutputDir.Contains("/")) fGridOutputDir = Form("%s/%s", workdir.Data(), fGridOutputDir.Data()); - if (!DirectoryExists(fGridOutputDir)) { - if (gGrid->Mkdir(fGridOutputDir)) { + if (!fProductionMode && !DirectoryExists(fGridOutputDir)) { + if (gGrid->Mkdir(fGridOutputDir,"-p")) { Info("CreateJDL", "\n##### Created alien output directory %s", fGridOutputDir.Data()); } else { Error("CreateJDL", "Could not create alien output directory %s", fGridOutputDir.Data()); @@ -899,12 +1184,22 @@ Bool_t AliAnalysisAlien::CreateJDL() gGrid->Cd(workdir); } if (TestBit(AliAnalysisGrid::kSubmit)) { - Info("CreateJDL", "\n##### Copying JDL file <%s> to your AliEn output directory", fJDLName.Data()); + TString mergeJDLName = fExecutable; + mergeJDLName.ReplaceAll(".sh", "_merge.jdl"); TString locjdl = Form("%s/%s", fGridOutputDir.Data(),fJDLName.Data()); - if (fProductionMode) + TString locjdl1 = Form("%s/%s", fGridOutputDir.Data(),mergeJDLName.Data()); + if (fProductionMode) { locjdl = Form("%s/%s", workdir.Data(),fJDLName.Data()); + locjdl1 = Form("%s/%s", workdir.Data(),mergeJDLName.Data()); + } if (FileExists(locjdl)) gGrid->Rm(locjdl); + if (FileExists(locjdl1)) gGrid->Rm(locjdl1); + Info("CreateJDL", "\n##### Copying JDL file <%s> to your AliEn output directory", fJDLName.Data()); TFile::Cp(Form("file:%s",fJDLName.Data()), Form("alien://%s", locjdl.Data())); + if (fMergeViaJDL) { + Info("CreateJDL", "\n##### Copying merging JDL file <%s> to your AliEn output directory", mergeJDLName.Data()); + TFile::Cp(Form("file:%s",mergeJDLName.Data()), Form("alien://%s", locjdl1.Data())); + } } if (fAdditionalLibs.Length()) { arr = fAdditionalLibs.Tokenize(" "); @@ -922,6 +1217,7 @@ Bool_t AliAnalysisAlien::CreateJDL() TIter next(fPackages); TObject *obj; while ((obj=next())) { + if (FileExists(obj->GetName())) gGrid->Rm(obj->GetName()); Info("CreateJDL", "\n##### Copying dependency: <%s> to your alien workspace", obj->GetName()); TFile::Cp(Form("file:%s",obj->GetName()), Form("alien://%s/%s", workdir.Data(), obj->GetName())); } @@ -937,71 +1233,50 @@ Bool_t AliAnalysisAlien::WriteJDL(Bool_t copy) // all run numbers are considered in one go (jdl). For non-negative indices // they correspond to the indices in the array fInputFiles. if (!fInputFiles) return kFALSE; - TObjString *os; - TString workdir = gGrid->GetHomeDirectory(); + TObject *os; + TString workdir; + if (!fProductionMode && !fGridWorkingDir.BeginsWith("/alice")) workdir = gGrid->GetHomeDirectory(); workdir += fGridWorkingDir; + fMergingJDL->AddToInputDataCollection("LF:$1/Stage_$2.xml,nodownload", "Collection of files to be merged for stage $2"); + fMergingJDL->SetOutputDirectory("$1/Stage_$2/#alien_counter_03i#", "Output directory"); - if (!fRunNumbers.Length() && !fRunRange[0]) { - // One jdl with no parameters in case input data is specified by name. + if (fProductionMode) { TIter next(fInputFiles); - while ((os=(TObjString*)next())) - fGridJDL->AddToInputDataCollection(Form("LF:%s,nodownload", os->GetString().Data())); - if (!fOutputSingle.IsNull()) - fGridJDL->SetOutputDirectory(Form("#alienfulldir#/%s",fOutputSingle.Data())); - else - fGridJDL->SetOutputDirectory(Form("%s/#alien_counter_03i#", fGridOutputDir.Data())); - } else { - // One jdl to be submitted with 2 input parameters: data collection name and output dir prefix - fGridJDL->AddToInputDataCollection(Form("LF:%s/$1,nodownload", workdir.Data())); - if (!fOutputSingle.IsNull()) { - if (!fOutputToRunNo) fGridJDL->SetOutputDirectory(Form("#alienfulldir#/%s",fOutputSingle.Data())); - else fGridJDL->SetOutputDirectory(Form("%s/$2",fGridOutputDir.Data())); - } else { - fGridJDL->SetOutputDirectory(Form("%s/$2/#alien_counter_03i#", fGridOutputDir.Data())); - } + while ((os=next())) { + fGridJDL->AddToInputDataCollection(Form("LF:%s,nodownload", os->GetName()), "Input xml collections"); + } + fGridJDL->SetOutputDirectory(Form("%s/#alien_counter_04i#", fGridOutputDir.Data())); + } else { + if (!fRunNumbers.Length() && !fRunRange[0]) { + // One jdl with no parameters in case input data is specified by name. + TIter next(fInputFiles); + while ((os=next())) + fGridJDL->AddToInputDataCollection(Form("LF:%s,nodownload", os->GetName()), "Input xml collections"); + if (!fOutputSingle.IsNull()) + fGridJDL->SetOutputDirectory(Form("#alienfulldir#/../%s",fOutputSingle.Data()), "Output directory"); + else { + fGridJDL->SetOutputDirectory(Form("%s/#alien_counter_03i#", fGridOutputDir.Data()), "Output directory"); + fMergingJDL->SetOutputDirectory(fGridOutputDir); + } + } else { + // One jdl to be submitted with 2 input parameters: data collection name and output dir prefix + fGridJDL->AddToInputDataCollection(Form("LF:%s/$1,nodownload", workdir.Data()), "Input xml collections"); + if (!fOutputSingle.IsNull()) { + if (!fOutputToRunNo) fGridJDL->SetOutputDirectory(Form("#alienfulldir#/%s",fOutputSingle.Data()), "Output directory"); + else fGridJDL->SetOutputDirectory(Form("%s/$2",fGridOutputDir.Data()), "Output directory"); + } else { + fGridJDL->SetOutputDirectory(Form("%s/$2/#alien_counter_03i#", fGridOutputDir.Data()), "Output directory"); + } + } } - // Generate the JDL as a string TString sjdl = fGridJDL->Generate(); - Int_t index; - index = sjdl.Index("Executable"); - if (index >= 0) sjdl.Insert(index, "\n# This is the startup script\n"); - index = sjdl.Index("Split "); - if (index >= 0) sjdl.Insert(index, "\n# We split per SE or file\n"); - index = sjdl.Index("SplitMaxInputFileNumber"); - if (index >= 0) sjdl.Insert(index, "\n# We want each subjob to get maximum this number of input files\n"); - index = sjdl.Index("InputDataCollection"); - if (index >= 0) sjdl.Insert(index, "# Input xml collections\n"); - index = sjdl.Index("InputFile"); - if (index >= 0) sjdl.Insert(index, "\n# List of input files to be uploaded to wn's\n"); - index = sjdl.Index("InputDataList "); - if (index >= 0) sjdl.Insert(index, "\n# Collection to be processed on wn\n"); - index = sjdl.Index("InputDataListFormat"); - if (index >= 0) sjdl.Insert(index, "\n# Format of input data\n"); - index = sjdl.Index("Price"); - if (index >= 0) sjdl.Insert(index, "\n# AliEn price for this job\n"); - index = sjdl.Index("Requirements"); - if (index >= 0) sjdl.Insert(index, "\n# Additional requirements for the computing element\n"); - index = sjdl.Index("Packages"); - if (index >= 0) sjdl.Insert(index, "\n# Packages to be used\n"); - index = sjdl.Index("User ="); - if (index >= 0) sjdl.Insert(index, "\n# AliEn user\n"); - index = sjdl.Index("TTL"); - if (index >= 0) sjdl.Insert(index, "\n# Time to live for the job\n"); - index = sjdl.Index("OutputFile"); - if (index >= 0) sjdl.Insert(index, "\n# List of output files to be registered\n"); - index = sjdl.Index("OutputDir"); - if (index >= 0) sjdl.Insert(index, "\n# Output directory\n"); - index = sjdl.Index("OutputArchive"); - if (index >= 0) sjdl.Insert(index, "\n# Files to be archived\n"); - index = sjdl.Index("MaxInitFailed"); - if (index >= 0) sjdl.Insert(index, "\n# Maximum number of first failing jobs to abort the master job\n"); - index = sjdl.Index("MasterResubmitThreshold"); - if (index >= 0) sjdl.Insert(index, "\n# Resubmit failed jobs until DONE rate reaches this percentage\n"); - sjdl.ReplaceAll("ValidationCommand", "Validationcommand"); - index = sjdl.Index("Validationcommand"); - if (index >= 0) sjdl.Insert(index, "\n# Validation script to be run for each subjob\n"); + TString sjdl1 = fMergingJDL->Generate(); + fMergingJDL->SetOutputDirectory("$1", "Output directory"); + fMergingJDL->AddToInputSandbox("LF:$1/$4"); + TString sjdl2 = fMergingJDL->Generate(); + Int_t index, index1; sjdl.ReplaceAll("\"LF:", "\n \"LF:"); sjdl.ReplaceAll("(member", "\n (member"); sjdl.ReplaceAll("\",\"VO_", "\",\n \"VO_"); @@ -1010,29 +1285,132 @@ Bool_t AliAnalysisAlien::WriteJDL(Bool_t copy) sjdl.ReplaceAll("{\n \n", "{\n"); sjdl.ReplaceAll("\n\n", "\n"); sjdl.ReplaceAll("OutputDirectory", "OutputDir"); + sjdl1.ReplaceAll("\"LF:", "\n \"LF:"); + sjdl1.ReplaceAll("(member", "\n (member"); + sjdl1.ReplaceAll("\",\"VO_", "\",\n \"VO_"); + sjdl1.ReplaceAll("{", "{\n "); + sjdl1.ReplaceAll("};", "\n};"); + sjdl1.ReplaceAll("{\n \n", "{\n"); + sjdl1.ReplaceAll("\n\n", "\n"); + sjdl1.ReplaceAll("OutputDirectory", "OutputDir"); + sjdl2.ReplaceAll("\"LF:", "\n \"LF:"); + sjdl2.ReplaceAll("(member", "\n (member"); + sjdl2.ReplaceAll("\",\"VO_", "\",\n \"VO_"); + sjdl2.ReplaceAll("{", "{\n "); + sjdl2.ReplaceAll("};", "\n};"); + sjdl2.ReplaceAll("{\n \n", "{\n"); + sjdl2.ReplaceAll("\n\n", "\n"); + sjdl2.ReplaceAll("OutputDirectory", "OutputDir"); sjdl += "JDLVariables = \n{\n \"Packages\",\n \"OutputDir\"\n};\n"; sjdl.Prepend(Form("Jobtag = {\n \"comment:%s\"\n};\n", fJobTag.Data())); index = sjdl.Index("JDLVariables"); if (index >= 0) sjdl.Insert(index, "\n# JDL variables\n"); + sjdl += "Workdirectorysize = {\"5000MB\"};"; + sjdl1 += "Workdirectorysize = {\"5000MB\"};"; + sjdl1 += "JDLVariables = \n{\n \"Packages\",\n \"OutputDir\"\n};\n"; + index = fJobTag.Index(":"); + if (index < 0) index = fJobTag.Length(); + TString jobTag = fJobTag; + sjdl1.Prepend(Form("Jobtag = {\n \"comment:%s_Merging\"\n};\n", jobTag.Data())); + sjdl1.Prepend("# Generated merging jdl \ + \n# $1 = full alien path to output directory to be merged \ + \n# $2 = merging stage \ + \n# $3 = maximum number of files to merge (must be >= 10000 for the last stage) \ + \n# $4 = xml made via: find *Stage/*root_archive.zip\n"); + sjdl2.Prepend(Form("Jobtag = {\n \"comment:%s_FinalMerging\"\n};\n", jobTag.Data())); + sjdl2.Prepend("# Generated merging jdl \ + \n# $1 = full alien path to output directory to be merged \ + \n# $2 = merging stage \ + \n# $3 = maximum number of files to merge (must be >= 10000 for the last stage) \ + \n# $4 = xml made via: find *Stage/*root_archive.zip\n"); + index = sjdl1.Index("JDLVariables"); + if (index >= 0) sjdl1.Insert(index, "\n# JDL variables\n"); + index = sjdl2.Index("JDLVariables"); + if (index >= 0) sjdl2.Insert(index, "\n# JDL variables\n"); + sjdl1 += "Workdirectorysize = {\"5000MB\"};"; + sjdl2 += "Workdirectorysize = {\"5000MB\"};"; + index = sjdl2.Index("Split ="); + if (index>=0) { + index1 = sjdl2.Index("\n", index); + sjdl2.Remove(index, index1-index+1); + } + index = sjdl2.Index("SplitMaxInputFileNumber"); + if (index>=0) { + index1 = sjdl2.Index("\n", index); + sjdl2.Remove(index, index1-index+1); + } + index = sjdl2.Index("InputDataCollection"); + if (index>=0) { + index1 = sjdl2.Index(";", index); + sjdl2.Remove(index, index1-index+1); + } + index = sjdl2.Index("InputDataListFormat"); + if (index>=0) { + index1 = sjdl2.Index("\n", index); + sjdl2.Remove(index, index1-index+1); + } + index = sjdl2.Index("InputDataList"); + if (index>=0) { + index1 = sjdl2.Index("\n", index); + sjdl2.Remove(index, index1-index+1); + } + sjdl2.ReplaceAll("wn.xml", "$4"); // Write jdl to file ofstream out; out.open(fJDLName.Data(), ios::out); if (out.bad()) { - Error("CreateJDL", "Bad file name: %s", fJDLName.Data()); + Error("WriteJDL", "Bad file name: %s", fJDLName.Data()); return kFALSE; } out << sjdl << endl; + out.close(); + TString mergeJDLName = fExecutable; + mergeJDLName.ReplaceAll(".sh", "_merge.jdl"); + if (fMergeViaJDL) { + ofstream out1; + out1.open(mergeJDLName.Data(), ios::out); + if (out1.bad()) { + Error("WriteJDL", "Bad file name: %s", mergeJDLName.Data()); + return kFALSE; + } + out1 << sjdl1 << endl; + out1.close(); + ofstream out2; + TString finalJDL = mergeJDLName; + finalJDL.ReplaceAll(".jdl", "_final.jdl"); + out2.open(finalJDL.Data(), ios::out); + if (out2.bad()) { + Error("WriteJDL", "Bad file name: %s", finalJDL.Data()); + return kFALSE; + } + out2 << sjdl2 << endl; + out2.close(); + } // Copy jdl to grid workspace if (!copy) { - Info("CreateJDL", "\n##### You may want to review jdl:%s and analysis macro:%s before running in mode", fJDLName.Data(), fAnalysisMacro.Data()); + Info("WriteJDL", "\n##### You may want to review jdl:%s and analysis macro:%s before running in mode", fJDLName.Data(), fAnalysisMacro.Data()); } else { - Info("CreateJDL", "\n##### Copying JDL file <%s> to your AliEn output directory", fJDLName.Data()); TString locjdl = Form("%s/%s", fGridOutputDir.Data(),fJDLName.Data()); - if (fProductionMode) + TString locjdl1 = Form("%s/%s", fGridOutputDir.Data(),mergeJDLName.Data()); + TString finalJDL = mergeJDLName; + finalJDL.ReplaceAll(".jdl", "_final.jdl"); + TString locjdl2 = Form("%s/%s", fGridOutputDir.Data(),finalJDL.Data()); + if (fProductionMode) { locjdl = Form("%s/%s", workdir.Data(),fJDLName.Data()); + locjdl1 = Form("%s/%s", workdir.Data(),mergeJDLName.Data()); + locjdl2 = Form("%s/%s", workdir.Data(),finalJDL.Data()); + } if (FileExists(locjdl)) gGrid->Rm(locjdl); + if (FileExists(locjdl1)) gGrid->Rm(locjdl1); + if (FileExists(locjdl2)) gGrid->Rm(locjdl2); + Info("WriteJDL", "\n##### Copying JDL file <%s> to your AliEn output directory", fJDLName.Data()); TFile::Cp(Form("file:%s",fJDLName.Data()), Form("alien://%s", locjdl.Data())); + if (fMergeViaJDL) { + Info("WriteJDL", "\n##### Copying merging JDL files <%s> to your AliEn output directory", mergeJDLName.Data()); + TFile::Cp(Form("file:%s",mergeJDLName.Data()), Form("alien://%s", locjdl1.Data())); + TFile::Cp(Form("file:%s",finalJDL.Data()), Form("alien://%s", locjdl2.Data())); + } } return kTRUE; } @@ -1042,7 +1420,9 @@ Bool_t AliAnalysisAlien::FileExists(const char *lfn) { // Returns true if file exists. if (!gGrid) return kFALSE; - TGridResult *res = gGrid->Ls(lfn); + TString slfn = lfn; + slfn.ReplaceAll("alien://",""); + TGridResult *res = gGrid->Ls(slfn); if (!res) return kFALSE; TMap *map = dynamic_cast(res->At(0)); if (!map) { @@ -1088,77 +1468,77 @@ Bool_t AliAnalysisAlien::DirectoryExists(const char *dirname) } //______________________________________________________________________________ -void AliAnalysisAlien::CheckDataType(const char *lfn, Bool_t &is_collection, Bool_t &is_xml, Bool_t &use_tags) +void AliAnalysisAlien::CheckDataType(const char *lfn, Bool_t &isCollection, Bool_t &isXml, Bool_t &useTags) { // Check input data type. - is_collection = kFALSE; - is_xml = kFALSE; - use_tags = kFALSE; + isCollection = kFALSE; + isXml = kFALSE; + useTags = kFALSE; if (!gGrid) { Error("CheckDataType", "No connection to grid"); return; } - is_collection = IsCollection(lfn); + isCollection = IsCollection(lfn); TString msg = "\n##### file: "; msg += lfn; - if (is_collection) { + if (isCollection) { msg += " type: raw_collection;"; // special treatment for collections - is_xml = kFALSE; + isXml = kFALSE; // check for tag files in the collection TGridResult *res = gGrid->Command(Form("listFilesFromCollection -z -v %s",lfn), kFALSE); if (!res) { msg += " using_tags: No (unknown)"; - Info("CheckDataType", msg.Data()); + Info("CheckDataType", "%s", msg.Data()); return; } const char* typeStr = res->GetKey(0, "origLFN"); if (!typeStr || !strlen(typeStr)) { msg += " using_tags: No (unknown)"; - Info("CheckDataType", msg.Data()); + Info("CheckDataType", "%s", msg.Data()); return; } TString file = typeStr; - use_tags = file.Contains(".tag"); - if (use_tags) msg += " using_tags: Yes"; + useTags = file.Contains(".tag"); + if (useTags) msg += " using_tags: Yes"; else msg += " using_tags: No"; - Info("CheckDataType", msg.Data()); + Info("CheckDataType", "%s", msg.Data()); return; } TString slfn(lfn); slfn.ToLower(); - is_xml = slfn.Contains(".xml"); - if (is_xml) { + isXml = slfn.Contains(".xml"); + if (isXml) { // Open xml collection and check if there are tag files inside msg += " type: xml_collection;"; TGridCollection *coll = (TGridCollection*)gROOT->ProcessLine(Form("TAlienCollection::Open(\"alien://%s\",1);",lfn)); if (!coll) { msg += " using_tags: No (unknown)"; - Info("CheckDataType", msg.Data()); + Info("CheckDataType", "%s", msg.Data()); return; } TMap *map = coll->Next(); if (!map) { msg += " using_tags: No (unknown)"; - Info("CheckDataType", msg.Data()); + Info("CheckDataType", "%s", msg.Data()); return; } map = (TMap*)map->GetValue(""); TString file; if (map && map->GetValue("name")) file = map->GetValue("name")->GetName(); - use_tags = file.Contains(".tag"); + useTags = file.Contains(".tag"); delete coll; - if (use_tags) msg += " using_tags: Yes"; + if (useTags) msg += " using_tags: Yes"; else msg += " using_tags: No"; - Info("CheckDataType", msg.Data()); + Info("CheckDataType", "%s", msg.Data()); return; } - use_tags = slfn.Contains(".tag"); + useTags = slfn.Contains(".tag"); if (slfn.Contains(".root")) msg += " type: root file;"; - else msg += " type: unhnown file;"; - if (use_tags) msg += " using_tags: Yes"; + else msg += " type: unknown file;"; + if (useTags) msg += " using_tags: Yes"; else msg += " using_tags: No"; - Info("CheckDataType", msg.Data()); + Info("CheckDataType", "%s", msg.Data()); } //______________________________________________________________________________ @@ -1169,7 +1549,7 @@ void AliAnalysisAlien::EnablePackage(const char *package) pkg.ReplaceAll(".par", ""); pkg += ".par"; if (gSystem->AccessPathName(pkg)) { - Error("EnablePackage", "Package %s not found", pkg.Data()); + Fatal("EnablePackage", "Package %s not found", pkg.Data()); return; } if (!TObject::TestBit(AliAnalysisGrid::kUsePars)) @@ -1182,6 +1562,50 @@ void AliAnalysisAlien::EnablePackage(const char *package) fPackages->Add(new TObjString(pkg)); } +//______________________________________________________________________________ +TChain *AliAnalysisAlien::GetChainForTestMode(const char *treeName) const +{ +// Make a tree from files having the location specified in fFileForTestMode. +// Inspired from JF's CreateESDChain. + if (fFileForTestMode.IsNull()) { + Error("GetChainForTestMode", "For proof test mode please use SetFileForTestMode() pointing to a file that contains data file locations."); + return NULL; + } + if (gSystem->AccessPathName(fFileForTestMode)) { + Error("GetChainForTestMode", "File not found: %s", fFileForTestMode.Data()); + return NULL; + } + // Open the file + ifstream in; + in.open(fFileForTestMode); + Int_t count = 0; + // Read the input list of files and add them to the chain + TString line; + TChain *chain = new TChain(treeName); + while (in.good()) + { + in >> line; + if (line.IsNull()) continue; + if (count++ == fNtestFiles) break; + TString esdFile(line); + TFile *file = TFile::Open(esdFile); + if (file) { + if (!file->IsZombie()) chain->Add(esdFile); + file->Close(); + } else { + Error("GetChainforTestMode", "Skipping un-openable file: %s", esdFile.Data()); + } + } + in.close(); + if (!chain->GetListOfFiles()->GetEntries()) { + Error("GetChainForTestMode", "No file from %s could be opened", fFileForTestMode.Data()); + delete chain; + return NULL; + } +// chain->ls(); + return chain; +} + //______________________________________________________________________________ const char *AliAnalysisAlien::GetJobStatus(Int_t jobidstart, Int_t lastid, Int_t &nrunning, Int_t &nwaiting, Int_t &nerror, Int_t &ndone) { @@ -1252,6 +1676,52 @@ void AliAnalysisAlien::Print(Option_t *) const { // Print current plugin settings. printf("### AliEn analysis plugin current settings ###\n"); + AliAnalysisManager *mgr = AliAnalysisManager::GetAnalysisManager(); + if (mgr && mgr->IsProofMode()) { + TString proofType = "= PLUGIN IN PROOF MODE ON CLUSTER:_________________"; + if (TestBit(AliAnalysisGrid::kTest)) + proofType = "= PLUGIN IN PROOF LITE MODE ON CLUSTER:____________"; + printf("%s %s\n", proofType.Data(), fProofCluster.Data()); + if (!fProofDataSet.IsNull()) + printf("= Requested data set:___________________________ %s\n", fProofDataSet.Data()); + if (fProofReset==1) + printf("= Soft reset signal will be send to master______ CHANGE BEHAVIOR AFTER COMPLETION\n"); + if (fProofReset>1) + printf("= Hard reset signal will be send to master______ CHANGE BEHAVIOR AFTER COMPLETION\n"); + if (!fRootVersionForProof.IsNull()) + printf("= ROOT version requested________________________ %s\n", fRootVersionForProof.Data()); + else + printf("= ROOT version requested________________________ default\n"); + printf("= AliRoot version requested_____________________ %s\n", fAliROOTVersion.Data()); + if (!fAliRootMode.IsNull()) + printf("= Requested AliRoot mode________________________ %s\n", fAliRootMode.Data()); + if (fNproofWorkers) + printf("= Number of PROOF workers limited to____________ %d\n", fNproofWorkers); + if (fNproofWorkersPerSlave) + printf("= Maximum number of workers per slave___________ %d\n", fNproofWorkersPerSlave); + if (TestSpecialBit(kClearPackages)) + printf("= ClearPackages requested...\n"); + if (fIncludePath.Data()) + printf("= Include path for runtime task compilation: ___ %s\n", fIncludePath.Data()); + printf("= Additional libs to be loaded or souces to be compiled runtime: <%s>\n",fAdditionalLibs.Data()); + if (fPackages && fPackages->GetEntries()) { + TIter next(fPackages); + TObject *obj; + TString list; + while ((obj=next())) list += obj->GetName(); + printf("= Par files to be used: ________________________ %s\n", list.Data()); + } + if (TestSpecialBit(kProofConnectGrid)) + printf("= Requested PROOF connection to grid\n"); + return; + } + printf("= OverwriteMode:________________________________ %d\n", fOverwriteMode); + if (fOverwriteMode) { + printf("***** NOTE: Overwrite mode will overwrite the input generated datasets and partial results from previous analysis. \ + \n***** To disable, use: plugin->SetOverwriteMode(kFALSE);\n"); + } + printf("= Copy files to grid: __________________________ %s\n", (IsUseCopy())?"YES":"NO"); + printf("= Check if files can be copied to grid: ________ %s\n", (IsCheckCopy())?"YES":"NO"); printf("= Production mode:______________________________ %d\n", fProductionMode); printf("= Version of API requested: ____________________ %s\n", fAPIVersion.Data()); printf("= Version of ROOT requested: ___________________ %s\n", fROOTVersion.Data()); @@ -1279,6 +1749,7 @@ void AliAnalysisAlien::Print(Option_t *) const printf("= List of output files to be registered: _______ %s\n", fOutputFiles.Data()); printf("= List of outputs going to be archived: ________ %s\n", fOutputArchive.Data()); printf("= List of outputs that should not be merged: ___ %s\n", fMergeExcludes.Data()); + printf("= List of outputs produced during Terminate: ___ %s\n", fTerminateFiles.Data()); printf("=====================================================================\n"); printf("= Job price: ___________________________________ %d\n", fPrice); printf("= Time to live (TTL): __________________________ %d\n", fTTL); @@ -1287,6 +1758,7 @@ void AliAnalysisAlien::Print(Option_t *) const printf("= Max number of subjob fails to kill: __________ %d\n", fMaxInitFailed); if (fMasterResubmitThreshold>0) printf("= Resubmit master job if failed subjobs >_______ %d\n", fMasterResubmitThreshold); + printf("= Number of replicas for the output files_______ %d\n", fNreplicas); if (fNrunsPerMaster>0) printf("= Number of runs per master job: _______________ %d\n", fNrunsPerMaster); printf("= Number of files in one chunk to be merged: ___ %d\n", fMaxMergeFiles); @@ -1309,7 +1781,7 @@ void AliAnalysisAlien::Print(Option_t *) const printf("= Force job outputs to storage element: ________ %s\n", fCloseSE.Data()); if (fFriendChainName.Length()) printf("= Open friend chain file on worker: ____________ %s\n", fFriendChainName.Data()); - if (fPackages) { + if (fPackages && fPackages->GetEntries()) { TIter next(fPackages); TObject *obj; TString list; @@ -1324,12 +1796,14 @@ void AliAnalysisAlien::SetDefaults() // Set default values for everything. What cannot be filled will be left empty. if (fGridJDL) delete fGridJDL; fGridJDL = (TGridJDL*)gROOT->ProcessLine("new TAlienJDL()"); + fMergingJDL = (TGridJDL*)gROOT->ProcessLine("new TAlienJDL()"); fPrice = 1; fTTL = 30000; fSplitMaxInputFileNumber = 100; fMaxInitFailed = 0; fMasterResubmitThreshold = 0; fNtestFiles = 10; + fNreplicas = 2; fRunRange[0] = 0; fRunRange[1] = 0; fNrunsPerMaster = 1; @@ -1352,145 +1826,391 @@ void AliAnalysisAlien::SetDefaults() fDataPattern = "*AliESDs.root"; // Can be like: *AliESDs.root, */pass1/*AliESDs.root, ... fFriendChainName = ""; fGridOutputDir = "output"; - fOutputArchive = "log_archive.zip:stdout,stderr root_archive.zip:*.root"; + fOutputArchive = "log_archive.zip:std*,*.stat@disk=1 root_archive.zip:*.root@disk=2"; fOutputFiles = ""; // Like "AliAODs.root histos.root" fInputFormat = "xml-single"; fJDLName = "analysis.jdl"; fJobTag = "Automatically generated analysis JDL"; fMergeExcludes = ""; + fMergeViaJDL = 0; + SetUseCopy(kTRUE); + SetCheckCopy(kTRUE); + SetDefaultOutputs(kTRUE); + fOverwriteMode = 1; } //______________________________________________________________________________ -Bool_t AliAnalysisAlien::MergeOutputs() +Bool_t AliAnalysisAlien::CheckMergedFiles(const char *filename, const char *aliendir, Int_t nperchunk, const char *jdl) { -// Merge analysis outputs existing in the AliEn space. - if (TestBit(AliAnalysisGrid::kTest)) return kTRUE; - if (TestBit(AliAnalysisGrid::kOffline)) return kFALSE; - if (!Connect()) { - Error("MergeOutputs", "Cannot merge outputs without grid connection. Terminate will NOT be executed"); +// Checks current merge stage, makes xml for the next stage, counts number of files, submits next stage. + // First check if the result is already in the output directory. + if (FileExists(Form("%s/%s",aliendir,filename))) { + printf("Final merged results found. Not merging again.\n"); return kFALSE; - } - // Get the output path - if (!fGridOutputDir.Contains("/")) fGridOutputDir = Form("/%s/%s/%s", gGrid->GetHomeDirectory(), fGridWorkingDir.Data(), fGridOutputDir.Data()); - if (!DirectoryExists(fGridOutputDir)) { - Error("MergeOutputs", "Grid output directory %s not found. Terminate() will NOT be executed", fGridOutputDir.Data()); + } + // Now check the last stage done. + Int_t stage = 0; + while (1) { + if (!FileExists(Form("%s/Stage_%d.xml",aliendir, stage+1))) break; + stage++; + } + // Next stage of merging + stage++; + TString pattern = "*root_archive.zip"; + if (stage>1) pattern = Form("Stage_%d/*root_archive.zip", stage-1); + TGridResult *res = gGrid->Command(Form("find -x Stage_%d %s %s", stage, aliendir, pattern.Data())); + if (res) delete res; + // Write standard output to file + gROOT->ProcessLine(Form("gGrid->Stdout(); > %s", Form("Stage_%d.xml",stage))); + // Count the number of files inside + ifstream ifile; + ifile.open(Form("Stage_%d.xml",stage)); + if (!ifile.good()) { + ::Error("CheckMergedFiles", "Could not redirect result of the find command to file %s", Form("Stage_%d.xml",stage)); return kFALSE; + } + TString line; + Int_t nfiles = 0; + while (!ifile.eof()) { + ifile >> line; + if (line.Contains("/event")) nfiles++; } - if (!fOutputFiles.Length()) { - Error("MergeOutputs", "No output file names defined. Are you running the right AliAnalysisAlien configuration ?"); + ifile.close(); + if (!nfiles) { + ::Error("CheckMergedFiles", "Cannot start Stage_%d merging since Stage_%d did not produced yet output", stage, stage-1); return kFALSE; + } else { + printf("=== Stage_%d produced %d files\n", stage-1, nfiles); } - TObjArray *list = fOutputFiles.Tokenize(" "); - TIter next(list); - TObjString *str; + // Copy the file in the output directory + printf("===> Copying collection %s in the output directory %s\n", Form("Stage_%d.xml",stage), aliendir); + TFile::Cp(Form("Stage_%d.xml",stage), Form("alien://%s/Stage_%d.xml",aliendir,stage)); + // Check if this is the last stage to be done. + Bool_t laststage = (nfiles=fMaxMergeStages) laststage = kTRUE; + if (laststage) { + printf("### Submiting final merging stage %d\n", stage); + TString finalJDL = jdl; + finalJDL.ReplaceAll(".jdl", "_final.jdl"); + TString query = Form("submit %s %s %d 10000 Stage_%d.xml", finalJDL.Data(), aliendir, stage, stage); + Int_t jobId = SubmitSingleJob(query); + if (!jobId) return kFALSE; + } else { + printf("### Submiting merging stage %d\n", stage); + TString query = Form("submit %s %s %d %d wn.xml", jdl, aliendir, stage, nperchunk); + Int_t jobId = SubmitSingleJob(query); + if (!jobId) return kFALSE; + } + return kTRUE; +} + +//______________________________________________________________________________ +Int_t AliAnalysisAlien::SubmitSingleJob(const char *query) +{ +// Submits a single job corresponding to the query and returns job id. If 0 submission failed. + if (!gGrid) return 0; + printf("=> %s ------> ",query); + TGridResult *res = gGrid->Command(query); + if (!res) return 0; + TString jobId = res->GetKey(0,"jobId"); + delete res; + if (jobId.IsNull()) { + printf("submission failed. Reason:\n"); + gGrid->Stdout(); + gGrid->Stderr(); + ::Error("SubmitSingleJob", "Your query %s could not be submitted", query); + return 0; + } + printf(" Job id: %s\n", jobId.Data()); + return atoi(jobId); +} + +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::MergeOutput(const char *output, const char *basedir, Int_t nmaxmerge, Int_t stage) +{ +// Merge given output files from basedir. Basedir can be an alien output directory +// but also an xml file with root_archive.zip locations. The file merger will merge nmaxmerge +// files in a group (ignored for xml input). Merging can be done in stages: +// stage=0 : will merge all existing files in a single stage, supporting resume if run locally +// stage=1 : works with an xml of all root_archive.zip in the output directory +// stage>1 : works with an xml of all root_archive.zip in the Stage_ directory + TString outputFile = output; TString command; - TString output_file; - TString output_chunk; - TString previous_chunk; - Int_t count_chunk = 0; - Int_t count_zero = fMaxMergeFiles; + TString outputChunk; + TString previousChunk = ""; + TObjArray *listoffiles = new TObjArray(); +// listoffiles->SetOwner(); + Int_t countChunk = 0; + Int_t countZero = nmaxmerge; Bool_t merged = kTRUE; - while((str=(TObjString*)next())) { - output_file = str->GetString(); - Int_t index = output_file.Index("@"); - if (index > 0) output_file.Remove(index); - // Skip already merged outputs - if (!gSystem->AccessPathName(output_file)) { - Info("MergeOutputs", "Output file <%s> found. Not merging again.", output_file.Data()); - continue; + Int_t index = outputFile.Index("@"); + if (index > 0) outputFile.Remove(index); + TString inputFile = outputFile; + TString sbasedir = basedir; + if (sbasedir.Contains(".xml")) { + // Merge files pointed by the xml - ignore nmaxmerge and set ichunk to 0 + nmaxmerge = 9999999; + TGridCollection *coll = (TGridCollection*)gROOT->ProcessLine(Form("TAlienCollection::Open(\"%s\");", basedir)); + if (!coll) { + ::Error("MergeOutput", "Input XML collection empty."); + return kFALSE; + } + // Iterate grid collection + while (coll->Next()) { + TString fname = gSystem->DirName(coll->GetTURL()); + fname += "/"; + fname += inputFile; + listoffiles->Add(new TNamed(fname.Data(),"")); } - if (fMergeExcludes.Length() && - fMergeExcludes.Contains(output_file.Data())) continue; - // Perform a 'find' command in the output directory, looking for registered outputs - command = Form("find %s/ *%s", fGridOutputDir.Data(), output_file.Data()); + } else { + command = Form("find %s/ *%s", basedir, inputFile.Data()); printf("command: %s\n", command.Data()); TGridResult *res = gGrid->Command(command); - if (!res) continue; - TFileMerger *fm = 0; + if (!res) { + ::Error("MergeOutput","No result for the find command\n"); + delete listoffiles; + return kFALSE; + } TIter nextmap(res); TMap *map = 0; - previous_chunk = ""; - count_chunk = 0; - // Check if there is a merge operation to resume - output_chunk = output_file; - output_chunk.ReplaceAll(".root", "_*.root"); - if (!gSystem->Exec(Form("ls %s", output_chunk.Data()))) { + while ((map=(TMap*)nextmap())) { + TObjString *objs = dynamic_cast(map->GetValue("turl")); + if (!objs || !objs->GetString().Length()) { + // Nothing found - skip this output + delete res; + delete listoffiles; + return kFALSE; + } + listoffiles->Add(new TNamed(objs->GetName(),"")); + } + delete res; + } + if (!listoffiles->GetEntries()) { + ::Error("MergeOutput","No result for the find command\n"); + delete listoffiles; + return kFALSE; + } + + TFileMerger *fm = 0; + TIter next0(listoffiles); + TObjArray *listoffilestmp = new TObjArray(); + listoffilestmp->SetOwner(); + TObject *nextfile; + TString snextfile; + // Keep only the files at upper level + Int_t countChar = 0; + while ((nextfile=next0())) { + snextfile = nextfile->GetName(); + Int_t crtCount = snextfile.CountChar('/'); + if (nextfile == listoffiles->First()) countChar = crtCount; + if (crtCount < countChar) countChar = crtCount; + } + next0.Reset(); + while ((nextfile=next0())) { + snextfile = nextfile->GetName(); + Int_t crtCount = snextfile.CountChar('/'); + if (crtCount > countChar) { + delete nextfile; + continue; + } + listoffilestmp->Add(nextfile); + } + delete listoffiles; + listoffiles = listoffilestmp; // Now contains 'good' files + listoffiles->Print(); + TIter next(listoffiles); + // Check if there is a merge operation to resume. Works only for stage 0 or 1. + outputChunk = outputFile; + outputChunk.ReplaceAll(".root", "_*.root"); + // Check for existent temporary merge files + // Check overwrite mode and remove previous partial results if needed + // Preserve old merging functionality for stage 0. + if (stage==0) { + if (!gSystem->Exec(Form("ls %s 2>/dev/null", outputChunk.Data()))) { while (1) { - for (Int_t counter=0; counter, nentries=%d", output_file.Data(), res->GetSize()); - delete res; - return kFALSE; + // Skip as many input files as in a chunk + for (Int_t counter=0; counterGetName(); } - output_chunk = output_file; - output_chunk.ReplaceAll(".root", Form("_%04d.root", count_chunk)); - printf("%s\n", output_chunk.Data()); - count_chunk++; - if (gSystem->AccessPathName(output_chunk)) continue; - // Merged file with chunks up to found - printf("Resume merging of <%s> from <%s>\n", output_file.Data(), output_chunk.Data()); - previous_chunk = output_chunk; + outputChunk = outputFile; + outputChunk.ReplaceAll(".root", Form("_%04d.root", countChunk)); + countChunk++; + if (gSystem->AccessPathName(outputChunk)) continue; + // Merged file with chunks up to found + ::Info("MergeOutput", "Resume merging of <%s> from <%s>\n", outputFile.Data(), outputChunk.Data()); + previousChunk = outputChunk; break; } - } - count_zero = fMaxMergeFiles; - while ((map=(TMap*)nextmap())) { - // Loop 'find' results and get next LFN - if (count_zero == fMaxMergeFiles) { + } + countZero = nmaxmerge; + + while ((nextfile=next())) { + snextfile = nextfile->GetName(); + // Loop 'find' results and get next LFN + if (countZero == nmaxmerge) { // First file in chunk - create file merger and add previous chunk if any. fm = new TFileMerger(kFALSE); fm->SetFastMethod(kTRUE); - if (previous_chunk.Length()) fm->AddFile(previous_chunk.Data()); - output_chunk = output_file; - output_chunk.ReplaceAll(".root", Form("_%04d.root", count_chunk)); + if (previousChunk.Length()) fm->AddFile(previousChunk.Data()); + outputChunk = outputFile; + outputChunk.ReplaceAll(".root", Form("_%04d.root", countChunk)); } // If last file found, put merged results in the output file - if (map == res->Last()) output_chunk = output_file; - TObjString *objs = dynamic_cast(map->GetValue("turl")); - if (!objs || !objs->GetString().Length()) { - // Nothing found - skip this output - delete res; - delete fm; - break; - } + if (nextfile == listoffiles->Last()) outputChunk = outputFile; // Add file to be merged and decrement chunk counter. - fm->AddFile(objs->GetString()); - count_zero--; - if (count_zero==0 || map == res->Last()) { - fm->OutputFile(output_chunk); + fm->AddFile(snextfile); + countZero--; + if (countZero==0 || nextfile == listoffiles->Last()) { if (!fm->GetMergeList() || !fm->GetMergeList()->GetSize()) { // Nothing found - skip this output - Warning("MergeOutputs", "No <%s> files found.", output_file.Data()); - delete res; - delete fm; + ::Warning("MergeOutput", "No <%s> files found.", inputFile.Data()); + merged = kFALSE; break; } + fm->OutputFile(outputChunk); // Merge the outputs, then go to next chunk if (!fm->Merge()) { - Error("MergeOutputs", "Could not merge all <%s> files", output_file.Data()); - delete res; - delete fm; + ::Error("MergeOutput", "Could not merge all <%s> files", outputFile.Data()); merged = kFALSE; break; } else { - Info("MergeOutputs", "\n##### Merged %d output files to <%s>", fm->GetMergeList()->GetSize(), output_chunk.Data()); - gSystem->Unlink(previous_chunk); + ::Info("MergeOutputs", "\n##### Merged %d output files to <%s>", fm->GetMergeList()->GetSize(), outputChunk.Data()); + gSystem->Unlink(previousChunk); } - if (map == res->Last()) { - delete res; - delete fm; - break; - } - count_chunk++; - count_zero = fMaxMergeFiles; - previous_chunk = output_chunk; + if (nextfile == listoffiles->Last()) break; + countChunk++; + countZero = nmaxmerge; + previousChunk = outputChunk; } } + delete listoffiles; + delete fm; + return merged; + } + // Merging stage different than 0. + // Move to the begining of the requested chunk. + fm = new TFileMerger(kFALSE); + fm->SetFastMethod(kTRUE); + while ((nextfile=next())) fm->AddFile(nextfile->GetName()); + delete listoffiles; + if (!fm->GetMergeList() || !fm->GetMergeList()->GetSize()) { + // Nothing found - skip this output + ::Warning("MergeOutput", "No <%s> files found.", inputFile.Data()); + delete fm; + return kFALSE; + } + fm->OutputFile(outputFile); + // Merge the outputs + if (!fm->Merge()) { + ::Error("MergeOutput", "Could not merge all <%s> files", outputFile.Data()); + delete fm; + return kFALSE; + } else { + ::Info("MergeOutput", "\n##### Merged %d output files to <%s>", fm->GetMergeList()->GetSize(), outputFile.Data()); + } + delete fm; + return kTRUE; +} + +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::MergeOutputs() +{ +// Merge analysis outputs existing in the AliEn space. + if (TestBit(AliAnalysisGrid::kTest)) return kTRUE; + if (TestBit(AliAnalysisGrid::kOffline)) return kFALSE; + if (!Connect()) { + Error("MergeOutputs", "Cannot merge outputs without grid connection. Terminate will NOT be executed"); + return kFALSE; + } + if (fMergeViaJDL) { + if (!TestBit(AliAnalysisGrid::kMerge)) { + Info("MergeOutputs", "### Re-run with option in terminate mode of the plugin to submit merging jobs ###"); + return kFALSE; + } + if (fProductionMode) { + Info("MergeOutputs", "### Merging will be submitted by LPM manager... ###"); + return kFALSE; + } + Info("MergeOutputs", "Submitting merging JDL"); + if (!SubmitMerging()) return kFALSE; + Info("MergeOutputs", "### Re-run with off to collect results after merging jobs are done ###"); + Info("MergeOutputs", "### The Terminate() method is executed by the merging jobs"); + return kFALSE; + } + // Get the output path + if (!fGridOutputDir.Contains("/")) fGridOutputDir = Form("/%s/%s/%s", gGrid->GetHomeDirectory(), fGridWorkingDir.Data(), fGridOutputDir.Data()); + if (!DirectoryExists(fGridOutputDir)) { + Error("MergeOutputs", "Grid output directory %s not found. Terminate() will NOT be executed", fGridOutputDir.Data()); + return kFALSE; + } + if (!fOutputFiles.Length()) { + Error("MergeOutputs", "No output file names defined. Are you running the right AliAnalysisAlien configuration ?"); + return kFALSE; + } + // Check if fast read option was requested + Info("MergeOutputs", "Started local merging of output files from: alien://%s \ + \n======= overwrite mode = %d", fGridOutputDir.Data(), (Int_t)fOverwriteMode); + if (fFastReadOption) { + Warning("MergeOutputs", "You requested FastRead option. Using xrootd flags to reduce timeouts. This may skip some files that could be accessed ! \ + \n+++ NOTE: To disable this option, use: plugin->SetFastReadOption(kFALSE)"); + gEnv->SetValue("XNet.ConnectTimeout",50); + gEnv->SetValue("XNet.RequestTimeout",50); + gEnv->SetValue("XNet.MaxRedirectCount",2); + gEnv->SetValue("XNet.ReconnectTimeout",50); + gEnv->SetValue("XNet.FirstConnectMaxCnt",1); + } + // Make sure we change the temporary directory + gSystem->Setenv("TMPDIR", gSystem->pwd()); + TObjArray *list = fOutputFiles.Tokenize(","); + TIter next(list); + TObjString *str; + TString outputFile; + Bool_t merged = kTRUE; + while((str=(TObjString*)next())) { + outputFile = str->GetString(); + Int_t index = outputFile.Index("@"); + if (index > 0) outputFile.Remove(index); + TString outputChunk = outputFile; + outputChunk.ReplaceAll(".root", "_*.root"); + // Skip already merged outputs + if (!gSystem->AccessPathName(outputFile)) { + if (fOverwriteMode) { + Info("MergeOutputs", "Overwrite mode. Existing file %s was deleted.", outputFile.Data()); + gSystem->Unlink(outputFile); + if (!gSystem->Exec(Form("ls %s 2>/dev/null", outputChunk.Data()))) { + Info("MergeOutput", "Overwrite mode: partial merged files %s will removed", + outputChunk.Data()); + gSystem->Exec(Form("rm -f %s", outputChunk.Data())); + } + } else { + Info("MergeOutputs", "Output file <%s> found. Not merging again.", outputFile.Data()); + continue; + } + } else { + if (!gSystem->Exec(Form("ls %s 2>/dev/null", outputChunk.Data()))) { + Info("MergeOutput", "Overwrite mode: partial merged files %s will removed", + outputChunk.Data()); + gSystem->Exec(Form("rm -f %s", outputChunk.Data())); + } + } + if (fMergeExcludes.Length() && + fMergeExcludes.Contains(outputFile.Data())) continue; + // Perform a 'find' command in the output directory, looking for registered outputs + merged = MergeOutput(outputFile, fGridOutputDir, fMaxMergeFiles); + if (!merged) { + Error("MergeOutputs", "Terminate() will NOT be executed"); + return kFALSE; + } + TFile *fileOpened = (TFile*)gROOT->GetListOfFiles()->FindObject(outputFile); + if (fileOpened) fileOpened->Close(); } - if (!merged) { - Error("MergeOutputs", "Terminate() will NOT be executed"); - } - return merged; + return kTRUE; } //______________________________________________________________________________ @@ -1499,45 +2219,308 @@ void AliAnalysisAlien::SetDefaultOutputs(Bool_t flag) // Use the output files connected to output containers from the analysis manager // rather than the files defined by SetOutputFiles if (flag && !TObject::TestBit(AliAnalysisGrid::kDefaultOutputs)) - Info("SetDefaultOutputs", "Plugin will use the output files taken from \ - analysis manager"); + Info("SetDefaultOutputs", "Plugin will use the output files taken from analysis manager"); TObject::SetBit(AliAnalysisGrid::kDefaultOutputs, flag); } //______________________________________________________________________________ -Bool_t AliAnalysisAlien::StartAnalysis(Long64_t /*nentries*/, Long64_t /*firstEntry*/) +void AliAnalysisAlien::SetOutputFiles(const char *list) { -// Start remote grid analysis. - - // Check if output files have to be taken from the analysis manager - if (TestBit(AliAnalysisGrid::kDefaultOutputs)) { - AliAnalysisManager *mgr = AliAnalysisManager::GetAnalysisManager(); - if (!mgr || !mgr->IsInitialized()) { - Error("StartAnalysis", "You need an initialized analysis manager for this"); - return kFALSE; - } - fOutputFiles = ""; - TIter next(mgr->GetOutputs()); - AliAnalysisDataContainer *output; - while ((output=(AliAnalysisDataContainer*)next())) { - const char *filename = output->GetFileName(); - if (!(strcmp(filename, "default"))) { - if (!mgr->GetOutputEventHandler()) continue; - filename = mgr->GetOutputEventHandler()->GetOutputFileName(); - } - if (fOutputFiles.Contains(filename)) continue; - if (fOutputFiles.Length()) fOutputFiles += " "; - fOutputFiles += filename; - } - // Add extra files registered to the analysis manager - if (mgr->GetExtraFiles().Length()) { - if (fOutputFiles.Length()) fOutputFiles += " "; - fOutputFiles += mgr->GetExtraFiles(); - } +// Manually set the output files list. +// Removes duplicates. Not allowed if default outputs are not disabled. + if (TObject::TestBit(AliAnalysisGrid::kDefaultOutputs)) { + Fatal("SetOutputFiles", "You have to explicitly call SetDefaultOutputs(kFALSE) to manually set output files."); + return; } -// if (!fCloseSE.Length()) fCloseSE = gSystem->Getenv("alien_CLOSE_SE"); - if (TestBit(AliAnalysisGrid::kOffline)) { - Info("StartAnalysis","\n##### OFFLINE MODE ##### Files to be used in GRID are produced but not copied \ + Info("SetOutputFiles", "Output file list is set manually - you are on your own."); + fOutputFiles = ""; + TString slist = list; + if (slist.Contains("@")) Warning("SetOutputFiles","The plugin does not allow explicit SE's. Please use: SetNumberOfReplicas() instead."); + TObjArray *arr = slist.Tokenize(" "); + TObjString *os; + TIter next(arr); + TString sout; + while ((os=(TObjString*)next())) { + sout = os->GetString(); + if (sout.Index("@")>0) sout.Remove(sout.Index("@")); + if (fOutputFiles.Contains(sout)) continue; + if (!fOutputFiles.IsNull()) fOutputFiles += ","; + fOutputFiles += sout; + } + delete arr; +} + +//______________________________________________________________________________ +void AliAnalysisAlien::SetOutputArchive(const char *list) +{ +// Manually set the output archive list. Free text - you are on your own... +// Not allowed if default outputs are not disabled. + if (TObject::TestBit(AliAnalysisGrid::kDefaultOutputs)) { + Fatal("SetOutputArchive", "You have to explicitly call SetDefaultOutputs(kFALSE) to manually set the output archives."); + return; + } + Info("SetOutputArchive", "Output archive is set manually - you are on your own."); + fOutputArchive = list; +} + +//______________________________________________________________________________ +void AliAnalysisAlien::SetPreferedSE(const char */*se*/) +{ +// Setting a prefered output SE is not allowed anymore. + Warning("SetPreferedSE", "Setting a preferential SE is not allowed anymore via the plugin. Use SetNumberOfReplicas() and SetDefaultOutputs()"); +} + +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::StartAnalysis(Long64_t /*nentries*/, Long64_t /*firstEntry*/) +{ +// Start remote grid analysis. + AliAnalysisManager *mgr = AliAnalysisManager::GetAnalysisManager(); + Bool_t testMode = TestBit(AliAnalysisGrid::kTest); + if (!mgr || !mgr->IsInitialized()) { + Error("StartAnalysis", "You need an initialized analysis manager for this"); + return kFALSE; + } + // Are we in PROOF mode ? + if (mgr->IsProofMode()) { + Info("StartAnalysis", "##### Starting PROOF analysis on cluster <%s> via the plugin #####", fProofCluster.Data()); + if (fProofCluster.IsNull()) { + Error("StartAnalysis", "You need to specify the proof cluster name via SetProofCluster"); + return kFALSE; + } + if (fProofDataSet.IsNull() && !testMode) { + Error("StartAnalysis", "You need to specify a dataset using SetProofDataSet()"); + return kFALSE; + } + // Set the needed environment + gEnv->SetValue("XSec.GSI.DelegProxy","2"); + // Do we need to reset PROOF ? The success of the Reset operation cannot be checked + if (fProofReset && !testMode) { + if (fProofReset==1) { + Info("StartAnalysis", "Sending soft reset signal to proof cluster %s", fProofCluster.Data()); + gROOT->ProcessLine(Form("TProof::Reset(\"%s\", kFALSE);", fProofCluster.Data())); + } else { + Info("StartAnalysis", "Sending hard reset signal to proof cluster %s", fProofCluster.Data()); + gROOT->ProcessLine(Form("TProof::Reset(\"%s\", kTRUE);", fProofCluster.Data())); + } + Info("StartAnalysis", "Stopping the analysis. Please use SetProofReset(0) to resume."); + return kFALSE; + } + // Do we need to change the ROOT version ? The success of this cannot be checked. + if (!fRootVersionForProof.IsNull() && !testMode) { + gROOT->ProcessLine(Form("TProof::Mgr(\"%s\")->SetROOTVersion(\"%s\");", + fProofCluster.Data(), fRootVersionForProof.Data())); + } + // Connect to PROOF and check the status + Long_t proof = 0; + TString sworkers; + if (fNproofWorkersPerSlave) sworkers = Form("workers=%dx", fNproofWorkersPerSlave); + else if (fNproofWorkers) sworkers = Form("workers=%d", fNproofWorkers); + if (!testMode) { + if (!sworkers.IsNull()) + proof = gROOT->ProcessLine(Form("TProof::Open(\"%s\", \"%s\");", fProofCluster.Data(), sworkers.Data())); + else + proof = gROOT->ProcessLine(Form("TProof::Open(\"%s\");", fProofCluster.Data())); + } else { + proof = gROOT->ProcessLine("TProof::Open(\"\");"); + if (!proof) { + Error("StartAnalysis", "Could not start PROOF in test mode"); + return kFALSE; + } + } + if (!proof) { + Error("StartAnalysis", "Could not connect to PROOF cluster <%s>", fProofCluster.Data()); + return kFALSE; + } + if (fNproofWorkersPerSlave*fNproofWorkers > 0) + gROOT->ProcessLine(Form("gProof->SetParallel(%d);", fNproofWorkers)); + // Is dataset existing ? + if (!testMode) { + TString dataset = fProofDataSet; + Int_t index = dataset.Index("#"); + if (index>=0) dataset.Remove(index); +// if (!gROOT->ProcessLine(Form("gProof->ExistsDataSet(\"%s\");",fProofDataSet.Data()))) { +// Error("StartAnalysis", "Dataset %s not existing", fProofDataSet.Data()); +// return kFALSE; +// } +// Info("StartAnalysis", "Dataset %s found", dataset.Data()); + } + // Is ClearPackages() needed ? + if (TestSpecialBit(kClearPackages)) { + Info("StartAnalysis", "ClearPackages signal sent to PROOF. Use SetClearPackages(kFALSE) to reset this."); + gROOT->ProcessLine("gProof->ClearPackages();"); + } + // Is a given aliroot mode requested ? + TList optionsList; + TString parLibs; + if (!fAliRootMode.IsNull()) { + TString alirootMode = fAliRootMode; + if (alirootMode == "default") alirootMode = ""; + Info("StartAnalysis", "You are requesting AliRoot mode: %s", fAliRootMode.Data()); + optionsList.SetOwner(); + optionsList.Add(new TNamed("ALIROOT_MODE", alirootMode.Data())); + // Check the additional libs to be loaded + TString extraLibs; + Bool_t parMode = kFALSE; + if (!alirootMode.IsNull()) extraLibs = "ANALYSIS:ANALYSISalice"; + // Parse the extra libs for .so + if (fAdditionalLibs.Length()) { + TObjArray *list = fAdditionalLibs.Tokenize(" "); + TIter next(list); + TObjString *str; + while((str=(TObjString*)next())) { + if (str->GetString().Contains(".so")) { + if (parMode) { + Warning("StartAnalysis", "Plugin does not support loading libs after par files in PROOF mode. Library %s and following will not load on workers", str->GetName()); + break; + } + TString stmp = str->GetName(); + if (stmp.BeginsWith("lib")) stmp.Remove(0,3); + stmp.ReplaceAll(".so",""); + if (!extraLibs.IsNull()) extraLibs += ":"; + extraLibs += stmp; + continue; + } + if (str->GetString().Contains(".par")) { + // The first par file found in the list will not allow any further .so + parMode = kTRUE; + if (!parLibs.IsNull()) parLibs += ":"; + parLibs += str->GetName(); + continue; + } + } + if (list) delete list; + } + if (!extraLibs.IsNull()) optionsList.Add(new TNamed("ALIROOT_EXTRA_LIBS",extraLibs.Data())); + // Check extra includes + if (!fIncludePath.IsNull()) { + TString includePath = fIncludePath; + includePath.ReplaceAll(" ",":"); + includePath.Strip(TString::kTrailing, ':'); + Info("StartAnalysis", "Adding extra includes: %s",includePath.Data()); + optionsList.Add(new TNamed("ALIROOT_EXTRA_INCLUDES",includePath.Data())); + } + // Check if connection to grid is requested + if (TestSpecialBit(kProofConnectGrid)) + optionsList.Add(new TNamed("ALIROOT_ENABLE_ALIEN", "1")); + // Enable AliRoot par + if (testMode) { + // Enable proof lite package + TString alirootLite = gSystem->ExpandPathName("$ALICE_ROOT/ANALYSIS/macros/AliRootProofLite.par"); + for (Int_t i=0; iGetName(), obj->GetTitle()); + } + if (!gROOT->ProcessLine(Form("gProof->UploadPackage(\"%s\");",alirootLite.Data())) + && !gROOT->ProcessLine(Form("gProof->EnablePackage(\"%s\", (TList*)0x%lx);",alirootLite.Data(),(ULong_t)&optionsList))) { + Info("StartAnalysis", "AliRootProofLite enabled"); + } else { + Error("StartAnalysis", "There was an error trying to enable package AliRootProofLite.par"); + return kFALSE; + } + } else { + if (gROOT->ProcessLine(Form("gProof->EnablePackage(\"VO_ALICE@AliRoot::%s\", (TList*)0x%lx);", + fAliROOTVersion.Data(), (ULong_t)&optionsList))) { + Error("StartAnalysis", "There was an error trying to enable package VO_ALICE@AliRoot::%s", fAliROOTVersion.Data()); + return kFALSE; + } + } + // Enable first par files from fAdditionalLibs + if (!parLibs.IsNull()) { + TObjArray *list = parLibs.Tokenize(":"); + TIter next(list); + TObjString *package; + while((package=(TObjString*)next())) { + TString spkg = package->GetName(); + spkg.ReplaceAll(".par", ""); + gSystem->Exec(TString::Format("rm -rf %s", spkg.Data())); + if (!gROOT->ProcessLine(Form("gProof->UploadPackage(\"%s\");", package->GetName()))) { + TString enablePackage = (testMode)?Form("gProof->EnablePackage(\"%s\",kFALSE);", package->GetName()):Form("gProof->EnablePackage(\"%s\",kTRUE);", package->GetName()); + if (gROOT->ProcessLine(enablePackage)) { + Error("StartAnalysis", "There was an error trying to enable package %s", package->GetName()); + return kFALSE; + } + } else { + Error("StartAnalysis", "There was an error trying to upload package %s", package->GetName()); + return kFALSE; + } + } + if (list) delete list; + } + } else { + if (fAdditionalLibs.Contains(".so") && !testMode) { + Error("StartAnalysis", "You request additional libs to be loaded but did not enabled any AliRoot mode. Please refer to: \ + \n http://aaf.cern.ch/node/83 and use a parameter for SetAliRootMode()"); + return kFALSE; + } + } + // Enable par files if requested + if (fPackages && fPackages->GetEntries()) { + TIter next(fPackages); + TObject *package; + while ((package=next())) { + // Skip packages already enabled + if (parLibs.Contains(package->GetName())) continue; + TString spkg = package->GetName(); + spkg.ReplaceAll(".par", ""); + gSystem->Exec(TString::Format("rm -rf %s", spkg.Data())); + if (gROOT->ProcessLine(Form("gProof->UploadPackage(\"%s\");", package->GetName()))) { + if (gROOT->ProcessLine(Form("gProof->EnablePackage(\"%s\",kTRUE);", package->GetName()))) { + Error("StartAnalysis", "There was an error trying to enable package %s", package->GetName()); + return kFALSE; + } + } else { + Error("StartAnalysis", "There was an error trying to upload package %s", package->GetName()); + return kFALSE; + } + } + } + // Do we need to load analysis source files ? + // NOTE: don't load on client since this is anyway done by the user to attach his task. + if (fAnalysisSource.Length()) { + TObjArray *list = fAnalysisSource.Tokenize(" "); + TIter next(list); + TObjString *str; + while((str=(TObjString*)next())) { + gROOT->ProcessLine(Form("gProof->Load(\"%s+g\", kTRUE);", str->GetName())); + } + if (list) delete list; + } + if (testMode) { + // Register dataset to proof lite. + if (fFileForTestMode.IsNull()) { + Error("GetChainForTestMode", "For proof test mode please use SetFileForTestMode() pointing to a file that contains data file locations."); + return kFALSE; + } + if (gSystem->AccessPathName(fFileForTestMode)) { + Error("GetChainForTestMode", "File not found: %s", fFileForTestMode.Data()); + return kFALSE; + } + TFileCollection *coll = new TFileCollection(); + coll->AddFromFile(fFileForTestMode); + gROOT->ProcessLine(Form("gProof->RegisterDataSet(\"test_collection\", (TFileCollection*)0x%lx, \"OV\");", (ULong_t)coll)); + gROOT->ProcessLine("gProof->ShowDataSets()"); + } + return kTRUE; + } + + // Check if output files have to be taken from the analysis manager + if (TestBit(AliAnalysisGrid::kDefaultOutputs)) { + // Add output files and AOD files + fOutputFiles = GetListOfFiles("outaod"); + // Add extra files registered to the analysis manager + TString extra = GetListOfFiles("ext"); + if (!extra.IsNull()) { + extra.ReplaceAll(".root", "*.root"); + if (!fOutputFiles.IsNull()) fOutputFiles += ","; + fOutputFiles += extra; + } + // Compose the output archive. + fOutputArchive = "log_archive.zip:std*,*.stat@disk=1 "; + fOutputArchive += Form("root_archive.zip:%s@disk=%d",fOutputFiles.Data(),fNreplicas); + } +// if (!fCloseSE.Length()) fCloseSE = gSystem->Getenv("alien_CLOSE_SE"); + if (TestBit(AliAnalysisGrid::kOffline)) { + Info("StartAnalysis","\n##### OFFLINE MODE ##### Files to be used in GRID are produced but not copied \ \n there nor any job run. You can revise the JDL and analysis \ \n macro then run the same in \"submit\" mode."); } else if (TestBit(AliAnalysisGrid::kTest)) { @@ -1548,47 +2531,60 @@ Bool_t AliAnalysisAlien::StartAnalysis(Long64_t /*nentries*/, Long64_t /*firstEn \n space and job submitted."); } else if (TestBit(AliAnalysisGrid::kMerge)) { Info("StartAnalysis","\n##### MERGE MODE ##### The registered outputs of the analysis will be merged"); + if (fMergeViaJDL) CheckInputData(); return kTRUE; } else { Info("StartAnalysis","\n##### FULL ANALYSIS MODE ##### Producing needed files and submitting your analysis job..."); } + Print(); if (!Connect()) { Error("StartAnalysis", "Cannot start grid analysis without grid connection"); return kFALSE; } - Print(); + if (IsCheckCopy() && gGrid) CheckFileCopy(gGrid->GetHomeDirectory()); if (!CheckInputData()) { Error("StartAnalysis", "There was an error in preprocessing your requested input data"); return kFALSE; } - CreateDataset(fDataPattern); + if (!CreateDataset(fDataPattern)) { + TString serror; + if (!fRunNumbers.Length() && !fRunRange[0]) serror = Form("path to data directory: <%s>", fGridDataDir.Data()); + if (fRunNumbers.Length()) serror = "run numbers"; + if (fRunRange[0]) serror = Form("run range [%d, %d]", fRunRange[0], fRunRange[1]); + serror += Form("\n or data pattern <%s>", fDataPattern.Data()); + Error("StartAnalysis", "No data to process. Please fix %s in your plugin configuration.", serror.Data()); + return kFALSE; + } WriteAnalysisFile(); WriteAnalysisMacro(); WriteExecutable(); WriteValidationScript(); + if (fMergeViaJDL) { + WriteMergingMacro(); + WriteMergeExecutable(); + WriteValidationScript(kTRUE); + } if (!CreateJDL()) return kFALSE; if (TestBit(AliAnalysisGrid::kOffline)) return kFALSE; - if (TestBit(AliAnalysisGrid::kTest)) { + if (testMode) { // Locally testing the analysis Info("StartAnalysis", "\n_______________________________________________________________________ \ \n Running analysis script in a daughter shell as on a worker node \ \n_______________________________________________________________________"); - TObjArray *list = fOutputFiles.Tokenize(" "); + TObjArray *list = fOutputFiles.Tokenize(","); TIter next(list); TObjString *str; - TString output_file; + TString outputFile; while((str=(TObjString*)next())) { - output_file = str->GetString(); - Int_t index = output_file.Index("@"); - if (index > 0) output_file.Remove(index); - if (!gSystem->AccessPathName(output_file)) gSystem->Exec(Form("rm %s", output_file.Data())); + outputFile = str->GetString(); + Int_t index = outputFile.Index("@"); + if (index > 0) outputFile.Remove(index); + if (!gSystem->AccessPathName(outputFile)) gSystem->Exec(Form("rm %s", outputFile.Data())); } delete list; gSystem->Exec(Form("bash %s 2>stderr", fExecutable.Data())); - TString validationScript = fExecutable; - validationScript.ReplaceAll(".sh", "_validation.sh"); - gSystem->Exec(Form("bash %s",validationScript.Data())); + gSystem->Exec(Form("bash %s",fValidationScript.Data())); // gSystem->Exec("cat stdout"); return kFALSE; } @@ -1611,6 +2607,8 @@ Bool_t AliAnalysisAlien::StartAnalysis(Long64_t /*nentries*/, Long64_t /*firstEn if (res) { const char *cjobId = res->GetKey(0,"jobId"); if (!cjobId) { + gGrid->Stdout(); + gGrid->Stderr(); Error("StartAnalysis", "Your JDL %s could not be submitted", fJDLName.Data()); return kFALSE; } else { @@ -1621,10 +2619,13 @@ Bool_t AliAnalysisAlien::StartAnalysis(Long64_t /*nentries*/, Long64_t /*firstEn jobID = cjobId; } delete res; + } else { + Error("StartAnalysis", "No grid result after submission !!! Bailing out..."); + return kFALSE; } } else { // Submit for a range of enumeration of runs. - Submit(); + if (!Submit()) return kFALSE; } Info("StartAnalysis", "\n#### STARTING AN ALIEN SHELL FOR YOU. EXIT WHEN YOUR JOB %s HAS FINISHED. #### \ @@ -1635,52 +2636,206 @@ Bool_t AliAnalysisAlien::StartAnalysis(Long64_t /*nentries*/, Long64_t /*firstEn } //______________________________________________________________________________ -void AliAnalysisAlien::Submit() +const char *AliAnalysisAlien::GetListOfFiles(const char *type) +{ +// Get a comma-separated list of output files of the requested type. +// Type can be (case unsensitive): +// aod - list of aod files (std, extensions and filters) +// out - list of output files connected to containers (but not aod's or extras) +// ext - list of extra files registered to the manager +// ter - list of files produced in terminate + static TString files; + files = ""; + TString stype = type; + stype.ToLower(); + TString aodfiles, extra; + AliAnalysisManager *mgr = AliAnalysisManager::GetAnalysisManager(); + if (!mgr) { + ::Error("GetListOfFiles", "Cannot call this without analysis manager"); + return files.Data(); + } + if (mgr->GetOutputEventHandler()) { + aodfiles = mgr->GetOutputEventHandler()->GetOutputFileName(); + TString extraaod = mgr->GetOutputEventHandler()->GetExtraOutputs(); + if (!extraaod.IsNull()) { + aodfiles += ","; + aodfiles += extraaod; + } + } + if (stype.Contains("aod")) { + files = aodfiles; + if (stype == "aod") return files.Data(); + } + // Add output files that are not in the list of AOD files + TString outputfiles = ""; + TIter next(mgr->GetOutputs()); + AliAnalysisDataContainer *output; + const char *filename = 0; + while ((output=(AliAnalysisDataContainer*)next())) { + filename = output->GetFileName(); + if (!(strcmp(filename, "default"))) continue; + if (outputfiles.Contains(filename)) continue; + if (aodfiles.Contains(filename)) continue; + if (!outputfiles.IsNull()) outputfiles += ","; + outputfiles += filename; + } + if (stype.Contains("out")) { + if (!files.IsNull()) files += ","; + files += outputfiles; + if (stype == "out") return files.Data(); + } + // Add extra files registered to the analysis manager + TString sextra; + extra = mgr->GetExtraFiles(); + if (!extra.IsNull()) { + extra.Strip(); + extra.ReplaceAll(" ", ","); + TObjArray *fextra = extra.Tokenize(","); + TIter nextx(fextra); + TObject *obj; + while ((obj=nextx())) { + if (aodfiles.Contains(obj->GetName())) continue; + if (outputfiles.Contains(obj->GetName())) continue; + if (sextra.Contains(obj->GetName())) continue; + if (!sextra.IsNull()) sextra += ","; + sextra += obj->GetName(); + } + delete fextra; + if (stype.Contains("ext")) { + if (!files.IsNull()) files += ","; + files += sextra; + } + } + if (stype == "ext") return files.Data(); + TString termfiles; + if (!fTerminateFiles.IsNull()) { + fTerminateFiles.Strip(); + fTerminateFiles.ReplaceAll(" ",","); + TObjArray *fextra = fTerminateFiles.Tokenize(","); + TIter nextx(fextra); + TObject *obj; + while ((obj=nextx())) { + if (aodfiles.Contains(obj->GetName())) continue; + if (outputfiles.Contains(obj->GetName())) continue; + if (termfiles.Contains(obj->GetName())) continue; + if (sextra.Contains(obj->GetName())) continue; + if (!termfiles.IsNull()) termfiles += ","; + termfiles += obj->GetName(); + } + delete fextra; + } + if (stype.Contains("ter")) { + if (!files.IsNull() && !termfiles.IsNull()) { + files += ","; + files += termfiles; + } + } + return files.Data(); +} + +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::Submit() { // Submit all master jobs. Int_t nmasterjobs = fInputFiles->GetEntries(); Long_t tshoot = gSystem->Now(); - if (!fNsubmitted) SubmitNext(); + if (!fNsubmitted && !SubmitNext()) return kFALSE; while (fNsubmitted < nmasterjobs) { Long_t now = gSystem->Now(); if ((now-tshoot)>30000) { tshoot = now; - SubmitNext(); + if (!SubmitNext()) return kFALSE; } } + return kTRUE; } //______________________________________________________________________________ -void AliAnalysisAlien::SubmitNext() +Bool_t AliAnalysisAlien::SubmitMerging() { -// Submit next bunch of master jobs if the queue is free. +// Submit all merging jobs. + if (!fGridOutputDir.Contains("/")) fGridOutputDir = Form("/%s/%s/%s", gGrid->GetHomeDirectory(), fGridWorkingDir.Data(), fGridOutputDir.Data()); + gGrid->Cd(fGridOutputDir); + TString mergeJDLName = fExecutable; + mergeJDLName.ReplaceAll(".sh", "_merge.jdl"); + Int_t ntosubmit = fInputFiles->GetEntries(); + for (Int_t i=0; iBaseName(fInputFiles->At(i)->GetName()); + runOutDir.ReplaceAll(".xml", ""); + if (fOutputToRunNo) { + // The output directory is the run number + printf("### Submitting merging job for run <%s>\n", runOutDir.Data()); + runOutDir = Form("%s/%s", fGridOutputDir.Data(), runOutDir.Data()); + } else { + // The output directory is the master number in 3 digits format + printf("### Submitting merging job for master <%03d>\n", i); + runOutDir = Form("%s/%03d",fGridOutputDir.Data(), i); + } + // Check now the number of merging stages. + TObjArray *list = fOutputFiles.Tokenize(","); + TIter next(list); + TObjString *str; + TString outputFile; + while((str=(TObjString*)next())) { + outputFile = str->GetString(); + Int_t index = outputFile.Index("@"); + if (index > 0) outputFile.Remove(index); + if (!fMergeExcludes.Contains(outputFile)) break; + } + delete list; + Bool_t done = CheckMergedFiles(outputFile, runOutDir, fMaxMergeFiles, mergeJDLName); + if (!done) return kFALSE; + } + if (!ntosubmit) return kTRUE; + Info("StartAnalysis", "\n#### STARTING AN ALIEN SHELL FOR YOU. EXIT WHEN YOUR MERGING JOBS HAVE FINISHED. #### \ + \n You may exit at any time and terminate the job later using the option but disabling SetMergeViaJDL\ + \n ##################################################################################"); + gSystem->Exec("aliensh"); + return kTRUE; +} + +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::SubmitNext() +{ +// Submit next bunch of master jobs if the queue is free. The first master job is +// submitted right away, while the next will not be unless the previous was split. +// The plugin will not submit new master jobs if there are more that 500 jobs in +// waiting phase. static Bool_t iscalled = kFALSE; static Int_t firstmaster = 0; static Int_t lastmaster = 0; static Int_t npermaster = 0; - if (iscalled) return; + if (iscalled) return kTRUE; iscalled = kTRUE; Int_t nrunning=0, nwaiting=0, nerror=0, ndone=0; Int_t ntosubmit = 0; TGridResult *res; TString jobID = ""; - if (!fNsubmitted) ntosubmit = 1; - else { + Int_t nmasterjobs = fInputFiles->GetEntries(); + if (!fNsubmitted) { + ntosubmit = 1; + if (!IsUseSubmitPolicy()) { + if (nmasterjobs>5) + Info("SubmitNext","### Warning submit policy not used ! Submitting too many jobs at a time may be prohibitted. \ + \n### You can use SetUseSubmitPolicy() to enable if you have problems."); + ntosubmit = nmasterjobs; + } + } else { TString status = GetJobStatus(firstmaster, lastmaster, nrunning, nwaiting, nerror, ndone); printf("=== master %d: %s\n", lastmaster, status.Data()); // If last master not split, just return - if (status != "SPLIT") {iscalled = kFALSE; return;} + if (status != "SPLIT") {iscalled = kFALSE; return kTRUE;} // No more than 100 waiting jobs - if (nwaiting>100) {iscalled = kFALSE; return;} + if (nwaiting>500) {iscalled = kFALSE; return kTRUE;} npermaster = (nrunning+nwaiting+nerror+ndone)/fNsubmitted; - if (npermaster) ntosubmit = (100-nwaiting)/npermaster; + if (npermaster) ntosubmit = (500-nwaiting)/npermaster; + if (!ntosubmit) ntosubmit = 1; printf("=== WAITING(%d) RUNNING(%d) DONE(%d) OTHER(%d) NperMaster=%d => to submit %d jobs\n", nwaiting, nrunning, ndone, nerror, npermaster, ntosubmit); } - Int_t nmasterjobs = fInputFiles->GetEntries(); for (Int_t i=0; i=nmasterjobs) {iscalled = kFALSE; return;} + if (fNsubmitted>=nmasterjobs) {iscalled = kFALSE; return kTRUE;} TString query; TString runOutDir = gSystem->BaseName(fInputFiles->At(fNsubmitted)->GetName()); runOutDir.ReplaceAll(".xml", ""); @@ -1693,9 +2848,11 @@ void AliAnalysisAlien::SubmitNext() if (res) { TString cjobId1 = res->GetKey(0,"jobId"); if (!cjobId1.Length()) { - Error("StartAnalysis", "Your JDL %s could not be submitted", fJDLName.Data()); iscalled = kFALSE; - return; + gGrid->Stdout(); + gGrid->Stderr(); + Error("StartAnalysis", "Your JDL %s could not be submitted. The message was:", fJDLName.Data()); + return kFALSE; } else { Info("StartAnalysis", "\n_______________________________________________________________________ \ \n##### Your JDL %s submitted (%d to go). \nTHE JOB ID IS: %s \ @@ -1708,9 +2865,13 @@ void AliAnalysisAlien::SubmitNext() fNsubmitted++; } delete res; + } else { + Error("StartAnalysis", "No grid result after submission !!! Bailing out..."); + return kFALSE; } } iscalled = kFALSE; + return kTRUE; } //______________________________________________________________________________ @@ -1736,8 +2897,8 @@ void AliAnalysisAlien::WriteAnalysisFile() TDirectory *cdir = gDirectory; TFile *file = TFile::Open(analysisFile, "RECREATE"); if (file) { - // Skip task Terminate calls for the grid job - mgr->SetSkipTerminate(kTRUE); + // Skip task Terminate calls for the grid job (but not in test mode, where we want to check also the terminate mode + if (!TestBit(AliAnalysisGrid::kTest)) mgr->SetSkipTerminate(kTRUE); // Unless merging makes no sense if (IsSingleOutput()) mgr->SetSkipTerminate(kFALSE); mgr->Write(); @@ -1749,12 +2910,12 @@ void AliAnalysisAlien::WriteAnalysisFile() Info("WriteAnalysisFile", "\n##### Analysis manager: %s wrote to file <%s>\n", mgr->GetName(),analysisFile.Data()); } Bool_t copy = kTRUE; - if (TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; + if (fProductionMode || TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; if (copy) { CdWork(); TString workdir = gGrid->GetHomeDirectory(); workdir += fGridWorkingDir; - Info("CreateJDL", "\n##### Copying file <%s> containing your initialized analysis manager to your alien workspace", analysisFile.Data()); + Info("WriteAnalysisFile", "\n##### Copying file <%s> containing your initialized analysis manager to your alien workspace", analysisFile.Data()); if (FileExists(analysisFile)) gGrid->Rm(analysisFile); TFile::Cp(Form("file:%s",analysisFile.Data()), Form("alien://%s/%s", workdir.Data(),analysisFile.Data())); } @@ -1771,6 +2932,12 @@ void AliAnalysisAlien::WriteAnalysisMacro() Error("WriteAnalysisMacro", "could not open file %s for writing", fAnalysisMacro.Data()); return; } + Bool_t hasSTEERBase = kFALSE; + Bool_t hasESD = kFALSE; + Bool_t hasAOD = kFALSE; + Bool_t hasANALYSIS = kFALSE; + Bool_t hasANALYSISalice = kFALSE; + Bool_t hasCORRFW = kFALSE; TString func = fAnalysisMacro; TString type = "ESD"; TString comment = "// Analysis using "; @@ -1793,31 +2960,40 @@ void AliAnalysisAlien::WriteAnalysisMacro() out << "// Automatically generated analysis steering macro executed in grid subjobs" << endl << endl; out << " TStopwatch timer;" << endl; out << " timer.Start();" << endl << endl; - out << "// load base root libraries" << endl; - out << " gSystem->Load(\"libTree\");" << endl; - out << " gSystem->Load(\"libGeom\");" << endl; - out << " gSystem->Load(\"libVMC\");" << endl; - out << " gSystem->Load(\"libPhysics\");" << endl << endl; - out << " gSystem->Load(\"libMinuit\");" << endl << endl; + // Change temp directory to current one + out << "// Set temporary merging directory to current one" << endl; + out << " gSystem->Setenv(\"TMPDIR\", gSystem->pwd());" << endl << endl; + // Reset existing include path + out << "// Reset existing include path and add current directory first in the search" << endl; + out << " gSystem->SetIncludePath(\"-I.\");" << endl; + if (!fExecutableCommand.Contains("aliroot")) { + out << "// load base root libraries" << endl; + out << " gSystem->Load(\"libTree\");" << endl; + out << " gSystem->Load(\"libGeom\");" << endl; + out << " gSystem->Load(\"libVMC\");" << endl; + out << " gSystem->Load(\"libPhysics\");" << endl << endl; + out << " gSystem->Load(\"libMinuit\");" << endl << endl; + } if (fAdditionalRootLibs.Length()) { - // in principle libtree /lib geom libvmc etc. can go into this list, too - out << "// Add aditional libraries" << endl; - TObjArray *list = fAdditionalRootLibs.Tokenize(" "); - TIter next(list); - TObjString *str; - while((str=(TObjString*)next())) { - if (str->GetString().Contains(".so")) - out << " gSystem->Load(\"" << str->GetString().Data() << "\");" << endl; + // in principle libtree /lib geom libvmc etc. can go into this list, too + out << "// Add aditional libraries" << endl; + TObjArray *list = fAdditionalRootLibs.Tokenize(" "); + TIter next(list); + TObjString *str; + while((str=(TObjString*)next())) { + if (str->GetString().Contains(".so")) + out << " gSystem->Load(\"" << str->GetString().Data() << "\");" << endl; } - if (list) delete list; + if (list) delete list; } out << "// Load analysis framework libraries" << endl; - - + TString setupPar = "AliAnalysisAlien::SetupPar"; if (!fPackages) { - out << " gSystem->Load(\"libSTEERBase\");" << endl; - out << " gSystem->Load(\"libESD\");" << endl; - out << " gSystem->Load(\"libAOD\");" << endl; + if (!fExecutableCommand.Contains("aliroot")) { + out << " gSystem->Load(\"libSTEERBase\");" << endl; + out << " gSystem->Load(\"libESD\");" << endl; + out << " gSystem->Load(\"libAOD\");" << endl; + } out << " gSystem->Load(\"libANALYSIS\");" << endl; out << " gSystem->Load(\"libANALYSISalice\");" << endl; out << " gSystem->Load(\"libCORRFW\");" << endl << endl; @@ -1825,12 +3001,6 @@ void AliAnalysisAlien::WriteAnalysisMacro() TIter next(fPackages); TObject *obj; TString pkgname; - Bool_t hasSTEERBase = kFALSE; - Bool_t hasESD = kFALSE; - Bool_t hasAOD = kFALSE; - Bool_t hasANALYSIS = kFALSE; - Bool_t hasANALYSISalice = kFALSE; - Bool_t hasCORRFW = kFALSE; while ((obj=next())) { pkgname = obj->GetName(); if (pkgname == "STEERBase" || @@ -1845,19 +3015,20 @@ void AliAnalysisAlien::WriteAnalysisMacro() pkgname == "ANALYSISalice.par") hasANALYSISalice = kTRUE; if (pkgname == "CORRFW" || pkgname == "CORRFW.par") hasCORRFW = kTRUE; - } + } + if (hasANALYSISalice) setupPar = "SetupPar"; if (!hasSTEERBase) out << " gSystem->Load(\"libSTEERBase\");" << endl; - else out << " if (!SetupPar(\"STEERBase\")) return;" << endl; + else out << " if (!" << setupPar << "(\"STEERBase\")) return;" << endl; if (!hasESD) out << " gSystem->Load(\"libESD\");" << endl; - else out << " if (!SetupPar(\"ESD\")) return;" << endl; + else out << " if (!" << setupPar << "(\"ESD\")) return;" << endl; if (!hasAOD) out << " gSystem->Load(\"libAOD\");" << endl; - else out << " if (!SetupPar(\"AOD\")) return;" << endl; + else out << " if (!" << setupPar << "(\"AOD\")) return;" << endl; if (!hasANALYSIS) out << " gSystem->Load(\"libANALYSIS\");" << endl; - else out << " if (!SetupPar(\"ANALYSIS\")) return;" << endl; + else out << " if (!" << setupPar << "(\"ANALYSIS\")) return;" << endl; if (!hasANALYSISalice) out << " gSystem->Load(\"libANALYSISalice\");" << endl; - else out << " if (!SetupPar(\"ANALYSISalice\")) return;" << endl; + else out << " if (!" << setupPar << "(\"ANALYSISalice\")) return;" << endl; if (!hasCORRFW) out << " gSystem->Load(\"libCORRFW\");" << endl << endl; - else out << " if (!SetupPar(\"CORRFW\")) return;" << endl << endl; + else out << " if (!" << setupPar << "(\"CORRFW\")) return;" << endl << endl; out << "// Compile other par packages" << endl; next.Reset(); while ((obj=next())) { @@ -1874,12 +3045,24 @@ void AliAnalysisAlien::WriteAnalysisMacro() pkgname == "ANALYSISalice.par" || pkgname == "CORRFW" || pkgname == "CORRFW.par") continue; - out << " if (!SetupPar(\"" << obj->GetName() << "\")) return;" << endl; + out << " if (!" << setupPar << "(\"" << obj->GetName() << "\")) return;" << endl; } } out << "// include path" << endl; + // Get the include path from the interpreter and remove entries pointing to AliRoot + out << " TString intPath = gInterpreter->GetIncludePath();" << endl; + out << " TObjArray *listpaths = intPath.Tokenize(\" \");" << endl; + out << " TIter nextpath(listpaths);" << endl; + out << " TObjString *pname;" << endl; + out << " while ((pname=(TObjString*)nextpath())) {" << endl; + out << " TString current = pname->GetName();" << endl; + out << " if (current.Contains(\"AliRoot\") || current.Contains(\"ALICE_ROOT\")) continue;" << endl; + out << " gSystem->AddIncludePath(current);" << endl; + out << " }" << endl; + out << " if (listpaths) delete listpaths;" << endl; if (fIncludePath.Length()) out << " gSystem->AddIncludePath(\"" << fIncludePath.Data() << "\");" << endl; - out << " gSystem->AddIncludePath(\"-I$ALICE_ROOT/include\");" << endl << endl; + out << " gROOT->ProcessLine(\".include $ALICE_ROOT/include\");" << endl; + out << " printf(\"Include path: %s\\n\", gSystem->GetIncludePath());" << endl << endl; if (fAdditionalLibs.Length()) { out << "// Add aditional AliRoot libraries" << endl; TObjArray *list = fAdditionalLibs.Tokenize(" "); @@ -1888,6 +3071,8 @@ void AliAnalysisAlien::WriteAnalysisMacro() while((str=(TObjString*)next())) { if (str->GetString().Contains(".so")) out << " gSystem->Load(\"" << str->GetString().Data() << "\");" << endl; + if (str->GetString().Contains(".par")) + out << " if (!" << setupPar << "(\"" << str->GetString() << "\")) return;" << endl; } if (list) delete list; } @@ -1903,23 +3088,21 @@ void AliAnalysisAlien::WriteAnalysisMacro() if (list) delete list; } out << endl; +// out << " printf(\"Currently load libraries:\\n\");" << endl; +// out << " printf(\"%s\\n\", gSystem->GetLibraries());" << endl; + if (fFastReadOption) { + Warning("WriteAnalysisMacro", "!!! You requested FastRead option. Using xrootd flags to reduce timeouts in the grid jobs. This may skip some files that could be accessed !!! \ + \n+++ NOTE: To disable this option, use: plugin->SetFastReadOption(kFALSE)"); + out << "// fast xrootd reading enabled" << endl; + out << " printf(\"!!! You requested FastRead option. Using xrootd flags to reduce timeouts. Note that this may skip some files that could be accessed !!!\");" << endl; + out << " gEnv->SetValue(\"XNet.ConnectTimeout\",50);" << endl; + out << " gEnv->SetValue(\"XNet.RequestTimeout\",50);" << endl; + out << " gEnv->SetValue(\"XNet.MaxRedirectCount\",2);" << endl; + out << " gEnv->SetValue(\"XNet.ReconnectTimeout\",50);" << endl; + out << " gEnv->SetValue(\"XNet.FirstConnectMaxCnt\",1);" << endl << endl; + } out << "// connect to AliEn and make the chain" << endl; out << " if (!TGrid::Connect(\"alien://\")) return;" << endl; - if (IsUsingTags()) { - out << " TChain *chain = CreateChainFromTags(\"wn.xml\", anatype);" << endl << endl; - } else { - if(fFriendChainName!="AliAOD.VertexingHF.root") { - out << " TChain *chain = CreateChain(\"wn.xml\", anatype);" << endl << endl; - } else { - out << " // Check if the macro to create the chain was provided" << endl; - out << " if (gSystem->AccessPathName(\"MakeAODInputChain.C\")) {" << endl; - out << " ::Error(\"" << func.Data() << "\", \"File MakeAODInputChain.C not provided. Aborting.\");" << endl; - out << " return;" << endl; - out << " }" << endl; - out << " gROOT->LoadMacro(\"MakeAODInputChain.C\");" << endl; - out << " TChain *chain = MakeAODInputChain(\"wn.xml\",\"none\");" << endl << endl; - } - } out << "// read the analysis manager from file" << endl; TString analysisFile = fExecutable; analysisFile.ReplaceAll(".sh", ".root"); @@ -1933,10 +3116,25 @@ void AliAnalysisAlien::WriteAnalysisMacro() out << " mgr = (AliAnalysisManager*)file->Get(key->GetName());" << endl; out << " };" << endl; out << " if (!mgr) {" << endl; - out << " ::Error(\"" << func.Data() << "\", \"No analysis manager found in file" << analysisFile <<"\");" << endl; + out << " ::Error(\"" << func.Data() << "\", \"No analysis manager found in file " << analysisFile <<"\");" << endl; out << " return;" << endl; out << " }" << endl << endl; out << " mgr->PrintStatus();" << endl; + if (AliAnalysisManager::GetAnalysisManager()) { + if (AliAnalysisManager::GetAnalysisManager()->GetDebugLevel()>3) { + out << " gEnv->SetValue(\"XNet.Debug\", \"1\");" << endl; + } else { + if (TestBit(AliAnalysisGrid::kTest)) + out << " AliLog::SetGlobalLogLevel(AliLog::kWarning);" << endl; + else + out << " AliLog::SetGlobalLogLevel(AliLog::kError);" << endl; + } + } + if (IsUsingTags()) { + out << " TChain *chain = CreateChainFromTags(\"wn.xml\", anatype);" << endl << endl; + } else { + out << " TChain *chain = CreateChain(\"wn.xml\", anatype);" << endl << endl; + } out << " mgr->StartAnalysis(\"localfile\", chain);" << endl; out << " timer.Stop();" << endl; out << " timer.Print();" << endl; @@ -1979,7 +3177,7 @@ void AliAnalysisAlien::WriteAnalysisMacro() msg += " AliLHCTagCuts *lhcCuts,\n"; msg += " AliDetectorTagCuts *detCuts,\n"; msg += " AliEventTagCuts *evCuts)"; - Info("WriteAnalysisMacro", msg.Data()); + Info("WriteAnalysisMacro", "%s", msg.Data()); } } if (!IsUsingTags() || fFriendChainName!="") { @@ -1987,6 +3185,8 @@ void AliAnalysisAlien::WriteAnalysisMacro() out << "TChain* CreateChain(const char *xmlfile, const char *type=\"ESD\")" << endl; out << "{" << endl; out << "// Create a chain using url's from xml file" << endl; + out << " TString filename;" << endl; + out << " Int_t run = 0;" << endl; out << " TString treename = type;" << endl; out << " treename.ToLower();" << endl; out << " treename += \"Tree\";" << endl; @@ -1998,13 +3198,23 @@ void AliAnalysisAlien::WriteAnalysisMacro() out << " ::Error(\"CreateChain\", \"Cannot create an AliEn collection from %s\", xmlfile);" << endl; out << " return NULL;" << endl; out << " }" << endl; + out << " AliAnalysisManager *mgr = AliAnalysisManager::GetAnalysisManager();" << endl; out << " TChain *chain = new TChain(treename);" << endl; if(fFriendChainName!="") { out << " TChain *chainFriend = new TChain(treename);" << endl; } out << " coll->Reset();" << endl; out << " while (coll->Next()) {" << endl; - out << " chain->Add(coll->GetTURL(\"\"));" << endl; + out << " filename = coll->GetTURL("");" << endl; + out << " if (mgr) {" << endl; + out << " Int_t nrun = AliAnalysisManager::GetRunFromAlienPath(filename);" << endl; + out << " if (nrun && nrun != run) {" << endl; + out << " printf(\"### Run number detected from chain: %d\\n\", nrun);" << endl; + out << " mgr->SetRunFromPath(nrun);" << endl; + out << " run = nrun;" << endl; + out << " }" << endl; + out << " }" << endl; + out << " chain->Add(filename);" << endl; if(fFriendChainName!="") { out << " TString fileFriend=coll->GetTURL(\"\");" << endl; out << " fileFriend.ReplaceAll(\"AliAOD.root\",\""<Exec(Form(\"tar xvzf %s.par\", pkgdir.Data()));" << endl; + out << " gSystem->Exec(TString::Format(\"tar xvzf %s.par\", pkgdir.Data()));" << endl; out << " TString cdir = gSystem->WorkingDirectory();" << endl; out << " gSystem->ChangeDirectory(pkgdir);" << endl; out << " // Check for BUILD.sh and execute" << endl; @@ -2065,7 +3275,7 @@ void AliAnalysisAlien::WriteAnalysisMacro() Info("WriteAnalysisMacro", "\n##### Analysis macro to run on worker nodes <%s> written",fAnalysisMacro.Data()); } Bool_t copy = kTRUE; - if (TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; + if (fProductionMode || TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; if (copy) { CdWork(); TString workdir = gGrid->GetHomeDirectory(); @@ -2081,6 +3291,336 @@ void AliAnalysisAlien::WriteAnalysisMacro() } } +//______________________________________________________________________________ +void AliAnalysisAlien::WriteMergingMacro() +{ +// Write a macro to merge the outputs per master job. + if (!fMergeViaJDL) return; + if (!fOutputFiles.Length()) { + Error("WriteMergingMacro", "No output file names defined. Are you running the right AliAnalysisAlien configuration ?"); + return; + } + TString mergingMacro = fExecutable; + mergingMacro.ReplaceAll(".sh","_merge.C"); + if (!fGridOutputDir.Contains("/")) fGridOutputDir = Form("/%s/%s/%s", gGrid->GetHomeDirectory(), fGridWorkingDir.Data(), fGridOutputDir.Data()); + if (!TestBit(AliAnalysisGrid::kSubmit)) { + ofstream out; + out.open(mergingMacro.Data(), ios::out); + if (!out.good()) { + Error("WriteMergingMacro", "could not open file %s for writing", fAnalysisMacro.Data()); + return; + } + Bool_t hasSTEERBase = kFALSE; + Bool_t hasESD = kFALSE; + Bool_t hasAOD = kFALSE; + Bool_t hasANALYSIS = kFALSE; + Bool_t hasANALYSISalice = kFALSE; + Bool_t hasCORRFW = kFALSE; + TString func = mergingMacro; + TString comment; + func.ReplaceAll(".C", ""); + out << "void " << func.Data() << "(const char *dir, Int_t stage=0, Int_t laststage=0)" << endl; + out << "{" << endl; + out << "// Automatically generated merging macro executed in grid subjobs" << endl << endl; + out << " TStopwatch timer;" << endl; + out << " timer.Start();" << endl << endl; + // Reset existing include path + out << "// Reset existing include path and add current directory first in the search" << endl; + out << " gSystem->SetIncludePath(\"-I.\");" << endl; + if (!fExecutableCommand.Contains("aliroot")) { + out << "// load base root libraries" << endl; + out << " gSystem->Load(\"libTree\");" << endl; + out << " gSystem->Load(\"libGeom\");" << endl; + out << " gSystem->Load(\"libVMC\");" << endl; + out << " gSystem->Load(\"libPhysics\");" << endl << endl; + out << " gSystem->Load(\"libMinuit\");" << endl << endl; + } + if (fAdditionalRootLibs.Length()) { + // in principle libtree /lib geom libvmc etc. can go into this list, too + out << "// Add aditional libraries" << endl; + TObjArray *list = fAdditionalRootLibs.Tokenize(" "); + TIter next(list); + TObjString *str; + while((str=(TObjString*)next())) { + if (str->GetString().Contains(".so")) + out << " gSystem->Load(\"" << str->GetString().Data() << "\");" << endl; + } + if (list) delete list; + } + out << "// Load analysis framework libraries" << endl; + if (!fPackages) { + if (!fExecutableCommand.Contains("aliroot")) { + out << " gSystem->Load(\"libSTEERBase\");" << endl; + out << " gSystem->Load(\"libESD\");" << endl; + out << " gSystem->Load(\"libAOD\");" << endl; + } + out << " gSystem->Load(\"libANALYSIS\");" << endl; + out << " gSystem->Load(\"libANALYSISalice\");" << endl; + out << " gSystem->Load(\"libCORRFW\");" << endl << endl; + } else { + TIter next(fPackages); + TObject *obj; + TString pkgname; + TString setupPar = "AliAnalysisAlien::SetupPar"; + while ((obj=next())) { + pkgname = obj->GetName(); + if (pkgname == "STEERBase" || + pkgname == "STEERBase.par") hasSTEERBase = kTRUE; + if (pkgname == "ESD" || + pkgname == "ESD.par") hasESD = kTRUE; + if (pkgname == "AOD" || + pkgname == "AOD.par") hasAOD = kTRUE; + if (pkgname == "ANALYSIS" || + pkgname == "ANALYSIS.par") hasANALYSIS = kTRUE; + if (pkgname == "ANALYSISalice" || + pkgname == "ANALYSISalice.par") hasANALYSISalice = kTRUE; + if (pkgname == "CORRFW" || + pkgname == "CORRFW.par") hasCORRFW = kTRUE; + } + if (hasANALYSISalice) setupPar = "SetupPar"; + if (!hasSTEERBase) out << " gSystem->Load(\"libSTEERBase\");" << endl; + else out << " if (!" << setupPar << "(\"STEERBase\")) return;" << endl; + if (!hasESD) out << " gSystem->Load(\"libESD\");" << endl; + else out << " if (!" << setupPar << "(\"ESD\")) return;" << endl; + if (!hasAOD) out << " gSystem->Load(\"libAOD\");" << endl; + else out << " if (!" << setupPar << "(\"AOD\")) return;" << endl; + if (!hasANALYSIS) out << " gSystem->Load(\"libANALYSIS\");" << endl; + else out << " if (!" << setupPar << "(\"ANALYSIS\")) return;" << endl; + if (!hasANALYSISalice) out << " gSystem->Load(\"libANALYSISalice\");" << endl; + else out << " if (!" << setupPar << "(\"ANALYSISalice\")) return;" << endl; + if (!hasCORRFW) out << " gSystem->Load(\"libCORRFW\");" << endl << endl; + else out << " if (!" << setupPar << "(\"CORRFW\")) return;" << endl << endl; + out << "// Compile other par packages" << endl; + next.Reset(); + while ((obj=next())) { + pkgname = obj->GetName(); + if (pkgname == "STEERBase" || + pkgname == "STEERBase.par" || + pkgname == "ESD" || + pkgname == "ESD.par" || + pkgname == "AOD" || + pkgname == "AOD.par" || + pkgname == "ANALYSIS" || + pkgname == "ANALYSIS.par" || + pkgname == "ANALYSISalice" || + pkgname == "ANALYSISalice.par" || + pkgname == "CORRFW" || + pkgname == "CORRFW.par") continue; + out << " if (!" << setupPar << "(\"" << obj->GetName() << "\")) return;" << endl; + } + } + out << "// include path" << endl; + // Get the include path from the interpreter and remove entries pointing to AliRoot + out << " TString intPath = gInterpreter->GetIncludePath();" << endl; + out << " TObjArray *listpaths = intPath.Tokenize(\" \");" << endl; + out << " TIter nextpath(listpaths);" << endl; + out << " TObjString *pname;" << endl; + out << " while ((pname=(TObjString*)nextpath())) {" << endl; + out << " TString current = pname->GetName();" << endl; + out << " if (current.Contains(\"AliRoot\") || current.Contains(\"ALICE_ROOT\")) continue;" << endl; + out << " gSystem->AddIncludePath(current);" << endl; + out << " }" << endl; + out << " if (listpaths) delete listpaths;" << endl; + if (fIncludePath.Length()) out << " gSystem->AddIncludePath(\"" << fIncludePath.Data() << "\");" << endl; + out << " gROOT->ProcessLine(\".include $ALICE_ROOT/include\");" << endl; + out << " printf(\"Include path: %s\\n\", gSystem->GetIncludePath());" << endl << endl; + if (fAdditionalLibs.Length()) { + out << "// Add aditional AliRoot libraries" << endl; + TObjArray *list = fAdditionalLibs.Tokenize(" "); + TIter next(list); + TObjString *str; + while((str=(TObjString*)next())) { + if (str->GetString().Contains(".so")) + out << " gSystem->Load(\"" << str->GetString().Data() << "\");" << endl; + } + if (list) delete list; + } + out << endl; + out << "// Analysis source to be compiled at runtime (if any)" << endl; + if (fAnalysisSource.Length()) { + TObjArray *list = fAnalysisSource.Tokenize(" "); + TIter next(list); + TObjString *str; + while((str=(TObjString*)next())) { + out << " gROOT->ProcessLine(\".L " << str->GetString().Data() << "+g\");" << endl; + } + if (list) delete list; + } + out << endl; + + if (fFastReadOption) { + Warning("WriteMergingMacro", "!!! You requested FastRead option. Using xrootd flags to reduce timeouts in the grid merging jobs. Note that this may skip some files that could be accessed !!!"); + out << "// fast xrootd reading enabled" << endl; + out << " printf(\"!!! You requested FastRead option. Using xrootd flags to reduce timeouts. Note that this may skip some files that could be accessed !!!\");" << endl; + out << " gEnv->SetValue(\"XNet.ConnectTimeout\",50);" << endl; + out << " gEnv->SetValue(\"XNet.RequestTimeout\",50);" << endl; + out << " gEnv->SetValue(\"XNet.MaxRedirectCount\",2);" << endl; + out << " gEnv->SetValue(\"XNet.ReconnectTimeout\",50);" << endl; + out << " gEnv->SetValue(\"XNet.FirstConnectMaxCnt\",1);" << endl << endl; + } + // Change temp directory to current one + out << "// Set temporary merging directory to current one" << endl; + out << " gSystem->Setenv(\"TMPDIR\", gSystem->pwd());" << endl << endl; + out << "// Connect to AliEn" << endl; + out << " if (!TGrid::Connect(\"alien://\")) return;" << endl; + out << " TString outputDir = dir;" << endl; + out << " TString outputFiles = \"" << GetListOfFiles("out") << "\";" << endl; + out << " TString mergeExcludes = \"" << fMergeExcludes << "\";" << endl; + out << " TObjArray *list = outputFiles.Tokenize(\",\");" << endl; + out << " TIter *iter = new TIter(list);" << endl; + out << " TObjString *str;" << endl; + out << " TString outputFile;" << endl; + out << " Bool_t merged = kTRUE;" << endl; + out << " while((str=(TObjString*)iter->Next())) {" << endl; + out << " outputFile = str->GetString();" << endl; + out << " if (outputFile.Contains(\"*\")) continue;" << endl; + out << " Int_t index = outputFile.Index(\"@\");" << endl; + out << " if (index > 0) outputFile.Remove(index);" << endl; + out << " // Skip already merged outputs" << endl; + out << " if (!gSystem->AccessPathName(outputFile)) {" << endl; + out << " printf(\"Output file <%s> found. Not merging again.\",outputFile.Data());" << endl; + out << " continue;" << endl; + out << " }" << endl; + out << " if (mergeExcludes.Contains(outputFile.Data())) continue;" << endl; + out << " merged = AliAnalysisAlien::MergeOutput(outputFile, outputDir, " << fMaxMergeFiles << ", stage);" << endl; + out << " if (!merged) {" << endl; + out << " printf(\"ERROR: Cannot merge %s\\n\", outputFile.Data());" << endl; + out << " return;" << endl; + out << " }" << endl; + out << " }" << endl; + out << " // all outputs merged, validate" << endl; + out << " ofstream out;" << endl; + out << " out.open(\"outputs_valid\", ios::out);" << endl; + out << " out.close();" << endl; + out << " // read the analysis manager from file" << endl; + TString analysisFile = fExecutable; + analysisFile.ReplaceAll(".sh", ".root"); + out << " if (laststage<10000) return;" << endl; + out << " TFile *file = TFile::Open(\"" << analysisFile << "\");" << endl; + out << " if (!file) return;" << endl; + out << " TIter nextkey(file->GetListOfKeys());" << endl; + out << " AliAnalysisManager *mgr = 0;" << endl; + out << " TKey *key;" << endl; + out << " while ((key=(TKey*)nextkey())) {" << endl; + out << " if (!strcmp(key->GetClassName(), \"AliAnalysisManager\"))" << endl; + out << " mgr = (AliAnalysisManager*)file->Get(key->GetName());" << endl; + out << " };" << endl; + out << " if (!mgr) {" << endl; + out << " ::Error(\"" << func.Data() << "\", \"No analysis manager found in file" << analysisFile <<"\");" << endl; + out << " return;" << endl; + out << " }" << endl << endl; + out << " mgr->SetRunFromPath(mgr->GetRunFromAlienPath(dir));" << endl; + out << " mgr->SetSkipTerminate(kFALSE);" << endl; + out << " mgr->PrintStatus();" << endl; + if (AliAnalysisManager::GetAnalysisManager()) { + if (AliAnalysisManager::GetAnalysisManager()->GetDebugLevel()>3) { + out << " gEnv->SetValue(\"XNet.Debug\", \"1\");" << endl; + } else { + if (TestBit(AliAnalysisGrid::kTest)) + out << " AliLog::SetGlobalLogLevel(AliLog::kWarning);" << endl; + else + out << " AliLog::SetGlobalLogLevel(AliLog::kError);" << endl; + } + } + out << " TTree *tree = NULL;" << endl; + out << " mgr->StartAnalysis(\"gridterminate\", tree);" << endl; + out << "}" << endl << endl; + if (hasANALYSISalice) { + out <<"//________________________________________________________________________________" << endl; + out << "Bool_t SetupPar(const char *package) {" << endl; + out << "// Compile the package and set it up." << endl; + out << " TString pkgdir = package;" << endl; + out << " pkgdir.ReplaceAll(\".par\",\"\");" << endl; + out << " gSystem->Exec(TString::Format(\"tar xvzf %s.par\", pkgdir.Data()));" << endl; + out << " TString cdir = gSystem->WorkingDirectory();" << endl; + out << " gSystem->ChangeDirectory(pkgdir);" << endl; + out << " // Check for BUILD.sh and execute" << endl; + out << " if (!gSystem->AccessPathName(\"PROOF-INF/BUILD.sh\")) {" << endl; + out << " printf(\"*******************************\\n\");" << endl; + out << " printf(\"*** Building PAR archive ***\\n\");" << endl; + out << " printf(\"*******************************\\n\");" << endl; + out << " if (gSystem->Exec(\"PROOF-INF/BUILD.sh\")) {" << endl; + out << " ::Error(\"SetupPar\", \"Cannot build par archive %s\", pkgdir.Data());" << endl; + out << " gSystem->ChangeDirectory(cdir);" << endl; + out << " return kFALSE;" << endl; + out << " }" << endl; + out << " } else {" << endl; + out << " ::Error(\"SetupPar\",\"Cannot access PROOF-INF/BUILD.sh for package %s\", pkgdir.Data());" << endl; + out << " gSystem->ChangeDirectory(cdir);" << endl; + out << " return kFALSE;" << endl; + out << " }" << endl; + out << " // Check for SETUP.C and execute" << endl; + out << " if (!gSystem->AccessPathName(\"PROOF-INF/SETUP.C\")) {" << endl; + out << " printf(\"*******************************\\n\");" << endl; + out << " printf(\"*** Setup PAR archive ***\\n\");" << endl; + out << " printf(\"*******************************\\n\");" << endl; + out << " gROOT->Macro(\"PROOF-INF/SETUP.C\");" << endl; + out << " } else {" << endl; + out << " ::Error(\"SetupPar\",\"Cannot access PROOF-INF/SETUP.C for package %s\", pkgdir.Data());" << endl; + out << " gSystem->ChangeDirectory(cdir);" << endl; + out << " return kFALSE;" << endl; + out << " }" << endl; + out << " // Restore original workdir" << endl; + out << " gSystem->ChangeDirectory(cdir);" << endl; + out << " return kTRUE;" << endl; + out << "}" << endl; + } + } + Bool_t copy = kTRUE; + if (fProductionMode || TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; + if (copy) { + CdWork(); + TString workdir = gGrid->GetHomeDirectory(); + workdir += fGridWorkingDir; + if (FileExists(mergingMacro)) gGrid->Rm(mergingMacro); + Info("WriteMergingMacro", "\n##### Copying merging macro: <%s> to your alien workspace", mergingMacro.Data()); + TFile::Cp(Form("file:%s",mergingMacro.Data()), Form("alien://%s/%s", workdir.Data(), mergingMacro.Data())); + } +} + +//______________________________________________________________________________ +Bool_t AliAnalysisAlien::SetupPar(const char *package) +{ +// Compile the par file archive pointed by . This must be present in the current directory. +// Note that for loading the compiled library. The current directory should have precedence in +// LD_LIBRARY_PATH + TString pkgdir = package; + pkgdir.ReplaceAll(".par",""); + gSystem->Exec(TString::Format("tar xzf %s.par", pkgdir.Data())); + TString cdir = gSystem->WorkingDirectory(); + gSystem->ChangeDirectory(pkgdir); + // Check for BUILD.sh and execute + if (!gSystem->AccessPathName("PROOF-INF/BUILD.sh")) { + printf("**************************************************\n"); + printf("*** Building PAR archive %s\n", package); + printf("**************************************************\n"); + if (gSystem->Exec("PROOF-INF/BUILD.sh")) { + ::Error("SetupPar", "Cannot build par archive %s", pkgdir.Data()); + gSystem->ChangeDirectory(cdir); + return kFALSE; + } + } else { + ::Error("SetupPar","Cannot access PROOF-INF/BUILD.sh for package %s", pkgdir.Data()); + gSystem->ChangeDirectory(cdir); + return kFALSE; + } + // Check for SETUP.C and execute + if (!gSystem->AccessPathName("PROOF-INF/SETUP.C")) { + printf("**************************************************\n"); + printf("*** Setup PAR archive %s\n", package); + printf("**************************************************\n"); + gROOT->Macro("PROOF-INF/SETUP.C"); + printf("*** Loaded library: %s\n", gSystem->GetLibraries(pkgdir,"",kFALSE)); + } else { + ::Error("SetupPar","Cannot access PROOF-INF/SETUP.C for package %s", pkgdir.Data()); + gSystem->ChangeDirectory(cdir); + return kFALSE; + } + // Restore original workdir + gSystem->ChangeDirectory(cdir); + return kTRUE; +} + //______________________________________________________________________________ void AliAnalysisAlien::WriteExecutable() { @@ -2093,6 +3633,8 @@ void AliAnalysisAlien::WriteExecutable() return; } out << "#!/bin/bash" << endl; + // Make sure we can properly compile par files + out << "export LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH" << endl; out << "echo \"=========================================\"" << endl; out << "echo \"############## PATH : ##############\"" << endl; out << "echo $PATH" << endl; @@ -2111,31 +3653,88 @@ void AliAnalysisAlien::WriteExecutable() out << "echo \"############## memory : ##############\"" << endl; out << "free -m" << endl; out << "echo \"=========================================\"" << endl << endl; - // Make sure we can properly compile par files - if (TObject::TestBit(AliAnalysisGrid::kUsePars)) out << "export LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH" << endl; out << fExecutableCommand << " "; out << fAnalysisMacro.Data() << " " << fExecutableArgs.Data() << endl << endl; out << "echo \"======== " << fAnalysisMacro.Data() << " finished with exit code: $? ========\"" << endl; out << "echo \"############## memory after: ##############\"" << endl; out << "free -m" << endl; - out << "echo \"############## Last 10 lines from dmesg : ##############\"" << endl; - out << "dmesg | tail -n 10" << endl; } Bool_t copy = kTRUE; - if (TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; + if (fProductionMode || TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; if (copy) { CdWork(); TString workdir = gGrid->GetHomeDirectory(); TString bindir = Form("%s/bin", workdir.Data()); - if (!DirectoryExists(bindir)) gGrid->Mkdir(bindir); + if (!DirectoryExists(bindir)) gGrid->Mkdir(bindir,"-p"); workdir += fGridWorkingDir; TString executable = Form("%s/bin/%s", gGrid->GetHomeDirectory(), fExecutable.Data()); if (FileExists(executable)) gGrid->Rm(executable); - Info("CreateJDL", "\n##### Copying executable file <%s> to your AliEn bin directory", fExecutable.Data()); + Info("WriteExecutable", "\n##### Copying executable file <%s> to your AliEn bin directory", fExecutable.Data()); TFile::Cp(Form("file:%s",fExecutable.Data()), Form("alien://%s", executable.Data())); } } +//______________________________________________________________________________ +void AliAnalysisAlien::WriteMergeExecutable() +{ +// Generate the alien executable script for the merging job. + if (!fMergeViaJDL) return; + TString mergeExec = fExecutable; + mergeExec.ReplaceAll(".sh", "_merge.sh"); + if (!TestBit(AliAnalysisGrid::kSubmit)) { + ofstream out; + out.open(mergeExec.Data(), ios::out); + if (out.bad()) { + Error("WriteMergingExecutable", "Bad file name for executable: %s", mergeExec.Data()); + return; + } + out << "#!/bin/bash" << endl; + // Make sure we can properly compile par files + out << "export LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH" << endl; + out << "echo \"=========================================\"" << endl; + out << "echo \"############## PATH : ##############\"" << endl; + out << "echo $PATH" << endl; + out << "echo \"############## LD_LIBRARY_PATH : ##############\"" << endl; + out << "echo $LD_LIBRARY_PATH" << endl; + out << "echo \"############## ROOTSYS : ##############\"" << endl; + out << "echo $ROOTSYS" << endl; + out << "echo \"############## which root : ##############\"" << endl; + out << "which root" << endl; + out << "echo \"############## ALICE_ROOT : ##############\"" << endl; + out << "echo $ALICE_ROOT" << endl; + out << "echo \"############## which aliroot : ##############\"" << endl; + out << "which aliroot" << endl; + out << "echo \"############## system limits : ##############\"" << endl; + out << "ulimit -a" << endl; + out << "echo \"############## memory : ##############\"" << endl; + out << "free -m" << endl; + out << "echo \"=========================================\"" << endl << endl; + TString mergeMacro = fExecutable; + mergeMacro.ReplaceAll(".sh", "_merge.C"); + if (IsOneStageMerging()) + out << "export ARG=\"" << mergeMacro << "(\\\"$1\\\")\"" << endl; + else + out << "export ARG=\"" << mergeMacro << "(\\\"$1\\\",$2,$3)\"" << endl; + out << fExecutableCommand << " " << "$ARG" << endl; + out << "echo \"======== " << mergeMacro.Data() << " finished with exit code: $? ========\"" << endl; + out << "echo \"############## memory after: ##############\"" << endl; + out << "free -m" << endl; + } + Bool_t copy = kTRUE; + if (fProductionMode || TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; + if (copy) { + CdWork(); + TString workdir = gGrid->GetHomeDirectory(); + TString bindir = Form("%s/bin", workdir.Data()); + if (!DirectoryExists(bindir)) gGrid->Mkdir(bindir,"-p"); + workdir += fGridWorkingDir; + TString executable = Form("%s/bin/%s", gGrid->GetHomeDirectory(), mergeExec.Data()); + if (FileExists(executable)) gGrid->Rm(executable); + Info("WriteMergeExecutable", "\n##### Copying executable file <%s> to your AliEn bin directory", mergeExec.Data()); + TFile::Cp(Form("file:%s",mergeExec.Data()), Form("alien://%s", executable.Data())); + } +} + //______________________________________________________________________________ void AliAnalysisAlien::WriteProductionFile(const char *filename) const { @@ -2149,33 +3748,51 @@ void AliAnalysisAlien::WriteProductionFile(const char *filename) const Error("WriteProductionFile", "Bad file name: %s", filename); return; } - TString workdir = gGrid->GetHomeDirectory(); + TString workdir; + if (!fProductionMode && !fGridWorkingDir.BeginsWith("/alice")) + workdir = gGrid->GetHomeDirectory(); workdir += fGridWorkingDir; Int_t njobspermaster = 1000*fNrunsPerMaster/fSplitMaxInputFileNumber; TString locjdl = Form("%s/%s", workdir.Data(),fJDLName.Data()); out << locjdl << " " << njobspermaster << endl; Int_t nmasterjobs = fInputFiles->GetEntries(); for (Int_t i=0; iAt(i)->GetName()) << " " << Form("%03d", i) << endl; + TString runOutDir = gSystem->BaseName(fInputFiles->At(i)->GetName()); + runOutDir.ReplaceAll(".xml", ""); + if (fOutputToRunNo) + out << Form("%s", fInputFiles->At(i)->GetName()) << " " << runOutDir << endl; + else + out << Form("%s", fInputFiles->At(i)->GetName()) << " " << Form("%03d", i) << endl; } - Info("WriteProductionFile", "\n##### Copying production file <%s> to your work directory", filename); - TFile::Cp(Form("file:%s",filename), Form("alien://%s/%s", workdir.Data(),filename)); + if (gGrid) { + Info("WriteProductionFile", "\n##### Copying production file <%s> to your work directory", filename); + if (FileExists(filename)) gGrid->Rm(filename); + TFile::Cp(Form("file:%s",filename), Form("alien://%s/%s", workdir.Data(),filename)); + } } //______________________________________________________________________________ -void AliAnalysisAlien::WriteValidationScript() +void AliAnalysisAlien::WriteValidationScript(Bool_t merge) { // Generate the alien validation script. // Generate the validation script TObjString *os; - TString validationScript = fExecutable; - validationScript.ReplaceAll(".sh", "_validation.sh"); + if (fValidationScript.IsNull()) { + fValidationScript = fExecutable; + fValidationScript.ReplaceAll(".sh", "_validation.sh"); + } + TString validationScript = fValidationScript; + if (merge) validationScript.ReplaceAll(".sh", "_merge.sh"); if (!Connect()) { Error("WriteValidationScript", "Alien connection required"); return; } - TString out_stream = ""; - if (!TestBit(AliAnalysisGrid::kTest)) out_stream = " >> stdout"; + if (!fTerminateFiles.IsNull()) { + fTerminateFiles.Strip(); + fTerminateFiles.ReplaceAll(" ",","); + } + TString outStream = ""; + if (!TestBit(AliAnalysisGrid::kTest)) outStream = " >> stdout"; if (!TestBit(AliAnalysisGrid::kSubmit)) { ofstream out; out.open(validationScript, ios::out); @@ -2191,63 +3808,79 @@ void AliAnalysisAlien::WriteValidationScript() out << "fi" << endl << endl; out << "cd $validateout;" << endl; out << "validateworkdir=`pwd`;" << endl << endl; - out << "echo \"*******************************************************\"" << out_stream << endl; - out << "echo \"* Automatically generated validation script *\"" << out_stream << endl; + out << "echo \"*******************************************************\"" << outStream << endl; + out << "echo \"* Automatically generated validation script *\"" << outStream << endl; out << "" << endl; - out << "echo \"* Time: $validatetime \"" << out_stream << endl; - out << "echo \"* Dir: $validateout\"" << out_stream << endl; - out << "echo \"* Workdir: $validateworkdir\"" << out_stream << endl; - out << "echo \"* ----------------------------------------------------*\"" << out_stream << endl; - out << "ls -la ./" << out_stream << endl; - out << "echo \"* ----------------------------------------------------*\"" << out_stream << endl << endl; + out << "echo \"* Time: $validatetime \"" << outStream << endl; + out << "echo \"* Dir: $validateout\"" << outStream << endl; + out << "echo \"* Workdir: $validateworkdir\"" << outStream << endl; + out << "echo \"* ----------------------------------------------------*\"" << outStream << endl; + out << "ls -la ./" << outStream << endl; + out << "echo \"* ----------------------------------------------------*\"" << outStream << endl << endl; out << "##################################################" << endl; - - out << "" << endl; - out << "parArch=`grep -Ei \"Cannot Build the PAR Archive\" stderr`" << endl; - out << "segViol=`grep -Ei \"Segmentation violation\" stderr`" << endl; - out << "segFault=`grep -Ei \"Segmentation fault\" stderr`" << endl; out << "" << endl; out << "if [ ! -f stderr ] ; then" << endl; out << " error=1" << endl; - out << " echo \"* ########## Job not validated - no stderr ###\" " << out_stream << endl; - out << " echo \"Error = $error\" " << out_stream << endl; + out << " echo \"* ########## Job not validated - no stderr ###\" " << outStream << endl; + out << " echo \"Error = $error\" " << outStream << endl; out << "fi" << endl; + out << "parArch=`grep -Ei \"Cannot Build the PAR Archive\" stderr`" << endl; + out << "segViol=`grep -Ei \"Segmentation violation\" stderr`" << endl; + out << "segFault=`grep -Ei \"Segmentation fault\" stderr`" << endl; + out << "glibcErr=`grep -Ei \"*** glibc detected ***\" stderr`" << endl; + out << "" << endl; + out << "if [ \"$parArch\" != \"\" ] ; then" << endl; out << " error=1" << endl; - out << " echo \"* ########## Job not validated - PAR archive not built ###\" " << out_stream << endl; - out << " echo \"$parArch\" " << out_stream << endl; - out << " echo \"Error = $error\" " << out_stream << endl; + out << " echo \"* ########## Job not validated - PAR archive not built ###\" " << outStream << endl; + out << " echo \"$parArch\" " << outStream << endl; + out << " echo \"Error = $error\" " << outStream << endl; out << "fi" << endl; out << "if [ \"$segViol\" != \"\" ] ; then" << endl; out << " error=1" << endl; - out << " echo \"* ########## Job not validated - Segment. violation ###\" " << out_stream << endl; - out << " echo \"$segViol\" " << out_stream << endl; - out << " echo \"Error = $error\" " << out_stream << endl; + out << " echo \"* ########## Job not validated - Segment. violation ###\" " << outStream << endl; + out << " echo \"$segViol\" " << outStream << endl; + out << " echo \"Error = $error\" " << outStream << endl; out << "fi" << endl; out << "if [ \"$segFault\" != \"\" ] ; then" << endl; out << " error=1" << endl; - out << " echo \"* ########## Job not validated - Segment. fault ###\" " << out_stream << endl; - out << " echo \"$segFault\" " << out_stream << endl; - out << " echo \"Error = $error\" " << out_stream << endl; + out << " echo \"* ########## Job not validated - Segment. fault ###\" " << outStream << endl; + out << " echo \"$segFault\" " << outStream << endl; + out << " echo \"Error = $error\" " << outStream << endl; + out << "fi" << endl; + + out << "if [ \"$glibcErr\" != \"\" ] ; then" << endl; + out << " error=1" << endl; + out << " echo \"* ########## Job not validated - *** glibc detected *** ###\" " << outStream << endl; + out << " echo \"$glibcErr\" " << outStream << endl; + out << " echo \"Error = $error\" " << outStream << endl; out << "fi" << endl; // Part dedicated to the specific analyses running into the train - TObjArray *arr = fOutputFiles.Tokenize(" "); + TString outputFiles = fOutputFiles; + if (merge && !fTerminateFiles.IsNull()) { + outputFiles += ","; + outputFiles += fTerminateFiles; + } + TObjArray *arr = outputFiles.Tokenize(","); TIter next1(arr); - TString output_file; - while ((os=(TObjString*)next1())) { - output_file = os->GetString(); - Int_t index = output_file.Index("@"); - if (index > 0) output_file.Remove(index); - out << "if ! [ -f " << output_file.Data() << " ] ; then" << endl; + TString outputFile; + while (!merge && (os=(TObjString*)next1())) { + // No need to validate outputs produced by merging since the merging macro does this + outputFile = os->GetString(); + Int_t index = outputFile.Index("@"); + if (index > 0) outputFile.Remove(index); + if (fTerminateFiles.Contains(outputFile)) continue; + if (outputFile.Contains("*")) continue; + out << "if ! [ -f " << outputFile.Data() << " ] ; then" << endl; out << " error=1" << endl; - out << " echo \"Output file(s) not found. Job FAILED !\"" << out_stream << endl; - out << " echo \"Output file(s) not found. Job FAILED !\" >> stderr" << endl; + out << " echo \"Output file " << outputFile << " not found. Job FAILED !\"" << outStream << endl; + out << " echo \"Output file " << outputFile << " not found. Job FAILED !\" >> stderr" << endl; out << "fi" << endl; } delete arr; @@ -2258,21 +3891,26 @@ void AliAnalysisAlien::WriteValidationScript() out << "fi" << endl; out << "if [ $error = 0 ] ; then" << endl; - out << " echo \"* ---------------- Job Validated ------------------*\"" << out_stream << endl; + out << " echo \"* ---------------- Job Validated ------------------*\"" << outStream << endl; + if (!IsKeepLogs()) { + out << " echo \"* === Logs std* will be deleted === \"" << endl; + outStream = ""; + out << " rm -f std*" << endl; + } out << "fi" << endl; - out << "echo \"* ----------------------------------------------------*\"" << out_stream << endl; - out << "echo \"*******************************************************\"" << out_stream << endl; + out << "echo \"* ----------------------------------------------------*\"" << outStream << endl; + out << "echo \"*******************************************************\"" << outStream << endl; out << "cd -" << endl; out << "exit $error" << endl; } Bool_t copy = kTRUE; - if (TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; + if (fProductionMode || TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE; if (copy) { CdWork(); TString workdir = gGrid->GetHomeDirectory(); workdir += fGridWorkingDir; - Info("CreateJDL", "\n##### Copying validation script <%s> to your AliEn working space", validationScript.Data()); + Info("WriteValidationScript", "\n##### Copying validation script <%s> to your AliEn working space", validationScript.Data()); if (FileExists(validationScript)) gGrid->Rm(validationScript); TFile::Cp(Form("file:%s",validationScript.Data()), Form("alien://%s/%s", workdir.Data(),validationScript.Data())); }