1 /**************************************************************************
2 * Copyright(c) 1998-2007, ALICE Experiment at CERN, All rights reserved. *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
16 // Author: Mihaela Gheata, 01/09/2008
18 //==============================================================================
19 // AliAnalysisAlien - AliEn utility class. Provides interface for creating
20 // a personalized JDL, finding and creating a dataset.
21 //==============================================================================
23 #include "Riostream.h"
27 #include "TObjString.h"
28 #include "TObjArray.h"
30 #include "TGridResult.h"
31 #include "TGridCollection.h"
33 #include "TFileMerger.h"
34 #include "AliAnalysisManager.h"
35 #include "AliAnalysisAlien.h"
37 ClassImp(AliAnalysisAlien)
39 //______________________________________________________________________________
// Default constructor. Only part of the member initializer list is visible in
// this listing; the visible initializers set fSplitMaxInputFileNumber and
// fMasterResubmitThreshold to 0.
40 AliAnalysisAlien::AliAnalysisAlien()
45 fSplitMaxInputFileNumber(0),
47 fMasterResubmitThreshold(0),
74 //______________________________________________________________________________
// Named constructor. Forwards <name> to the AliAnalysisGrid base class. Only part
// of the member initializer list is visible in this listing; the visible
// initializers set fSplitMaxInputFileNumber and fMasterResubmitThreshold to 0.
75 AliAnalysisAlien::AliAnalysisAlien(const char *name)
76 :AliAnalysisGrid(name),
80 fSplitMaxInputFileNumber(0),
82 fMasterResubmitThreshold(0),
109 //______________________________________________________________________________
// Copy constructor. Copy-constructs the AliAnalysisGrid base and copies the
// numeric and string settings of <other> member by member (some entries of the
// initializer list are not visible in this listing).
110 AliAnalysisAlien::AliAnalysisAlien(const AliAnalysisAlien& other)
111 :AliAnalysisGrid(other),
113 fPrice(other.fPrice),
115 fSplitMaxInputFileNumber(other.fSplitMaxInputFileNumber),
116 fMaxInitFailed(other.fMaxInitFailed),
117 fMasterResubmitThreshold(other.fMasterResubmitThreshold),
118 fRunNumbers(other.fRunNumbers),
119 fExecutable(other.fExecutable),
120 fArguments(other.fArguments),
121 fAnalysisMacro(other.fAnalysisMacro),
122 fAnalysisSource(other.fAnalysisSource),
123 fAdditionalLibs(other.fAdditionalLibs),
124 fSplitMode(other.fSplitMode),
125 fAPIVersion(other.fAPIVersion),
126 fROOTVersion(other.fROOTVersion),
127 fAliROOTVersion(other.fAliROOTVersion),
129 fGridWorkingDir(other.fGridWorkingDir),
130 fGridDataDir(other.fGridDataDir),
131 fDataPattern(other.fDataPattern),
132 fGridOutputDir(other.fGridOutputDir),
133 fOutputArchive(other.fOutputArchive),
134 fOutputFiles(other.fOutputFiles),
135 fInputFormat(other.fInputFormat),
136 fDatasetName(other.fDatasetName),
137 fJDLName(other.fJDLName),
// A new TAlienJDL is instantiated through gROOT->ProcessLine; the content of
// other.fGridJDL is not copied by the code visible here.
141 fGridJDL = (TGridJDL*)gROOT->ProcessLine("new TAlienJDL()");
// Deep copy of the input file list: one new TObjString per entry of
// other.fInputFiles, built from the entry name.
142 if (other.fInputFiles) {
143 fInputFiles = new TObjArray();
144 TIter next(other.fInputFiles);
146 while ((obj=next())) fInputFiles->Add(new TObjString(obj->GetName()));
// The new array is flagged as owner of the strings it holds.
147 fInputFiles->SetOwner();
151 //______________________________________________________________________________
152 AliAnalysisAlien::~AliAnalysisAlien()
155 if (fGridJDL) delete fGridJDL;
156 if (fInputFiles) delete fInputFiles;
159 //______________________________________________________________________________
// Assignment operator. When <other> is a different object, assigns the
// AliAnalysisGrid base part and then copies the settings member by member.
// NOTE(review): in the lines visible here fGridJDL and fInputFiles are
// overwritten without releasing the objects previously held - possible leak
// unless the lines missing from this listing handle it; TODO confirm.
160 AliAnalysisAlien &AliAnalysisAlien::operator=(const AliAnalysisAlien& other)
// Self-assignment guard.
163 if (this != &other) {
164 AliAnalysisGrid::operator=(other);
// A new TAlienJDL is instantiated through gROOT->ProcessLine; the content of
// other.fGridJDL is not copied by the code visible here.
165 fGridJDL = (TGridJDL*)gROOT->ProcessLine("new TAlienJDL()");
166 fPrice = other.fPrice;
168 fSplitMaxInputFileNumber = other.fSplitMaxInputFileNumber;
169 fMaxInitFailed = other.fMaxInitFailed;
170 fMasterResubmitThreshold = other.fMasterResubmitThreshold;
171 fRunNumbers = other.fRunNumbers;
172 fExecutable = other.fExecutable;
173 fArguments = other.fArguments;
174 fAnalysisMacro = other.fAnalysisMacro;
175 fAnalysisSource = other.fAnalysisSource;
176 fAdditionalLibs = other.fAdditionalLibs;
177 fSplitMode = other.fSplitMode;
178 fAPIVersion = other.fAPIVersion;
179 fROOTVersion = other.fROOTVersion;
180 fAliROOTVersion = other.fAliROOTVersion;
182 fGridWorkingDir = other.fGridWorkingDir;
183 fGridDataDir = other.fGridDataDir;
184 fDataPattern = other.fDataPattern;
185 fGridOutputDir = other.fGridOutputDir;
186 fOutputArchive = other.fOutputArchive;
187 fOutputFiles = other.fOutputFiles;
188 fInputFormat = other.fInputFormat;
189 fDatasetName = other.fDatasetName;
190 fJDLName = other.fJDLName;
// Deep copy of the input file list: one new TObjString per entry of
// other.fInputFiles; the new array is flagged as owner of its strings.
191 if (other.fInputFiles) {
192 fInputFiles = new TObjArray();
193 TIter next(other.fInputFiles);
195 while ((obj=next())) fInputFiles->Add(new TObjString(obj->GetName()));
196 fInputFiles->SetOwner();
202 //______________________________________________________________________________
203 void AliAnalysisAlien::AddRunNumber(Int_t run)
205 // Add a run number to the list of runs to be processed.
206 if (fRunNumbers.Length()) fRunNumbers += " ";
207 fRunNumbers += Form("%d", run);
210 //______________________________________________________________________________
211 void AliAnalysisAlien::AddDataFile(const char *lfn)
213 // Adds a data file to the input to be analysed. The file should be a valid LFN
214 // or point to an existing file in the alien workdir.
215 if (!fInputFiles) fInputFiles = new TObjArray();
216 fInputFiles->Add(new TObjString(lfn));
219 //______________________________________________________________________________
// Establishes the AliEn grid connection if there is none yet and stores the
// grid user name in fUser. Returns kTRUE at once when gGrid is already
// connected. The return statements of the failure paths are not visible in
// this listing.
220 Bool_t AliAnalysisAlien::Connect()
222 // Try to connect to AliEn. User needs a valid token and /tmp/gclient_env_$UID sourced.
223 if (gGrid && gGrid->IsConnected()) return kTRUE;
// Without alien_API_USER in the environment the user is told how to set up the
// token and the client environment.
224 if (!gSystem->Getenv("alien_API_USER")) {
225 Error("Connect", "Make sure you:\n 1. Have called: alien-token-init <username> today\n 2. Have sourced /tmp/gclient_env_%s",
226 gSystem->Getenv("UID"));
230 Info("Connect", "Trying to connect to AliEn ...");
231 TGrid::Connect("alien://");
// The outcome is checked through the global gGrid after the connection attempt.
233 if (!gGrid || !gGrid->IsConnected()) {
234 Error("Connect", "Did not managed to connect to AliEn. Make sure you have a valid token.");
// The analysis user is taken from the grid connection.
237 fUser = gGrid->GetUser();
238 Info("Connect", "\n##### Connected to AliEn as user %s. Setting analysis user to <%s>", fUser.Data(), fUser.Data());
242 //______________________________________________________________________________
// Changes to the AliEn working directory (grid home directory + fGridWorkingDir),
// trying to create it when the change fails. When creation fails as well,
// fGridWorkingDir is reset to an empty string and a warning is issued.
// NOTE(review): the Info/Warning messages below are tagged "CreateJDL" although
// they are emitted from CdWork.
243 void AliAnalysisAlien::CdWork()
245 // Check validity of alien workspace. Create directory if possible.
247 Error("CdWork", "Alien connection required");
250 TString homedir = gGrid->GetHomeDirectory();
251 TString workdir = homedir + fGridWorkingDir;
252 if (!gGrid->Cd(workdir)) {
254 if (gGrid->Mkdir(workdir)) {
// NOTE(review): Cd is given fGridWorkingDir here, not the full <workdir> path
// that was just created - TODO confirm this is intended.
255 gGrid->Cd(fGridWorkingDir);
256 Info("CreateJDL", "\n##### Created alien working directory %s", fGridWorkingDir.Data());
258 Warning("CreateJDL", "Working directory %s cannot be created.\n Using %s instead.",
259 workdir.Data(), homedir.Data());
260 fGridWorkingDir = "";
265 //______________________________________________________________________________
// Validates the declared input: the files registered in fInputFiles and/or the
// run numbers listed in fRunNumbers. Many lines of this function (declarations,
// conditions, return statements) are missing from this listing, so the comments
// below describe only the statements that are visible.
266 Bool_t AliAnalysisAlien::CheckInputData()
268 // Check validity of input data. If necessary, create xml files.
// At least one of the two input sources must have been provided.
269 if (!fInputFiles && !fRunNumbers.Length()) {
270 Error("CheckInputData", "You have to specify either a set of run numbers or some existing grid files. Use AddRunNumber()/AddDataFile().");
273 // Process declared files
// Type flags meant to be common to all declared input files.
274 Bool_t is_collection = kFALSE;
275 Bool_t is_xml = kFALSE;
276 Bool_t use_tags = kFALSE;
277 Bool_t checked = kFALSE;
// Full path of the AliEn working directory.
280 TString workdir = gGrid->GetHomeDirectory();
281 workdir += fGridWorkingDir;
284 TIter next(fInputFiles);
285 while ((objstr=(TObjString*)next())) {
288 file += objstr->GetString();
289 // Store full lfn path
290 if (FileExists(file)) objstr->SetString(file);
292 file = objstr->GetName();
293 if (!FileExists(objstr->GetName())) {
294 Error("CheckInputData", "Data file %s not found or not in your working dir: %s",
295 objstr->GetName(), workdir.Data());
// Determine the type of the current file.
299 Bool_t iscoll, isxml, usetags;
300 CheckDataType(file, iscoll, isxml, usetags);
303 is_collection = iscoll;
// The kUseTags bit of this object is set from use_tags.
306 TObject::SetBit(AliAnalysisGrid::kUseTags, use_tags);
// Any disagreement between this file's type and the recorded flags is reported
// as a conflict.
308 if ((iscoll != is_collection) || (isxml != is_xml) || (usetags != use_tags)) {
309 Error("CheckInputData", "Some conflict was found in the types of inputs");
315 // Process requested run numbers
316 if (!fRunNumbers.Length()) return kTRUE;
317 // Check validity of alien data directory
318 if (!fGridDataDir.Length()) {
// NOTE(review): the message location below is spelled "CkeckInputData".
319 Error("CkeckInputData", "AliEn path to base data directory must be set.\n = Use: SetGridDataDir()");
322 if (!gGrid->Cd(fGridDataDir)) {
323 Error("CheckInputData", "Data directory %s not existing.", fGridDataDir.Data());
327 Error("CheckInputData", "You are using raw AliEn collections as input. Cannot process run numbers.");
331 if (checked && !is_xml) {
332 Error("CheckInputData", "Cannot mix processing of full runs with non-xml files");
335 // Check validity of run number(s)
// Tag usage is derived from the data pattern: it is assumed when fDataPattern
// contains "tag".
341 use_tags = fDataPattern.Contains("tag");
342 TObject::SetBit(AliAnalysisGrid::kUseTags, use_tags);
344 if (use_tags != fDataPattern.Contains("tag")) {
345 Error("CheckInputData", "Cannot mix input files using/not using tags");
// Each blank-separated run number is looked up below the grid data directory.
348 if (fRunNumbers.Length()) {
349 arr = fRunNumbers.Tokenize(" ");
351 while ((os=(TObjString*)next())) {
// NOTE(review): the format below ends with a blank, so <path> carries a
// trailing space when handed to Cd - TODO confirm this is intended.
352 path = Form("%s/%s ", fGridDataDir.Data(), os->GetString().Data());
353 if (!gGrid->Cd(path)) {
354 Error("CheckInputData", "Run number %s not found in path: %s", os->GetString().Data(), path.Data());
// Name of the per-run xml file in the working directory.
357 path = Form("%s/%s.xml", workdir.Data(),os->GetString().Data());
358 TString msg = "\n##### file: ";
360 msg += " type: xml_collection;";
361 if (use_tags) msg += " using_tags: Yes";
362 else msg += " using_tags: No";
// NOTE(review): the message is tagged "CheckDataType" although it is emitted
// from CheckInputData, and <msg> is passed as the format string itself.
363 Info("CheckDataType", msg.Data());
371 //______________________________________________________________________________
// Builds one xml dataset per run number by issuing a grid command whose output
// is redirected into "<run>.xml". Returns kFALSE at once in offline mode and
// kTRUE when no run numbers are defined. Several lines (including the
// construction of <command>) are missing from this listing.
372 Bool_t AliAnalysisAlien::CreateDataset(const char *pattern)
374 // Create dataset for the grid data directory + run number.
375 if (TestBit(AliAnalysisGrid::kOffline)) return kFALSE;
377 Error("CreateDataset", "Cannot create dataset with no grid connection");
// Full path of the AliEn working directory.
383 TString workdir = gGrid->GetHomeDirectory();
384 workdir += fGridWorkingDir;
386 // Compose the 'find' command arguments
388 TString options = "-x collection ";
// In test mode the option "-l 10 " is added to the command options.
389 if (TestBit(AliAnalysisGrid::kTest)) options += "-l 10 ";
390 TString conditions = "";
394 if (!fRunNumbers.Length()) return kTRUE;
396 TObjArray *arr = fRunNumbers.Tokenize(" ");
399 while ((os=(TObjString*)next())) {
400 path = Form("%s/%s ", fGridDataDir.Data(), os->GetString().Data());
401 file = Form("%s.xml", os->GetString().Data());
402 if (FileExists(file)) {
403 Info("CreateDataset", "\n##### Removing previous dataset %s", file.Data());
// The command output is redirected into the per-run xml file.
410 conditions = Form(" > %s", file.Data());
411 command += conditions;
412 TGridResult *res = gGrid->Command(command);
// Success is judged by the presence of the xml file afterwards.
414 if (!FileExists(file)) {
415 Error("CreateDataset", "Command %s did NOT succeed", command.Data());
// In test mode the loop stops after the first run.
419 if (TestBit(AliAnalysisGrid::kTest)) break;
422 // Copy the file back to client if local testing is requested.
423 if (TestBit(AliAnalysisGrid::kTest)) {
424 Info("CreateDataset", "\n##### Copying dataset <%s> to <wn.xml> in your current directory...", file.Data());
425 TFile::Cp(Form("alien://%s/%s", workdir.Data(), file.Data()), "file:wn.xml",file.Data());
430 //______________________________________________________________________________
// Fills fGridJDL from the current settings, turns it into a commented and
// re-formatted JDL text, writes that text to the local file fJDLName and copies
// the JDL (plus additional non-.so dependencies) to the AliEn working
// directory. Several lines (declarations, conditions, returns) are missing
// from this listing; the comments below describe only the visible statements.
431 Bool_t AliAnalysisAlien::CreateJDL()
433 // Generate a JDL file according to current settings. The name of the file is
434 // specified by fJDLName.
435 Bool_t error = kFALSE;
// <copy> is switched off in offline and test mode, <generate> in test and
// submit mode.
438 if (TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE;
439 Bool_t generate = kTRUE;
440 if (TestBit(AliAnalysisGrid::kTest) || TestBit(AliAnalysisGrid::kSubmit)) generate = kFALSE;
442 Error("CreateJDL", "Alien connection required");
445 // Check validity of alien workspace
447 TString workdir = gGrid->GetHomeDirectory();
448 workdir += fGridWorkingDir;
452 Error("CreateJDL()", "Define some input files for your analysis.");
455 // Compose list of input files
456 // Check if output files were defined
457 if (!fOutputFiles.Length()) {
458 Error("CreateJDL", "You must define at least one output file");
461 // Check if an output directory was defined and valid
462 if (!fGridOutputDir.Length()) {
463 Error("CreateJDL", "You must define AliEn output directory");
// The output directory is created when changing into it fails.
466 if (!gGrid->Cd(fGridOutputDir)) {
467 if (gGrid->Mkdir(fGridOutputDir)) {
468 Info("CreateJDL", "\n##### Created alien output directory %s", fGridOutputDir.Data());
470 Error("CreateJDL", "Could not create alien output directory %s", fGridOutputDir.Data());
476 // Exit if any error up to now
477 if (error) return kFALSE;
// Values are stored as quoted strings in the JDL; optional entries are added
// only when the corresponding setting is positive / non-empty.
479 fGridJDL->SetValue("User", Form("\"%s\"", fUser.Data()));
480 fGridJDL->SetExecutable(fExecutable);
481 // fGridJDL->SetTTL((UInt_t)fTTL);
482 fGridJDL->SetValue("TTL", Form("\"%d\"", fTTL));
483 if (fMaxInitFailed > 0)
484 fGridJDL->SetValue("MaxInitFailed", Form("\"%d\"",fMaxInitFailed));
485 if (fSplitMaxInputFileNumber > 0)
486 fGridJDL->SetValue("SplitMaxInputFileNumber", Form("\"%d\"", fSplitMaxInputFileNumber));
487 if (fSplitMode.Length())
488 fGridJDL->SetValue("Split", Form("\"%s\"", fSplitMode.Data()));
489 // fGridJDL->SetSplitMode(fSplitMode, (UInt_t)fSplitMaxInputFileNumber);
// Software packages requested for the job.
490 if (fAliROOTVersion.Length())
491 fGridJDL->AddToPackages("AliRoot", fAliROOTVersion);
492 if (fROOTVersion.Length())
493 fGridJDL->AddToPackages("ROOT", fROOTVersion);
494 if (fAPIVersion.Length())
495 fGridJDL->AddToPackages("APISCONFIG", fAPIVersion);
496 fGridJDL->SetInputDataListFormat(fInputFormat);
497 fGridJDL->SetInputDataList("wn.xml");
// Every declared input file is added as an input data collection with the
// "nodownload" attribute.
499 TIter next(fInputFiles);
500 while ((os=(TObjString*)next()))
501 fGridJDL->AddToInputDataCollection(Form("LF:%s,nodownload", os->GetString().Data()));
// Input sandbox: analysis macro, analysis.root, optionally ConfigureCuts.C
// (when tags are used and the file is accessible locally) and the additional
// files that are not .so libraries.
503 fGridJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(), fAnalysisMacro.Data()));
504 fGridJDL->AddToInputSandbox(Form("LF:%s/analysis.root", workdir.Data()));
505 if (IsUsingTags() && !gSystem->AccessPathName("ConfigureCuts.C"))
506 fGridJDL->AddToInputSandbox(Form("LF:%s/ConfigureCuts.C", workdir.Data()));
507 if (fAdditionalLibs.Length()) {
508 arr = fAdditionalLibs.Tokenize(" ");
510 while ((os=(TObjString*)next())) {
511 if (os->GetString().Contains(".so")) continue;
512 fGridJDL->AddToInputSandbox(Form("LF:%s/%s", workdir.Data(), os->GetString().Data()));
// Output archives, one per blank-separated token of fOutputArchive.
516 if (fOutputArchive.Length()) {
517 arr = fOutputArchive.Tokenize(" ");
519 while ((os=(TObjString*)next()))
520 fGridJDL->AddToOutputArchive(os->GetString().Data());
523 fGridJDL->SetOutputDirectory(Form("%s/%s/#alien_counter_03i#", workdir.Data(), fGridOutputDir.Data()));
// Output files, one per blank-separated token of fOutputFiles.
524 arr = fOutputFiles.Tokenize(" ");
526 while ((os=(TObjString*)next())) fGridJDL->AddToOutputSandbox(os->GetString());
528 // fGridJDL->SetPrice((UInt_t)fPrice);
529 fGridJDL->SetValue("Price", Form("\"%d\"", fPrice));
530 fGridJDL->SetValidationCommand(Form("%s/validate.sh", workdir.Data()));
531 if (fMasterResubmitThreshold) fGridJDL->SetValue("MasterResubmitThreshold", Form("\"%d%%\"", fMasterResubmitThreshold));
532 // Generate the JDL as a string
533 TString sjdl = fGridJDL->Generate();
// Post-processing of the generated text: an explanatory "#" comment is
// inserted in front of the first occurrence of each known key. The searches
// run on the progressively modified string, so the order of the statements
// matters.
535 index = sjdl.Index("Executable");
536 if (index >= 0) sjdl.Insert(index, "\n# This is the startup script\n");
537 index = sjdl.Index("Split ");
538 if (index >= 0) sjdl.Insert(index, "\n# We split per storage element\n");
539 index = sjdl.Index("SplitMaxInputFileNumber");
540 if (index >= 0) sjdl.Insert(index, "\n# We want each subjob to get maximum this number of input files\n");
541 index = sjdl.Index("InputDataCollection");
542 if (index >= 0) sjdl.Insert(index, "# Input xml collections\n");
543 index = sjdl.Index("InputFile");
544 if (index >= 0) sjdl.Insert(index, "\n# List of input files to be uploaded to wn's\n");
545 index = sjdl.Index("InputDataList ");
546 if (index >= 0) sjdl.Insert(index, "\n# Collection to be processed on wn\n");
547 index = sjdl.Index("InputDataListFormat");
548 if (index >= 0) sjdl.Insert(index, "\n# Format of input data\n");
549 index = sjdl.Index("Price");
550 if (index >= 0) sjdl.Insert(index, "\n# AliEn price for this job\n");
551 index = sjdl.Index("Requirements");
552 if (index >= 0) sjdl.Insert(index, "\n# Additional requirements for the computing element\n");
553 index = sjdl.Index("Packages");
554 if (index >= 0) sjdl.Insert(index, "\n# Packages to be used\n");
555 index = sjdl.Index("User");
556 if (index >= 0) sjdl.Insert(index, "\n# AliEn user\n");
557 index = sjdl.Index("TTL");
558 if (index >= 0) sjdl.Insert(index, "\n# Time to live for the job\n");
559 index = sjdl.Index("OutputFile");
560 if (index >= 0) sjdl.Insert(index, "\n# List of output files to be registered\n");
561 index = sjdl.Index("OutputDir");
562 if (index >= 0) sjdl.Insert(index, "\n# Output directory\n");
563 index = sjdl.Index("OutputArchive");
564 if (index >= 0) sjdl.Insert(index, "\n# Files to be archived\n");
565 index = sjdl.Index("MaxInitFailed");
566 if (index >= 0) sjdl.Insert(index, "\n# Maximum number of first failing jobs to abort the master job\n");
567 index = sjdl.Index("MasterResubmitThreshold");
568 if (index >= 0) sjdl.Insert(index, "\n# Resubmit failed jobs until DONE rate reaches this percentage\n");
// The key is renamed to "Validationcommand" before its comment is inserted.
569 sjdl.ReplaceAll("ValidationCommand", "Validationcommand");
570 index = sjdl.Index("Validationcommand");
571 if (index >= 0) sjdl.Insert(index, "\n# Validation script to be run for each subjob\n");
// Layout clean-up: list items and braces are put on separate lines, doubled
// newlines are collapsed and "OutputDirectory" is renamed to "OutputDir".
572 sjdl.ReplaceAll("\"LF:", "\n \"LF:");
573 sjdl.ReplaceAll("(member", "\n (member");
574 sjdl.ReplaceAll("\",\"VO_", "\",\n \"VO_");
575 sjdl.ReplaceAll("{", "{\n ");
576 sjdl.ReplaceAll("};", "\n};");
577 sjdl.ReplaceAll("{\n \n", "{\n");
578 sjdl.ReplaceAll("\n\n", "\n");
579 sjdl.ReplaceAll("OutputDirectory", "OutputDir");
// A JDLVariables section is appended and a JobTag line is prepended.
580 sjdl += "JDLVariables = \n{\n \"Packages\",\n \"OutputDir\"\n};\n";
581 sjdl.Prepend("JobTag = \"Automatically generated analysis JDL\";\n");
582 index = sjdl.Index("JDLVariables");
583 if (index >= 0) sjdl.Insert(index, "\n# JDL variables\n");
// Write the text to the local file fJDLName.
586 out.open(fJDLName.Data(), ios::out);
588 Error("CreateJDL", "Bad file name: %s", fJDLName.Data());
593 // Copy jdl to grid workspace
595 Info("CreateJDL", "\n##### You may want to review jdl:%s and analysis macro:%s before running in <submit> mode", fJDLName.Data(), fAnalysisMacro.Data());
597 Info("CreateJDL", "\n##### Copying JDL file <%s> to your AliEn working space", fJDLName.Data());
// An existing remote copy is removed before the upload.
598 if (FileExists(fJDLName)) gGrid->Rm(fJDLName);
599 TFile::Cp(Form("file:%s",fJDLName.Data()), Form("alien://%s/%s", workdir.Data(), fJDLName.Data()));
// Upload the additional dependencies; entries containing ".so" are skipped.
600 if (fAdditionalLibs.Length()) {
601 arr = fAdditionalLibs.Tokenize(" ");
604 while ((os=(TObjString*)next())) {
// NOTE(review): the "Copying dependency" message is printed before the .so
// check, i.e. also for entries that are then skipped.
605 Info("CreateJDL", "\n##### Copying dependency: <%s> to your alien workspace", os->GetString().Data());
606 if (os->GetString().Contains(".so")) continue;
607 if (FileExists(os->GetString())) gGrid->Rm(os->GetString());
608 TFile::Cp(Form("file:%s",os->GetString().Data()), Form("alien://%s/%s", workdir.Data(), os->GetString().Data()));
616 //______________________________________________________________________________
// Checks for the presence of <lfn> on the grid by listing it with gGrid->Ls and
// inspecting the "name" value of the first result entry. Returns kFALSE when
// Ls yields no result. The remaining return statements are not visible in
// this listing.
617 Bool_t AliAnalysisAlien::FileExists(const char *lfn) const
619 // Returns true if file exists.
621 Error("FileExists", "No connection to grid");
624 TGridResult *res = gGrid->Ls(lfn);
625 if (!res) return kFALSE;
// Only the first entry of the listing is examined.
626 TMap *map = dynamic_cast<TMap*>(res->At(0));
631 TObjString *objs = dynamic_cast<TObjString*>(map->GetValue("name"));
// A missing or empty "name" value is tested here; the branch body is not
// visible in this listing.
632 if (!objs || !objs->GetString().Length()) {
640 //______________________________________________________________________________
// Determines the type of the grid file <lfn> and reports it through the three
// output flags <is_collection>, <is_xml> and <use_tags>; a summary line is
// printed with Info. The conditions selecting the three branches below (raw
// collection, xml collection, plain file) are missing from this listing.
641 void AliAnalysisAlien::CheckDataType(const char *lfn, Bool_t &is_collection, Bool_t &is_xml, Bool_t &use_tags)
643 // Check input data type.
644 is_collection = kFALSE;
648 Error("CheckDataType", "No connection to grid");
651 is_collection = IsCollection(lfn);
652 TString msg = "\n##### file: ";
655 msg += " type: raw_collection;";
656 // special treatment for collections
658 // check for tag files in the collection
659 TGridResult *res = gGrid->Command(Form("listFilesFromCollection -z -v %s",lfn), kFALSE);
661 msg += " using_tags: No (unknown)";
662 Info("CheckDataType", msg.Data());
// The "origLFN" value of the first listed file decides whether tags are used.
665 const char* typeStr = res->GetKey(0, "origLFN");
666 if (!typeStr || !strlen(typeStr)) {
667 msg += " using_tags: No (unknown)";
668 Info("CheckDataType", msg.Data());
671 TString file = typeStr;
672 use_tags = file.Contains(".tag");
673 if (use_tags) msg += " using_tags: Yes";
674 else msg += " using_tags: No";
675 Info("CheckDataType", msg.Data());
// xml detection is based on the file name only.
680 is_xml = slfn.Contains(".xml");
682 // Open xml collection and check if there are tag files inside
683 msg += " type: xml_collection;";
// The collection is opened through gROOT->ProcessLine.
684 TGridCollection *coll = (TGridCollection*)gROOT->ProcessLine(Form("TAlienCollection::Open(\"alien://%s\",1);",lfn));
686 msg += " using_tags: No (unknown)";
687 Info("CheckDataType", msg.Data());
// Only the first entry of the collection is examined.
690 TMap *map = coll->Next();
692 msg += " using_tags: No (unknown)";
693 Info("CheckDataType", msg.Data());
696 map = (TMap*)map->GetValue("");
698 if (map && map->GetValue("name")) file = map->GetValue("name")->GetName();
699 use_tags = file.Contains(".tag");
701 if (use_tags) msg += " using_tags: Yes";
702 else msg += " using_tags: No";
703 Info("CheckDataType", msg.Data());
// Fallback: the type is derived from the file name itself.
706 use_tags = slfn.Contains(".tag");
707 if (slfn.Contains(".root")) msg += " type: root file;";
// NOTE(review): "unhnown" in the message below looks like a typo for "unknown".
708 else msg += " type: unhnown file;";
709 if (use_tags) msg += " using_tags: Yes";
710 else msg += " using_tags: No";
711 Info("CheckDataType", msg.Data());
714 //______________________________________________________________________________
// Asks the grid for the type of <lfn> ("type -z" command) and returns kTRUE
// when the reported type is "collection". Returns kFALSE when the command
// yields no result or an empty type. The trailing lines of the function are
// not visible in this listing.
715 Bool_t AliAnalysisAlien::IsCollection(const char *lfn) const
717 // Returns true if file is a collection. Functionality duplicated from
718 // TAlien::Type() because we don't want to directly depend on TAlien.
720 Error("IsCollection", "No connection to grid");
723 TGridResult *res = gGrid->Command(Form("type -z %s",lfn),kFALSE);
724 if (!res) return kFALSE;
// The "type" key of the first result entry carries the answer.
725 const char* typeStr = res->GetKey(0, "type");
726 if (!typeStr || !strlen(typeStr)) return kFALSE;
727 if (!strcmp(typeStr, "collection")) return kTRUE;
732 //______________________________________________________________________________
// Resets the configuration to its default values. The JDL object is replaced
// by a newly created TAlienJDL. Some assignments are missing from this listing.
733 void AliAnalysisAlien::SetDefaults()
735 // Set default values for everything. What cannot be filled will be left empty.
// The previous JDL object is released before a new one is created through
// gROOT->ProcessLine.
736 if (fGridJDL) delete fGridJDL;
737 fGridJDL = (TGridJDL*)gROOT->ProcessLine("new TAlienJDL()");
740 fSplitMaxInputFileNumber = 100;
// A threshold of 0 means no MasterResubmitThreshold entry is written by
// CreateJDL, which adds it only for non-zero values.
742 fMasterResubmitThreshold = 0;
744 fExecutable = "analysis.sh";
746 fAnalysisMacro = "myAnalysis.C";
747 fAnalysisSource = "";
748 fAdditionalLibs = "";
752 fAliROOTVersion = "";
753 fUser = ""; // Your alien user name
754 fGridWorkingDir = "";
755 fGridDataDir = ""; // Can be like: /alice/sim/PDC_08a/LHC08c9/
756 fDataPattern = "*AliESDs.root"; // Can be like: *AliESDs.root, */pass1/*AliESDs.root, ...
757 fGridOutputDir = "output";
// Default archives: stdout/stderr in one zip file, all root files in another.
758 fOutputArchive = "log_archive.zip:stdout,stderr root_archive.zip:*.root";
759 fOutputFiles = ""; // Like "AliAODs.root histos.root"
760 fInputFormat = "xml-single";
761 fJDLName = "analysis.jdl";
764 //______________________________________________________________________________
// Merges, for every file name listed in fOutputFiles, the matching files found
// below the grid output directory, using a TFileMerger. Returns kTRUE at once
// in test mode and kFALSE in offline mode. Several lines (declarations, the
// merge call, returns) are missing from this listing.
765 Bool_t AliAnalysisAlien::MergeOutputs()
767 // Merge analysis outputs existing in the AliEn space.
768 if (TestBit(AliAnalysisGrid::kTest)) return kTRUE;
769 if (TestBit(AliAnalysisGrid::kOffline)) return kFALSE;
771 Error("MergeOutputs", "Cannot merge outputs without grid connection. Terminate will NOT be executed");
774 // Get the output path
// First try <home>/<workdir>/<outputdir>, then fall back to <home>/<outputdir>.
775 TString output = Form("/%s/%s/%s", gGrid->GetHomeDirectory(), fGridWorkingDir.Data(), fGridOutputDir.Data());
776 if (!gGrid->Cd(output)) output = Form("/%s/%s", gGrid->GetHomeDirectory(), fGridOutputDir.Data());
777 if (!gGrid->Cd(output)) {
778 Error("MergeOutputs", "Grid output directory %s not found. Terminate() will NOT be executed", fGridOutputDir.Data());
781 if (!fOutputFiles.Length()) {
782 Error("MergeOutputs", "No output file names defined. Are you running the right AliAnalysisAlien configuration ?");
785 TObjArray *list = fOutputFiles.Tokenize(" ");
790 Bool_t merged = kTRUE;
791 while((str=(TObjString*)next())) {
792 output_file = str->GetString();
// Anything from an '@' onwards is cut from the file name.
793 Int_t index = output_file.Index("@");
794 if (index > 0) output_file.Remove(index);
// Locate all files with this name suffix below the output directory.
795 command = Form("find %s/ *%s", output.Data(), output_file.Data());
796 printf("command: %s\n", command.Data());
797 TGridResult *res = gGrid->Command(command);
802 while ((map=(TMap*)nextmap())) {
// The "turl" value of each result entry is what gets added to the merger.
803 TObjString *objs = dynamic_cast<TObjString*>(map->GetValue("turl"));
804 if (!objs || !objs->GetString().Length()) {
809 fm = new TFileMerger(kFALSE);
810 fm->SetFastMethod(kTRUE);
811 fm->OutputFile(output_file);
813 fm->AddFile(objs->GetString());
// Nothing to merge when no merger was created or its merge list is empty.
815 if (!fm || !fm->GetMergeList() || !fm->GetMergeList()->GetSize()) {
816 Warning("MergeOutputs", "No <%s> files found.", output_file.Data());
822 Error("MergeOutputs", "Could not merge all <%s> files", output_file.Data());
825 Info("MergeOutputs", "\n##### Merged %d output files <%s>", fm->GetMergeList()->GetSize(), output_file.Data());
831 Error("MergeOutputs", "Terminate() will NOT be executed");
836 //______________________________________________________________________________
// Drives the grid analysis according to the mode bits of this object: prints a
// banner for the active mode, validates the input data, creates the dataset,
// writes the analysis macro, the validation script and the JDL, then either
// runs the executable locally (test mode) or submits the JDL to the grid.
// Both arguments are unused. Several lines are missing from this listing.
837 void AliAnalysisAlien::StartAnalysis(Long64_t /*nentries*/, Long64_t /*firstEntry*/)
839 // Start remote grid analysis.
841 if (TestBit(AliAnalysisGrid::kOffline)) {
842 Info("StartAnalysis","\n##### OFFLINE MODE ##### Files to be used in GRID are produced but not copied \
843 \n there nor any job run. You can revise the JDL and analysis \
844 \n macro then run the same in \"submit\" mode.");
845 } else if (TestBit(AliAnalysisGrid::kTest)) {
846 Info("StartAnalysis","\n##### LOCAL MODE ##### Your analysis will be run locally on a subset of the requested \
848 } else if (TestBit(AliAnalysisGrid::kSubmit)) {
849 Info("StartAnalysis","\n##### SUBMIT MODE ##### Files required by your analysis are copied to your grid working \
850 \n space and job submitted.");
851 } else if (TestBit(AliAnalysisGrid::kMerge)) {
852 Info("StartAnalysis","\n##### MERGE MODE ##### The registered outputs of the analysis will be merged");
855 Info("StartAnalysis","\n##### FULL ANALYSIS MODE ##### Producing needed files and submitting your analysis job...");
859 Error("StartAnalysis", "Cannot start grid analysis without grid connection");
862 if (!CheckInputData()) {
863 Error("StartAnalysis", "There was an error in preprocessing your requested input data");
// Produce the files needed by the job.
866 CreateDataset(fDataPattern);
868 WriteAnalysisMacro();
870 WriteValidationScript();
871 if (!CreateJDL()) return;
// Offline mode stops once the files have been produced.
872 if (TestBit(AliAnalysisGrid::kOffline)) return;
873 if (TestBit(AliAnalysisGrid::kTest)) {
874 // Locally testing the analysis
875 Info("StartAnalysis", "\n_______________________________________________________________________ \
876 \n Running analysis script in a daughter shell as on a worker node \
877 \n_______________________________________________________________________");
// Local output files with the declared names are removed before the run.
878 TObjArray *list = fOutputFiles.Tokenize(" ");
882 while((str=(TObjString*)next())) {
883 output_file = str->GetString();
884 Int_t index = output_file.Index("@");
885 if (index > 0) output_file.Remove(index);
886 gSystem->Exec(Form("rm %s", output_file.Data()));
// The executable and the validation script are run through bash.
889 gSystem->Exec(Form("bash %s 2>stderr", fExecutable.Data()));
890 gSystem->Exec("bash validate.sh");
891 // gSystem->Exec("cat stdout");
// Grid submission of the JDL; the job id is read from the command result.
896 TGridResult *res = gGrid->Command(Form("submit %s", fJDLName.Data()));
899 const char *cjobId = res->GetKey(0,"jobId");
901 Error("StartAnalysis", "Your JDL %s could not be submitted", fJDLName.Data());
904 Info("StartAnalysis", "\n_______________________________________________________________________ \
905 \n##### Your JDL %s was successfully submitted. \nTHE JOB ID IS: %s \
906 \n_______________________________________________________________________",
907 fJDLName.Data(), cjobId);
912 Info("StartAnalysis", "\n#### STARTING AN ALIEN SHELL FOR YOU. EXIT WHEN YOUR JOB %s HAS FINISHED. #### \
913 \n You may exit at any time and terminate the job later using the option <terminate> \
914 \n ##################################################################################", jobID.Data());
918 //______________________________________________________________________________
// Stores the current analysis manager in the local file analysis.root (unless
// in submit mode) and uploads that file to the AliEn working directory. While
// doing so, the kUseMC / kUseESD / kUseAOD bits of this object are set from the
// handlers registered in the manager. Several lines (including the write
// itself) are missing from this listing.
919 void AliAnalysisAlien::WriteAnalysisFile()
921 // Write current analysis manager into the file analysis.root
922 if (!TestBit(AliAnalysisGrid::kSubmit)) {
923 AliAnalysisManager *mgr = AliAnalysisManager::GetAnalysisManager();
924 if (!mgr || !mgr->IsInitialized()) {
925 Error("WriteAnalysisFile", "You need an initialized analysis manager for this");
928 // Check analysis type
930 if (mgr->GetMCtruthEventHandler()) TObject::SetBit(AliAnalysisGrid::kUseMC);
931 handler = (TObject*)mgr->GetInputEventHandler();
// The class of the input handler selects the ESD and/or AOD bit.
933 if (handler->InheritsFrom("AliESDInputHandler")) TObject::SetBit(AliAnalysisGrid::kUseESD);
934 if (handler->InheritsFrom("AliAODInputHandler")) TObject::SetBit(AliAnalysisGrid::kUseAOD);
// The current directory is remembered so it can be restored after the file
// has been opened.
936 TDirectory *cdir = gDirectory;
937 TFile *file = TFile::Open("analysis.root", "RECREATE");
942 if (cdir) cdir->cd();
943 Info("WriteAnalysisFile", "\n##### Analysis manager: %s wrote to file <analysis.root>\n", mgr->GetName());
// <copy> is switched off in offline and test mode.
946 if (TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE;
949 TString workdir = gGrid->GetHomeDirectory();
950 workdir += fGridWorkingDir;
// NOTE(review): the message below is tagged "CreateJDL" although it is emitted
// from WriteAnalysisFile.
951 Info("CreateJDL", "\n##### Copying file <analysis.root> containing your initialized analysis manager to your alien workspace");
// An existing remote copy is removed before the upload.
952 if (FileExists("analysis.root")) gGrid->Rm("analysis.root");
953 TFile::Cp("file:analysis.root", Form("alien://%s/analysis.root", workdir.Data()));
957 //______________________________________________________________________________
958 void AliAnalysisAlien::WriteAnalysisMacro()
960 // Write the analysis macro that will steer the analysis in grid mode.
961 if (!TestBit(AliAnalysisGrid::kSubmit)) {
963 out.open(fAnalysisMacro.Data(), ios::out);
965 Error("WriteAnalysisMacro", "could not open file %s for writing", fAnalysisMacro.Data());
968 TString func = fAnalysisMacro;
969 TString type = "ESD";
970 TString comment = "// Analysis using ";
971 if (TObject::TestBit(AliAnalysisGrid::kUseESD)) comment += "ESD";
972 if (TObject::TestBit(AliAnalysisGrid::kUseAOD)) {
976 if (TObject::TestBit(AliAnalysisGrid::kUseMC)) comment += "/MC";
977 else comment += " data";
978 out << "const char *anatype = \"" << type.Data() << "\";" << endl << endl;
979 func.ReplaceAll(".C", "");
980 out << "void " << func.Data() << "()" << endl;
982 out << comment.Data() << endl;
983 out << "// Automatically generated analysis steering macro executed in grid subjobs" << endl << endl;
984 out << "// load base root libraries" << endl;
985 out << " gSystem->Load(\"libTree\");" << endl;
986 out << " gSystem->Load(\"libGeom\");" << endl;
987 out << " gSystem->Load(\"libVMC\");" << endl;
988 out << " gSystem->Load(\"libPhysics\");" << endl << endl;
989 out << "// load analysis framework libraries" << endl;
990 out << " gSystem->Load(\"libSTEERBase\");" << endl;
991 out << " gSystem->Load(\"libESD\");" << endl;
992 out << " gSystem->Load(\"libAOD\");" << endl;
993 out << " gSystem->Load(\"libANALYSIS\");" << endl;
994 out << " gSystem->Load(\"libANALYSISalice\");" << endl << endl;
995 out << "// add aditional AliRoot libraries below" << endl;
996 if (fAdditionalLibs.Length()) {
997 TObjArray *list = fAdditionalLibs.Tokenize(" ");
1000 while((str=(TObjString*)next())) {
1001 if (str->GetString().Contains(".so"))
1002 out << " gSystem->Load(\"" << str->GetString().Data() << "\");" << endl;
1004 if (list) delete list;
1007 out << "// include path (remove if using par files)" << endl;
1008 out << " gROOT->ProcessLine(\".include $ALICE_ROOT/include\");" << endl << endl;
1009 out << "// analysis source to be compiled at runtime (if any)" << endl;
1010 if (fAnalysisSource.Length()) {
1011 TObjArray *list = fAnalysisSource.Tokenize(" ");
1014 while((str=(TObjString*)next())) {
1015 out << " gROOT->ProcessLine(\".L " << str->GetString().Data() << "+g\");" << endl;
1017 if (list) delete list;
1020 out << "// connect to AliEn and make the chain" << endl;
1021 out << " if (!TGrid::Connect(\"alien://\")) return;" << endl;
1022 if (IsUsingTags()) {
1023 out << " TChain *chain = CreateChainFromTags(\"wn.xml\", anatype);" << endl << endl;
1025 out << " TChain *chain = CreateChain(\"wn.xml\", anatype);" << endl << endl;
1027 out << "// read the analysis manager from file" << endl;
1028 out << " TFile *file = TFile::Open(\"analysis.root\");" << endl;
1029 out << " if (!file) return;" << endl;
1030 out << " TIter nextkey(file->GetListOfKeys());" << endl;
1031 out << " AliAnalysisManager *mgr = 0;" << endl;
1032 out << " TKey *key;" << endl;
1033 out << " while ((key=(TKey*)nextkey())) {" << endl;
1034 out << " if (!strcmp(key->GetClassName(), \"AliAnalysisManager\"))" << endl;
1035 out << " mgr = (AliAnalysisManager*)file->Get(key->GetName());" << endl;
1036 out << " };" << endl;
1037 out << " if (!mgr) {" << endl;
1038 out << " ::Error(\"" << func.Data() << "\", \"No analysis manager found in file analysis.root\");" << endl;
1039 out << " return;" << endl;
1040 out << " }" << endl << endl;
1041 out << " mgr->PrintStatus();" << endl;
1042 out << " mgr->StartAnalysis(\"localfile\", chain);" << endl;
1043 out << "}" << endl << endl;
1044 if (IsUsingTags()) {
1045 out << "TChain* CreateChainFromTags(const char *xmlfile, const char *type=\"ESD\")" << endl;
1047 out << "// Create a chain using tags from the xml file." << endl;
1048 out << " TAlienCollection* coll = TAlienCollection::Open(xmlfile);" << endl;
1049 out << " if (!coll) {" << endl;
1050 out << " ::Error(\"CreateChainFromTags\", \"Cannot create an AliEn collection from %s\", xmlfile);" << endl;
1051 out << " return NULL;" << endl;
1052 out << " }" << endl;
1053 out << " TGridResult* tagResult = coll->GetGridResult(\"\",kFALSE,kFALSE);" << endl;
1054 out << " AliTagAnalysis *tagAna = new AliTagAnalysis(type);" << endl;
1055 out << " tagAna->ChainGridTags(tagResult);" << endl << endl;
1056 out << " AliRunTagCuts *runCuts = new AliRunTagCuts();" << endl;
1057 out << " AliLHCTagCuts *lhcCuts = new AliLHCTagCuts();" << endl;
1058 out << " AliDetectorTagCuts *detCuts = new AliDetectorTagCuts();" << endl;
1059 out << " AliEventTagCuts *evCuts = new AliEventTagCuts();" << endl;
1060 out << " // Check if the cuts configuration file was provided" << endl;
1061 out << " if (!gSystem->AccessPathName(\"ConfigureCuts.C\")) {" << endl;
1062 out << " gROOT->LoadMacro(\"ConfigureCuts.C\");" << endl;
1063 out << " ConfigureCuts(runCuts, lhcCuts, detCuts, evCuts);" << endl;
1064 out << " }" << endl;
1065 out << " TChain *chain = tagAna->QueryTags(runCuts, lhcCuts, detCuts, evCuts);" << endl;
1066 out << " if (!chain || !chain->GetNtrees()) return NULL;" << endl;
1067 out << " chain->ls();" << endl;
1068 out << " return chain;" << endl;
1070 if (gSystem->AccessPathName("ConfigureCuts.C")) {
1071 TString msg = "\n##### You may want to provide a macro ConfigureCuts.C with a method:\n";
1072 msg += " void ConfigureCuts(AliRunTagCuts *runCuts,\n";
1073 msg += " AliLHCTagCuts *lhcCuts,\n";
1074 msg += " AliDetectorTagCuts *detCuts,\n";
1075 msg += " AliEventTagCuts *evCuts)";
1076 Info("WriteAnalysisMacro", msg.Data());
1079 out << "TChain* CreateChain(const char *xmlfile, const char *type=\"ESD\")" << endl;
1081 out << "// Create a chain using url's from xml file" << endl;
1082 out << " TString treename = type;" << endl;
1083 out << " treename.ToLower();" << endl;
1084 out << " treename += \"Tree\";" << endl;
1085 out << " printf(\"***************************************\");" << endl;
1086 out << " printf(\" Getting chain of trees %s\\n\", treename);" << endl;
1087 out << " printf(\"***************************************\");" << endl;
1088 out << " TAlienCollection *coll = TAlienCollection::Open(xmlfile);" << endl;
1089 out << " if (!coll) {" << endl;
1090 out << " ::Error(\"CreateChain\", \"Cannot create an AliEn collection from %s\", xmlfile);" << endl;
1091 out << " return NULL;" << endl;
1092 out << " }" << endl;
1093 out << " TChain *chain = new TChain(treename);" << endl;
1094 out << " coll->Reset();" << endl;
1095 out << " while (coll->Next()) chain->Add(coll->GetTURL(\"\"));" << endl;
1096 out << " if (!chain->GetNtrees()) {" << endl;
1097 out << " ::Error(\"CreateChain\", \"No tree found from collection %s\", xmlfile);" << endl;
1098 out << " return NULL;" << endl;
1099 out << " }" << endl;
1100 out << " return chain;" << endl;
1103 Info("WriteAnalysisMacro", "\n##### Analysis macro to run on worker nodes <%s> written",fAnalysisMacro.Data());
1105 Bool_t copy = kTRUE;
1106 if (TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE;
1109 TString workdir = gGrid->GetHomeDirectory();
1110 workdir += fGridWorkingDir;
1111 if (FileExists(fAnalysisMacro)) gGrid->Rm(fAnalysisMacro);
1112 if (IsUsingTags() && !gSystem->AccessPathName("ConfigureCuts.C")) {
1113 if (FileExists("ConfigureCuts.C")) gGrid->Rm("ConfigureCuts.C");
1114 Info("WriteAnalysisMacro", "\n##### Copying cuts configuration macro: <ConfigureCuts.C> to your alien workspace");
1115 TFile::Cp("file:ConfigureCuts.C", Form("alien://%s/ConfigureCuts.C", workdir.Data()));
1117 Info("WriteAnalysisMacro", "\n##### Copying analysis macro: <%s> to your alien workspace", fAnalysisMacro.Data());
1118 TFile::Cp(Form("file:%s",fAnalysisMacro.Data()), Form("alien://%s/%s", workdir.Data(), fAnalysisMacro.Data()));
1122 //______________________________________________________________________________
1123 void AliAnalysisAlien::WriteExecutable()
1125 // Generate the alien executable script.
// The script is a bash wrapper named by fExecutable. It prints the job
// environment and then runs the analysis macro (fAnalysisMacro) with
// "root -b -q". Unless the plugin is in offline or test mode, the generated
// file is afterwards copied to the bin directory under the user's AliEn home.
// NOTE(review): this listing omits some short lines (stream declaration,
// failure check, closing braces); comments below that depend on them are hedged.
// The script is only (re)generated when the kSubmit bit is NOT set.
1126 if (!TestBit(AliAnalysisGrid::kSubmit)) {
1128 out.open(fExecutable.Data(), ios::out);
// Reported when the output file cannot be used (the guarding condition is not
// visible here). NOTE(review): the location label says "CreateJDL" although
// this is WriteExecutable - looks like a copy-paste; confirm before changing.
1130 Error("CreateJDL", "Bad file name for executable: %s", fExecutable.Data());
// Script prologue: shebang and the list of API servers exported to the job.
1133 out << "#!/bin/bash" << endl;
1134 out << "export GCLIENT_SERVER_LIST=\"pcapiserv04.cern.ch:10000|pcapiserv05.cern.ch:10000|pcapiserv06.cern.ch:10000|pcapiserv07.cern.ch:10000\"" << endl;
// Diagnostic section: echo the environment variables and the resolved
// root/aliroot binaries into the job output.
1135 out << "echo \"=========================================\"" << endl;
1136 out << "echo \"############## PATH : ##############\"" << endl;
1137 out << "echo $PATH" << endl;
1138 out << "echo \"############## LD_LIBRARY_PATH : ##############\"" << endl;
1139 out << "echo $LD_LIBRARY_PATH" << endl;
1140 out << "echo \"############## ROOTSYS : ##############\"" << endl;
1141 out << "echo $ROOTSYS" << endl;
1142 out << "echo \"############## which root : ##############\"" << endl;
1143 out << "which root" << endl;
1144 out << "echo \"############## ALICE_ROOT : ##############\"" << endl;
1145 out << "echo $ALICE_ROOT" << endl;
1146 out << "echo \"############## which aliroot : ##############\"" << endl;
1147 out << "which aliroot" << endl;
1148 out << "echo \"=========================================\"" << endl << endl;
// Payload: run the analysis macro in batch mode (-b) and quit afterwards (-q).
1149 // if (TestBit(AliAnalysisGrid::kTest)) out << "root ";
1150 out << "root -b -q ";
1151 out << fAnalysisMacro.Data() << endl << endl;
1152 out << "echo \"======== " << fAnalysisMacro.Data() << " finished ========\"" << endl;
// Decide whether to upload: no copy in offline or test mode.
1154 Bool_t copy = kTRUE;
1155 if (TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE;
// Upload section (presumably guarded by 'copy'; the guard line is not visible).
// NOTE(review): 'workdir' is built here but not referenced by the visible
// lines that follow - the executable goes to <home>/bin, not to workdir.
1158 TString workdir = gGrid->GetHomeDirectory();
1159 workdir += fGridWorkingDir;
1160 TString executable = Form("%s/bin/%s", gGrid->GetHomeDirectory(), fExecutable.Data());
// Remove a previously uploaded copy so that the new one replaces it.
1161 if (FileExists(executable)) gGrid->Rm(executable);
// NOTE(review): same "CreateJDL" location label as above.
1162 Info("CreateJDL", "\n##### Copying executable file <%s> to your AliEn bin directory", fExecutable.Data());
1163 TFile::Cp(Form("file:%s",fExecutable.Data()), Form("alien://%s", executable.Data()));
1167 //______________________________________________________________________________
1168 void AliAnalysisAlien::WriteValidationScript()
1170 // Generate the alien validation script.
1171 // Generate the validation script
// Writes a bash script "validate.sh" that checks that every file listed in
// fOutputFiles exists in the job directory; the script exits with status 1 if
// any is missing and 0 otherwise. Unless the plugin is in offline or test
// mode, the script is afterwards copied to the AliEn working directory.
// NOTE(review): this listing omits some short lines (connection check,
// iterator declarations, closing braces); comments that depend on them are hedged.
// Reported when no AliEn connection is available (the guarding condition is
// not visible here).
1174 Error("WriteValidationScript", "Alien connection required");
// Suffix appended to the generated echo/ls commands: empty in test mode,
// otherwise the command output is appended to a file named "stdout".
1177 TString out_stream = "";
1178 if (!TestBit(AliAnalysisGrid::kTest)) out_stream = " >> stdout";
// The script is only (re)generated when the kSubmit bit is NOT set.
1179 if (!TestBit(AliAnalysisGrid::kSubmit)) {
1181 out.open("validate.sh", ios::out);
// Script prologue: locate the directory holding the script, fall back to "."
// when it is empty, and change into it.
1182 out << "#!/bin/bash" << endl;
1183 out << "##################################################" << endl;
1184 out << "validateout=`dirname $0`" << endl;
1185 out << "validatetime=`date`" << endl;
1186 out << "validated=\"0\";" << endl;
1187 out << "error=0" << endl;
1188 out << "if [ -z $validateout ]" << endl;
1189 out << "then" << endl;
1190 out << " validateout=\".\"" << endl;
1191 out << "fi" << endl << endl;
1192 out << "cd $validateout;" << endl;
1193 out << "validateworkdir=`pwd`;" << endl << endl;
// Banner with time and directories, followed by a listing of the directory.
1194 out << "echo \"*******************************************************\"" << out_stream << endl;
1195 out << "echo \"* Automatically generated validation script *\"" << out_stream << endl;
1197 out << "echo \"* Time: $validatetime \"" << out_stream << endl;
1198 out << "echo \"* Dir: $validateout\"" << out_stream << endl;
1199 out << "echo \"* Workdir: $validateworkdir\"" << out_stream << endl;
1200 out << "echo \"* ----------------------------------------------------*\"" << out_stream << endl;
1201 out << "ls -la ./" << out_stream << endl;
1202 out << "echo \"* ----------------------------------------------------*\"" << out_stream << endl << endl;
1203 out << "##################################################" << endl;
// One existence check per entry of the space-separated fOutputFiles list.
1204 TObjArray *arr = fOutputFiles.Tokenize(" ");
1206 TString output_file;
1207 while ((os=(TObjString*)next1())) {
1208 output_file = os->GetString();
// Strip everything from '@' onward so only the bare file name is tested
// (applied only when '@' is found past the first character).
1209 Int_t index = output_file.Index("@");
1210 if (index > 0) output_file.Remove(index);
// A missing file sets error=1 and is reported both through out_stream and
// by appending to a file named "stderr".
1211 out << "if ! [ -f " << output_file.Data() << " ] ; then" << endl;
1212 out << " error=1" << endl;
1213 out << " echo \"Output file(s) not found. Job FAILED !\"" << out_stream << endl;
1214 out << " echo \"Output file(s) not found. Job FAILED !\" >> stderr" << endl;
1215 out << "fi" << endl;
// Success message only when no check failed.
1218 out << "if [ $error = 0 ] ; then" << endl;
1219 out << " echo \"* ---------------- Job Validated ------------------*\"" << out_stream << endl;
1220 out << "fi" << endl;
// Script epilogue: closing banner, return to the previous directory and
// propagate the error flag as the exit status.
1222 out << "echo \"* ----------------------------------------------------*\"" << out_stream << endl;
1223 out << "echo \"*******************************************************\"" << out_stream << endl;
1224 out << "cd -" << endl;
1225 out << "exit $error" << endl;
// Decide whether to upload: no copy in offline or test mode.
1227 Bool_t copy = kTRUE;
1228 if (TestBit(AliAnalysisGrid::kOffline) || TestBit(AliAnalysisGrid::kTest)) copy = kFALSE;
// Upload section (presumably guarded by 'copy'; the guard line is not visible):
// the destination is <AliEn home> + fGridWorkingDir.
1231 TString workdir = gGrid->GetHomeDirectory();
1232 workdir += fGridWorkingDir;
// NOTE(review): the location label says "CreateJDL" although this is
// WriteValidationScript - looks like a copy-paste; confirm before changing.
1233 Info("CreateJDL", "\n##### Copying validation script <validate.sh> to your AliEn working space");
// Remove a previously uploaded copy, then copy the local file to the grid.
1234 if (FileExists("validate.sh")) gGrid->Rm("validate.sh");
1235 TFile::Cp("file:validate.sh", Form("alien://%s/validate.sh", workdir.Data()));