1 /**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
18 //////////////////////////////////////////////////////////////////////////
19 // marian.ivanov@cern.ch
20 // Utilities for file merging.
21 // Additional functionality on top of the standard TFileMerger:
23 // 1. Possibility to Set the reject/accept list.
24 // 1.a) Only entries selected in accept list are merged. By default all entries are selected
25 // use AddAccept 0 to specify your desired entry
26 // 1.b) Entries selected in reject list are not merged. By default the reject list is empty.
28 // 2. syswatch.log is created diring mergin procedure.
29 // Memeory consumption - for reading and for merging can be monitored
31 // RS: Changed merger to respect the structure of files being merged (directories, collections...)
32 // Additional option: SetNoTrees (default false) to not merge any tree
33 // The code mostly taken from root's hadd.cxx
36 // Libraries for all classes to be merged should be loaded before using the class
37 gSystem->Load("libANALYSIS");
38 gSystem->Load("libANALYSIScalib");
39 gSystem->Load("libTPCcalib");
42 //Example usage starting from the input data list in text file:
45 merger.AddReject("esdFriend");
46 merger.IterTXT("calib.list","CalibObjects.root",kFALSE);
50 //////////////////////////////////////////////////////////////////////////
54 #include <THashList.h>
62 #include "TGridResult.h"
63 #include "TObjString.h"
64 #include "TObjArray.h"
65 #include "TMethodCall.h"
66 #include "Riostream.h"
67 #include "AliSysInfo.h"
68 #include "AliFileMerger.h"
75 ClassImp(AliFileMerger)
77 ProcInfo_t procInfo;//TMP
79 ////////////////////////////////////////////////////////////////////////
81 AliFileMerger::AliFileMerger():
89 // Default constructor
93 //______________________________________________________________________
94 AliFileMerger::AliFileMerger(const char* name):
106 //______________________________________________________________________
107 AliFileMerger::~AliFileMerger()
114 void AliFileMerger::IterAlien(const char* outputDir, const char* outputFileName, const char* pattern, Bool_t dontOverwrite){
117 // Merge the files coming out of the calibration job
120 // looking for files to be merged in the output directory
121 command = Form("find %s/ *%s", outputDir, pattern);
122 printf("command: %s\n", command.Data());
123 TGrid::Connect("alien://");
124 TGridResult *res = gGrid->Command(command);
128 // loop over the results
130 sourcelist.SetOwner(kTRUE);
132 while((map=(TMap*)nextmap())) {
134 TObjString *objs = dynamic_cast<TObjString*>(map->GetValue("turl"));
135 if (!objs || !objs->GetString().Length()) {
136 // Nothing found - skip this output
140 printf("looking for file %s\n",(objs->GetString()).Data());
141 AddFile(&sourcelist, (objs->GetString()).Data());;
144 IterList(&sourcelist, outputFileName, dontOverwrite);
148 void AliFileMerger::IterList(const TList* namesList, const char* outputFileName, Bool_t dontOverwrite)
150 // merge in steps or in one go
152 gSystem->GetProcInfo(&procInfo);
153 AliInfo(Form(">> memory usage %ld %ld", procInfo.fMemResident, procInfo.fMemVirtual));
155 TString outputFile(outputFileName);
156 gSystem->ExpandPathName(outputFile);
158 int nFiles = namesList->GetEntries();
159 int maxSrcOpen = fMaxFilesOpen - 1;
161 filesList.SetOwner(kTRUE);
163 TString tmpDest[2] = {outputFile,outputFile}; // names for tmp files
164 int npl = outputFile.Last('.');
165 if (npl<0) npl = outputFile.Length();
166 for (int i=0;i<2;i++) tmpDest[i].Insert(npl,Form("_TMPMERGE%d_",i));
168 int nsteps = 0, currTmp = 0, start = 0;
169 for (int ifl=0;ifl<nFiles;ifl++) {
170 int st = ifl%maxSrcOpen;
171 if (st==0 && ifl) { // new chunk should be started, merge what was already accumulated
172 OpenNextChunks(namesList,&filesList,start,ifl-1);
173 start = ifl; // remember where to start next step
174 if (nsteps++) { // if not 1st one, merge the privous chunk with this one
175 filesList.AddFirst(TFile::Open(tmpDest[currTmp].Data()));
176 currTmp = (currTmp==0) ? 1:0; // swap tmp files
179 TFile* targetTmp = TFile::Open( tmpDest[currTmp].Data(), "RECREATE");
180 if (!targetTmp || targetTmp->IsZombie()) {
181 printf("Error opening temporary file %s\n",tmpDest[currTmp].Data());
184 MergeRootfile(targetTmp, &filesList);
187 filesList.Clear(); // close all open files
189 // nothing to do until needed amount of files is accumulated
192 TFile* target = TFile::Open( outputFile.Data(), (dontOverwrite ? "CREATE":"RECREATE") );
193 if (!target || target->IsZombie()) {
194 cerr << "Error opening target file (does " << outputFileName << " exist?)." << endl;
195 cerr << "Use force = kTRUE to re-creation of output file." << endl;
198 OpenNextChunks(namesList,&filesList,start,nFiles-1);
199 // add result of previous merges
200 if (nsteps) filesList.AddFirst(TFile::Open(tmpDest[currTmp].Data()));
201 MergeRootfile( target, &filesList);
206 for (int i=0;i<2;i++) gSystem->Exec(Form("if [ -e %s ]; then \nrm %s\nfi",tmpDest[i].Data(),tmpDest[i].Data()));
208 printf("Merged %d files in %d steps\n",nFiles,++nsteps);
210 gSystem->GetProcInfo(&procInfo);
211 AliInfo(Form("<< memory usage %ld %ld", procInfo.fMemResident, procInfo.fMemVirtual));
214 void AliFileMerger::IterTXT( const char * fileList, const char* outputFileName, Bool_t dontOverwrite){
216 // Merge the files indicated in the list - fileList
217 // ASCII file option example:
218 // find `pwd`/ | grep AliESDfriends_v1.root > calib.list
220 // Open the input stream
223 // Read the input list of files
227 sourcelist.SetOwner(kTRUE);
230 if (!objfile.Contains(".root")) continue; // protection
231 gSystem->ExpandPathName(objfile);
232 printf("Add file:Counter\t%d\tMerging file %s\n",counter++,objfile.Data());
233 AddFile(&sourcelist, objfile.Data());
236 IterList(&sourcelist, outputFileName, dontOverwrite);
240 void AliFileMerger::StoreResults(TObjArray * array, const char* outputFileName){
242 // Storing the results in one single file
244 TFile *f = new TFile(outputFileName,"recreate");
245 for (Int_t i=0; i<array->GetEntries(); i++){
246 TObject *object0 = array->At(i);
247 if (!object0) continue;
255 void AliFileMerger::StoreSeparateResults(TObjArray * array, const char* outputFileName){
257 // Store the results in separate files (one per object)
259 for (Int_t i=0; i<array->GetEntries(); i++){
260 TObject *object0 = array->At(i);
261 if (!object0) continue;
262 TFile *f = new TFile(Form("%s_%s.root",outputFileName,object0->GetName()),"recreate");
269 void AliFileMerger::Merge(TFile* fileIn, TObjArray * array){
274 static Int_t counter=-1;
276 TObjArray *carray = new TObjArray; //array of the objects inside current file
277 carray->SetOwner(kTRUE);
279 // load all objects to memory
281 TList *farr = fileIn->GetListOfKeys();
286 for (Int_t ical=0; ical<farr->GetEntries(); ical++){
287 if (!farr->At(ical)) continue;
288 TString name(farr->At(ical)->GetName());
289 if (!IsAccepted(name)) continue; // skip not accepted entries
290 TObject *obj = fileIn->Get(name.Data());
291 if (obj) carray->AddLast(obj);
292 AliSysInfo::AddStamp(name.Data(),1,ical,counter);
295 if (carray->GetEntries()==0) {
300 Int_t entries =carray->GetEntriesFast();
301 for (Int_t i=0; i<entries; i++){
303 TObjArray *templist = new TObjArray(1);
304 templist->SetOwner(kFALSE);
305 TObject *currentObject = carray->At(i);
306 if (!currentObject) {
310 printf("%s\n",currentObject->GetName());
311 callEnv.InitWithPrototype(currentObject->IsA(), "Merge", "TCollection*");
312 if (!callEnv.IsValid()) {
316 TString oname=currentObject->GetName();
317 TObject *mergedObject = array->FindObject(currentObject->GetName());
319 array->AddLast(currentObject);
324 templist->AddLast(currentObject);
325 callEnv.SetParam((Long_t) templist);
326 callEnv.Execute(mergedObject);
327 AliSysInfo::AddStamp(currentObject->GetName(),2,i,counter);
334 Bool_t AliFileMerger::IsAccepted(TString name){
336 // Accept/reject logic
337 // name - name of the entry
339 // if fAcceptMask specified - entry has to be in list of selected
340 // if fRejectMask speciefied - entry with name speciief in the list are rejected
346 for (Int_t iaccept=0; iaccept<fAcceptMask->GetEntries(); iaccept++){
347 if (name.Contains(fAcceptMask->At(iaccept)->GetName())) accept=kTRUE; // entry was selected
350 if (!accept) return kFALSE;
354 for (Int_t ireject=0; ireject<fRejectMask->GetEntries(); ireject++){
355 if (name.Contains(fRejectMask->At(ireject)->GetName())) accept=kFALSE; // entry was rejected
361 Bool_t AliFileMerger::IsRejected(TString name){
363 // check is the name is explicitly in the rejection list
364 // if fRejectMask speciefied - entry with name speciief in the list are rejected
366 Bool_t reject=kFALSE;
369 for (Int_t ireject=0; ireject<fRejectMask->GetEntries(); ireject++){
370 if (name.Contains(fRejectMask->At(ireject)->GetName())) {reject=kTRUE; break;} // entry was rejected
378 void AliFileMerger::AddReject(const char *reject)
381 // add reject string to the list of entries to be rejected for merging
384 fRejectMask = new TObjArray();
385 fRejectMask->SetOwner(kTRUE);
387 fRejectMask->AddLast(new TObjString(reject));
390 void AliFileMerger::AddAccept(const char *accept)
393 // add reject string to the list of entries to be rejected for merging
396 fAcceptMask = new TObjArray();
397 fAcceptMask->SetOwner(kTRUE);
399 fAcceptMask->AddLast(new TObjString(accept));
402 //___________________________________________________________________________
403 int AliFileMerger::MergeRootfile( TDirectory *target, TList *sourcelist, Bool_t nameFiltering)
405 // Merge all objects in a directory
406 // modified version of root's hadd.cxx
407 gSystem->GetProcInfo(&procInfo);
408 AliInfo(Form(">> memory usage %ld %ld", procInfo.fMemResident, procInfo.fMemVirtual));
411 cout << "Target path: " << target->GetPath() << endl;
412 TString path( (char*)strstr( target->GetPath(), ":" ) );
415 // find 1st valid file
416 TDirectory *first_source = (TDirectory*)sourcelist->First();
418 Int_t nguess = sourcelist->GetSize()+1000;
419 THashList allNames(nguess);
420 allNames.SetOwner(kTRUE);
421 ((THashList*)target->GetList())->Rehash(nguess);
422 ((THashList*)target->GetListOfKeys())->Rehash(nguess);
425 listHargs.Form("((TCollection*)0x%lx)", (ULong_t)&listH);
427 while(first_source) {
429 TDirectory *current_sourcedir = first_source->GetDirectory(path);
430 if (!current_sourcedir) {
431 first_source = (TDirectory*)sourcelist->After(first_source);
434 // loop over all keys in this directory
435 TChain *globChain = 0;
436 TIter nextkey( current_sourcedir->GetListOfKeys() );
437 TKey *key, *oldkey=0;
438 //gain time, do not add the objects in the list in memory
439 TH1::AddDirectory(kFALSE);
444 while ( (key = (TKey*)nextkey())) {
445 if (current_sourcedir == target) break;
447 // check if we don't reject this name
448 TString nameK(key->GetName());
449 if ((!IsAccepted(nameK) && nameFiltering) || (!nameFiltering && IsRejected(nameK))) {
450 if (!counterF) printf("Object %s is in rejection list, skipping...\n",nameK.Data());
454 //keep only the highest cycle number for each key
455 if (oldkey && !strcmp(oldkey->GetName(),key->GetName())) continue;
456 if (!strcmp(key->GetClassName(),"TProcessID")) {key->ReadObj(); continue;}
457 if (allNames.FindObject(key->GetName())) continue;
458 TClass *cl = TClass::GetClass(key->GetClassName());
459 if (!cl || !cl->InheritsFrom(TObject::Class())) {
460 cout << "Cannot merge object type, name: "
461 << key->GetName() << " title: " << key->GetTitle() << endl;
464 printf("Merging object %s, anchor directory: %s\n",key->GetName(),key->GetMotherDir()->GetPath());
465 allNames.Add(new TObjString(key->GetName()));
466 AliSysInfo::AddStamp(nameK.Data(),1,++counterK,counterF++);
467 // read object from first source file
468 //current_sourcedir->cd();
470 TDirectory* currDir = gDirectory;
471 key->GetMotherDir()->cd();
472 TObject *obj = key->ReadObj();
475 AliError(Form("Failed to get the object with key %s from %s",key->GetName(),current_sourcedir->GetFile()->GetName()));
479 if ( obj->IsA()->InheritsFrom( TTree::Class() ) ) {
481 // loop over all source files create a chain of Trees "globChain"
485 obj_name = path + "/" + obj->GetName();
487 obj_name = obj->GetName();
489 globChain = new TChain(obj_name);
490 globChain->Add(first_source->GetName());
491 TFile *nextsource = (TFile*)sourcelist->After( first_source );
492 while ( nextsource ) {
493 //do not add to the list a file that does not contain this Tree
494 TFile *curf = TFile::Open(nextsource->GetName());
496 Bool_t mustAdd = kFALSE;
497 if (curf->FindKey(obj_name)) {
500 //we could be more clever here. No need to import the object
501 //we are missing a function in TDirectory
502 TObject *aobj = curf->Get(obj_name);
503 if (aobj) { mustAdd = kTRUE; delete aobj;}
506 globChain->Add(nextsource->GetName());
510 nextsource = (TFile*)sourcelist->After( nextsource );
513 } else if ( obj->IsA()->InheritsFrom( TDirectory::Class() ) ) {
514 // it's a subdirectory
516 cout << "Found subdirectory " << obj->GetName() << endl;
517 // create a new subdir of same name and title in the target file
519 TDirectory *newdir = target->mkdir( obj->GetName(), obj->GetTitle() );
521 // newdir is now the starting point of another round of merging
522 // newdir still knows its depth within the target file via
523 // GetPath(), so we can still figure out where we are in the recursion
524 status = MergeRootfile( newdir, sourcelist, kFALSE);
525 if (status) return status;
527 } else if ( obj->InheritsFrom(TObject::Class())
528 && obj->IsA()->GetMethodWithPrototype("Merge", "TCollection*") ) {
529 // object implements Merge(TCollection*)
531 // loop over all source files and merge same-name object
532 TFile *nextsource = (TFile*)sourcelist->After( first_source );
533 while ( nextsource ) {
534 // make sure we are at the correct directory level by cd'ing to path
535 TDirectory *ndir = nextsource->GetDirectory(path);
538 TKey *key2 = (TKey*)gDirectory->GetListOfKeys()->FindObject(key->GetName());
540 TObject *hobj = key2->ReadObj();
542 cout << "Failed to get the object with key " << key2->GetName() << " from " <<
543 ndir->GetFile()->GetName() << "/" << ndir->GetName() << endl;
544 nextsource = (TFile*)sourcelist->After( nextsource );
548 hobj->ResetBit(kMustCleanup);
551 obj->Execute("Merge", listHargs.Data(), &error); // RS Probleme here
553 cerr << "Error calling Merge() on " << obj->GetName()
554 << " with the corresponding object in " << nextsource->GetName() << endl;
557 // get the number of processed entries to be put in the syswatch.log
558 Double_t numberOfEntries = -1;
559 if (obj->IsA()->GetMethodAllAny("GetEntries"))
561 TMethodCall getEntries(obj->IsA(), "GetEntries", "");
562 getEntries.Execute(obj, numberOfEntries);
564 AliSysInfo::AddStamp(nameK.Data(),1,counterK,counterF++,numberOfEntries);
567 nextsource = (TFile*)sourcelist->After( nextsource );
569 } else if ( obj->IsA()->InheritsFrom( THStack::Class() ) ) {
570 THStack *hstack1 = (THStack*) obj;
571 TList* l = new TList();
573 // loop over all source files and merge the histos of the
574 // corresponding THStacks with the one pointed to by "hstack1"
575 TFile *nextsource = (TFile*)sourcelist->After( first_source );
576 while ( nextsource ) {
577 // make sure we are at the correct directory level by cd'ing to path
578 TDirectory *ndir = nextsource->GetDirectory(path);
581 TKey *key2 = (TKey*)gDirectory->GetListOfKeys()->FindObject(hstack1->GetName());
583 THStack *hstack2 = (THStack*) key2->ReadObj();
584 l->Add(hstack2->GetHists()->Clone());
586 AliSysInfo::AddStamp(nameK.Data(),1,counterK,counterF++);
590 nextsource = (TFile*)sourcelist->After( nextsource );
592 hstack1->GetHists()->Merge(l);
596 // object is of no type that we can merge
597 cout << "Cannot merge object type, name: "
598 << obj->GetName() << " title: " << obj->GetTitle() << endl;
600 // loop over all source files and write similar objects directly to the output file
601 TFile *nextsource = (TFile*)sourcelist->After( first_source );
602 while ( nextsource ) {
603 // make sure we are at the correct directory level by cd'ing to path
604 TDirectory *ndir = nextsource->GetDirectory(path);
607 TKey *key2 = (TKey*)gDirectory->GetListOfKeys()->FindObject(key->GetName());
609 TObject *nobj = key2->ReadObj();
610 nobj->ResetBit(kMustCleanup);
611 int nbytes1 = target->WriteTObject(nobj, key2->GetName(), "SingleKey" );
612 if (nbytes1 <= 0) status = -1;
616 nextsource = (TFile*)sourcelist->After( nextsource );
620 // now write the merged histogram (which is "in" obj) to the target file
621 // note that this will just store obj in the current directory level,
622 // which is not persistent until the complete directory itself is stored
623 // by "target->Write()" below
626 //!!if the object is a tree, it is stored in globChain...
627 if(obj->IsA()->InheritsFrom( TDirectory::Class() )) {
628 //printf("cas d'une directory\n");
629 } else if(obj->IsA()->InheritsFrom( TTree::Class() )) {
632 globChain->Merge(target->GetFile(),0,"keep fast");
636 int nbytes2 = obj->Write( key->GetName(), TObject::kSingleKey );
637 if (nbytes2 <= 0) status = -1;
641 } // while ( ( TKey *key = (TKey*)nextkey() ) )
642 first_source = (TDirectory*)sourcelist->After(first_source);
644 // save modifications to target file
645 target->SaveSelf(kTRUE);
647 gSystem->GetProcInfo(&procInfo);
648 AliInfo(Form("<< memory usage %ld %ld", procInfo.fMemResident, procInfo.fMemVirtual));
653 //___________________________________________________________________________
654 int AliFileMerger::OpenNextChunks(const TList* namesList, TList* filesList, Int_t from, Int_t to)
656 gSystem->GetProcInfo(&procInfo);
657 AliInfo(Form(">> memory usage %ld %ld", procInfo.fMemResident, procInfo.fMemVirtual));
660 int nEnt = namesList->GetEntries();
661 from = from<nEnt ? from : nEnt;
662 to = to<nEnt ? to : nEnt;
664 for (int i=from;i<=to;i++) {
665 TNamed* fnam = (TNamed*)namesList->At(i);
667 TString fnamS(fnam->GetName());
668 gSystem->ExpandPathName(fnamS);
669 if (fnamS.BeginsWith("alien://") && !gGrid) TGrid::Connect("alien");
670 TFile* source = TFile::Open(fnam->GetName());
671 if( source==0 ) { printf("Failed to open file %s, will skip\n",fnam->GetName()); continue; }
672 filesList->Add(source);
673 printf("Opened file %s\n",fnam->GetName());
676 gSystem->GetProcInfo(&procInfo);
677 AliInfo(Form("<< memory usage %ld %ld", procInfo.fMemResident, procInfo.fMemVirtual));
683 //___________________________________________________________________________
684 int AliFileMerger::AddFile(TList* namesList, std::string entry)
686 // add a new file to the list of files
687 // static int count(0);
688 if( entry.empty() ) return 0;
689 size_t j =entry.find_first_not_of(' ');
690 if( j==std::string::npos ) return 0;
691 entry = entry.substr(j);
692 if( entry.substr(0,1)=="@") {
693 std::ifstream indirect_file(entry.substr(1).c_str() );
694 if( ! indirect_file.is_open() ) {
695 std::cerr<< "Could not open indirect file " << entry.substr(1) << std::endl;
698 while( indirect_file ){
700 std::getline(indirect_file, line);
701 if( AddFile(namesList, line)!=0 ) return 1;;
705 // cout << "Source file " << (++count) << ": " << entry << endl;
706 namesList->Add(new TNamed(entry,""));