Changes for report #56707: On the proper usage of syswatch.log (Marian)
authorhristov <hristov@f7af4fe6-9843-0410-8265-dc069ae4e863>
Mon, 26 Oct 2009 16:01:28 +0000 (16:01 +0000)
committerhristov <hristov@f7af4fe6-9843-0410-8265-dc069ae4e863>
Mon, 26 Oct 2009 16:01:28 +0000 (16:01 +0000)
STEER/AliSysInfo.cxx
STEER/AliSysInfo.h
macros/PlotSys.C

index 80800ec..0dcb6b8 100644 (file)
 // Origin:  marian.ivanov@cern.ch
 //
 //  Make   a log file for the CPU and Memory usage
+//
+//  Principles:
+//  Snapshots of the system information are writen to the text log files.
+//  Text files were chosen in order to get the ouptu also in case code 
+//  is crashing. 
+//  Following information is stored in the log file:
+//  TTimeStamp stamp;
+//  CpuInfo_t  cpuInfo;
+//  MemInfo_t  memInfo;
+//  ProcInfo_t procInfo;
+// 
+//  Root TSystem is used to retrieve this information:
+//  gSystem->GetCpuInfo(&cpuInfo, 10);
+//  gSystem->GetMemInfo(&memInfo);
+//  gSystem->GetProcInfo(&procInfo);
+//  for details see:
+//  http://root.cern.ch/root/html/TUnixSystem.html
+//  http://root.cern.ch/root/html/ProcInfo_t.html
+//  http://root.cern.ch/root/html/MemInfo_t.html
+//  http://root.cern.ch/root/html/CpuInfo_t.html
+//  -------------------------------------------------------------------
+//  class CpuInfo_t
+//   Float_t   fIdle   cpu idle percentage
+//   Float_t   fLoad15m        cpu load average over 15 m
+//   Float_t   fLoad1m cpu load average over 1 m
+//   Float_t   fLoad5m cpu load average over 5 m
+//   Float_t   fSys    cpu sys load in percentage
+//   Float_t   fTotal  cpu user+sys load in percentage
+//   Float_t   fUser   cpu user load in percentage
+
+//  -------------------------------------------------------------------
+//  class ProcInfo_t:
+//  Float_t    fCpuSys system time used by this process in seconds
+//  Float_t    fCpuUser        user time used by this process in seconds
+//  Long_t     fMemResident    resident memory used by this process in KB
+//  Long_t     fMemVirtual     virtual memory used by this process in KB
+//  -------------------------------------------------------------------
+
+
+//  The information from the AliSysInfo can be used as measurement
+//  of the code quality. Be aware of the limitation induced by
+//  using only system info described in the  AliSysInfo::Test() function
+// 
+//  The example usage of the AliSysInfo is shown in the
+//  AliSysInfo::Test() example.
+//  
 //  
-//  The typical usage:
+//
+//  
+//  The typical usage in the AliRoot code:
 //  Make a set of stamps in the code in the place of interest
 //  e.g. 
 //
@@ -60,7 +108,7 @@ AliSysInfo::AliSysInfo():
   fTimer = new TStopwatch;
   fSysWatch  = new fstream("syswatch.log", ios_base::out|ios_base::trunc);
 
-  //hname/C:sname/C:sec/I:mI.fMemUsed/F:mI.fSwapUsed/F:pI.fMemResident/F:pI.fMemVirtual/F:cI.fUser/F:cI.fSys/F:cI.fCpuUser/F:pI.fCpuSys/F
+  //hname/C:sname/C:sec/D:mI.fMemUsed/F:mI.fSwapUsed/F:pI.fMemResident/F:pI.fMemVirtual/F:cI.fUser/F:cI.fSys/F:cI.fCpuUser/F:pI.fCpuSys/F
 
 
   (*fSysWatch) <<"hname"<<"/C:"               // hname - hostname  
@@ -68,29 +116,29 @@ AliSysInfo::AliSysInfo():
                <<"id0"<<"/I:"                // 0 id
                <<"id1"<<"/I:"                // 1 id
                <<"id2"<<"/I:"                // 1 id
-               <<"first"<<"/I:"              // first stamp
+               <<"first"<<"/D:"              // first stamp
     //
-              <<"stampSec"<<"/I:"         // time  - time stamp in seconds
-              <<"mi.fMemUsed"<<"/F:"       // system info 
-              <<"mi.fSwapUsed"<<"/F:"      //
-              <<"cI.fUser"<<"/F:"         //
-              <<"cI.fSys"<<"/F:"         //
+              <<"stampSec"<<"/D:"         // time  - time stamp in seconds
+              <<"mi.fMemUsed"<<"/D:"       // system info 
+              <<"mi.fSwapUsed"<<"/D:"      //
+              <<"cI.fUser"<<"/D:"         //
+              <<"cI.fSys"<<"/D:"         //
     // 
-              <<"pI.fMemResident"<<"/F:"  // process info
-              <<"pI.fMemVirtual"<<"/F:"   //    
-              <<"pI.fCpuUser"<<"/F:"      //
-              <<"pI.fCpuSys"<<"/F:"       //
+              <<"pI.fMemResident"<<"/D:"  // process info
+              <<"pI.fMemVirtual"<<"/D:"   //    
+              <<"pI.fCpuUser"<<"/D:"      //
+              <<"pI.fCpuSys"<<"/D:"       //
     //
-              <<"stampOldSec"<<"/I:"         // time  - time stamp in seconds
-              <<"miOld.fMemUsed"<<"/F:"       // system info - previous
-              <<"miOld.fSwapUsed"<<"/F:"      //
-              <<"cIOld.fUser"<<"/F:"         //
-              <<"cIOld.fSys"<<"/F:"         //
+              <<"stampOldSec"<<"/D:"         // time  - time stamp in seconds
+              <<"miOld.fMemUsed"<<"/D:"       // system info - previous
+              <<"miOld.fSwapUsed"<<"/D:"      //
+              <<"cIOld.fUser"<<"/D:"         //
+              <<"cIOld.fSys"<<"/D:"         //
     // 
-              <<"pIOld.fMemResident"<<"/F:"  // process info -previous
-              <<"pIOld.fMemVirtual"<<"/F:"   //    
-              <<"pIOld.fCpuUser"<<"/F:"      //
-              <<"pIOld.fCpuSys"<<"/F"       //
+              <<"pIOld.fMemResident"<<"/D:"  // process info -previous
+              <<"pIOld.fMemVirtual"<<"/D:"   //    
+              <<"pIOld.fCpuUser"<<"/D:"      //
+              <<"pIOld.fCpuSys"<<"/D"       //
               << endl;
   
 }
@@ -121,13 +169,13 @@ void AliSysInfo::AddStamp(const char *sname, Int_t id0, Int_t id1, Int_t id2){
   gSystem->GetCpuInfo(&cpuInfo, 10);
   gSystem->GetMemInfo(&memInfo);
   gSystem->GetProcInfo(&procInfo);
-  procInfo.fMemVirtual/=1024;  //size in MBy
-  procInfo.fMemResident/=1024;  //size in MBy
+  //  procInfo.fMemVirtual/=1024;  //size in MBy
+  //procInfo.fMemResident/=1024;  //size in MBy
 
   const char * hname = gSystem->HostName();
 
   static Int_t entry=0;
-  static Int_t first=stamp.GetSec();
+  static Double_t  first=stamp.GetSec()+stamp.GetNanoSec()/1000000000.;
   //
   static TTimeStamp stampOld;
   static CpuInfo_t  cpuInfoOld;
@@ -135,35 +183,37 @@ void AliSysInfo::AddStamp(const char *sname, Int_t id0, Int_t id1, Int_t id2){
   static ProcInfo_t procInfoOld;  
 
 
-  (*(Instance()->fSysWatch)) << hname   <<"\t"               // hname - hostname  
-               << sname    <<"\t"              // stamp name
-               << id0      <<"\t"
-               << id1      <<"\t"
-               << id2      <<"\t"
-               << first    <<"\t"              // first stamp               
-               //
-              << stamp.GetSec()<<"\t"         // time  - time stamp in seconds
-              << memInfo.fMemUsed<<"\t"       // system info 
-              << memInfo.fSwapUsed<<"\t"      //
-              << cpuInfo.fUser <<"\t"         //
-              << cpuInfo.fSys  <<"\t"         //
-               // 
-              << procInfo.fMemResident<<"\t"  // process info
-              << procInfo.fMemVirtual<<"\t"   //    
-              << procInfo.fCpuUser<<"\t"      //
-              << procInfo.fCpuSys<<"\t"       //
+  (*(Instance()->fSysWatch)) 
+    << hname   <<"\t"               // hname - hostname  
+    << sname    <<"\t"              // stamp name
+    << id0      <<"\t"
+    << id1      <<"\t"
+    << id2      <<"\t"
+    <<setprecision(15)<< first    <<"\t"              // first stamp               
     //
-              << stampOld.GetSec()<<"\t"         // time  - time stamp in seconds
-              << memInfoOld.fMemUsed<<"\t"       // system info - previous
-              << memInfoOld.fSwapUsed<<"\t"      //
-              << cpuInfoOld.fUser <<"\t"         //
-              << cpuInfoOld.fSys  <<"\t"         //
-               // 
-              << procInfoOld.fMemResident<<"\t"  // process info -previous
-              << procInfoOld.fMemVirtual<<"\t"   //    
-              << procInfoOld.fCpuUser<<"\t"      //
-              << procInfoOld.fCpuSys<<"\t"       //
-              << endl;
+    <<setprecision(15)<< stamp.GetSec()+stamp.GetNanoSec()/1000000000.<<"\t"         // time  - time stamp in seconds
+    << memInfo.fMemUsed<<"\t"       // system info 
+    << memInfo.fSwapUsed<<"\t"      //
+    << cpuInfo.fUser <<"\t"         //
+    << cpuInfo.fSys  <<"\t"         //
+    // 
+    <<setprecision(15)<< procInfo.fMemResident/1024.<<"\t"  // process info
+    <<setprecision(15)<< procInfo.fMemVirtual/1024.<<"\t"   //    
+    << procInfo.fCpuUser<<"\t"      //
+    << procInfo.fCpuSys<<"\t"       //
+    //
+    <<setprecision(15)<< stampOld.GetSec()+stampOld.GetNanoSec()/1000000000.<<"\t"         // time  - time stamp in seconds
+    << memInfoOld.fMemUsed<<"\t"       // system info - previous
+    << memInfoOld.fSwapUsed<<"\t"      //
+    << cpuInfoOld.fUser <<"\t"         //
+    << cpuInfoOld.fSys  <<"\t"         //
+    // 
+    <<setprecision(15)<< procInfoOld.fMemResident/1024.<<"\t"  // process info -previous
+    <<setprecision(15)<< procInfoOld.fMemVirtual/1024.<<"\t"   //    
+    << procInfoOld.fCpuUser<<"\t"      //
+    << procInfoOld.fCpuSys<<"\t"       //
+    << endl;
+
   stampOld   = stamp;
   cpuInfoOld = cpuInfo;
   memInfoOld = memInfo;
@@ -230,3 +280,44 @@ void AliSysInfo::AddCallBack(StampCallback_t callback){
   info->fCallBackFunc[info->fNCallBack]=callback;
   info->fNCallBack++;
 }
+
+
+
+TTree*  AliSysInfo::Test(){
+  //
+  // Test example for AliSysInfo:
+  // 1. Make huge memory leak
+  // 2. Slow down execution
+  /*
+    To use the test:
+    TTree * tree = AliSysInfo::Test();
+    // Make alias what we set as input
+    tree->SetAlias("deltaVMIn","(id0*100000+id1*10000+id2*1000)/1000000.")
+    tree->SetAlias("deltaVM","(pI.fMemVirtual-pIOld.fMemVirtual)");
+    tree->SetAlias("deltaTIn","(id1+id0*10)");
+    tree->SetAlias("deltaT","stampSec-stampOldSec");
+    //
+    tree->SetMarkerStyle(23); tree->SetMarkerSize(0.5);
+    // Memory usage
+    tree->Draw("deltaVM:deltaVMIn","Entry$>0");
+    // or alternative
+    tree->Draw("deltaVM:deltaVMIn","Entry$>0","prof");
+    //
+    // draw time usage
+    tree->Draw("deltaT:deltaTIn","Entry$>0"); 
+  */
+  //
+  // The delta of VM as obtained from the AliSysInfo starts to be proportional
+  // to  the input allocation after 0.12 MBy (and it is system dependent) 
+  // Bellow these limit the deltaVM can be used only in mean.
+  // (compare first and  profile histogram)
+  for (Int_t id0=0; id0<5; id0++)
+    for (Int_t id1=1; id1<10; id1++)
+      for (Int_t id2=0; id2<20; id2++){
+       new Char_t[id2*1000+id1*10000+id0*100000];  // emulate memory leak
+       gSystem->Sleep(id1+id0*10);         // emulate CPU usage 
+       AliSysInfo::AddStamp("Leak",id0,id1,id2);
+      }
+  TTree * tree = AliSysInfo::MakeTree("syswatch.log");
+  return tree;  
+}
index 05b6967..ea13fc7 100644 (file)
@@ -29,6 +29,7 @@ public:
   static Bool_t Contain(const char * str1, const char * str2);
   typedef void (*StampCallback_t)(const Char_t * desription);
   static  void AddCallBack(StampCallback_t callback);
+  static  TTree* Test();
 private:
   AliSysInfo(const AliSysInfo& source);
   AliSysInfo& operator= (const AliSysInfo& rec);
index c595f7f..a680b32 100644 (file)
@@ -4,24 +4,26 @@
   Input   -  syswatch.log  - text log file created by process to be monitored
   Output  -  syswatch.root - root files with default histograms
   Number of top violators - only top consumer displayed
-          
   Default histogram:  
-               
-  TOP violateors      - CPU and Virtual memory usage
+  TOP violators      - CPU and Virtual memory usage
   Detector reports    - CPU and Virtual memory usage per detector
-
-
-
-
-
-  Usage example:
-  .x ~/rootlogon.C
+  //
+  
+  Example usage:
+  //  1. Initialize
   gROOT->LoadMacro("$ALICE_ROOT/macros/PlotSys.C+");
-  MakePlots("syswatch.log","syswatch.root",10);
+  PInit("syswatch.log","syswatch.root","syswatch.sum");
+  //  2. Make ascii report.
+  SumDetector()
+  //  3. Make histos of top violators
+   MakePlots(20);
+  //  4. Browse the results
   TFile f("syswatch.root");
   TBrowser b;
-*/
+ */
 
+#include <stdio.h>
+#include "AliReconstruction.h"
 #include "TMath.h"
 #include "TH1F.h"
 #include "TH2F.h"
 #include "TStyle.h"
 #include "AliSysInfo.h"
 
+const Int_t kNDetectors=AliReconstruction::kNDetectors;
+const char* fgkDetectorName[kNDetectors] = {"ITS", "TPC", "TRD", "TOF", "PHOS", "HMPID", "EMCAL", "MUON", "FMD", "ZDC", "PMD", "T0", "VZERO", "ACORDE", "HLT"};
+
+
+
 TObject * htemp; 
 TTree *tree=0;
 TFile *fout=0;
+TString sumFile;
 TCut cutVM("cutVM","deltaVM>10");
 TCut cutDT("cutDT","deltaT>2"); 
-Int_t ctop=10;
+Int_t ctop=20;
 
 
 Float_t TopUsage(TTree* tree, const char *exp, const char*cut, Int_t order);
+Double_t SumUsage(TTree* tree, const char *exp, const char*cut);
 void TopVM();
 void TopCPU();
 void TopVMDetector();
 void TopCPUDetector();
+void SumDetector();
 
-void PInit(const char *log="syswatch.log", const char *out="syswatch.root"){
+void PInit(const char *log="syswatch.log", const char *out="syswatch.root", const char * sumName="syswatch.sum"){
   //
   // Set Input output
   //
   tree = AliSysInfo::MakeTree(log);
   fout = new TFile(out,"recreate");
+  sumFile=sumName;
 }
 
 
 
-void MakePlots(const char *log="syswatch.log", const char *out="syswatch.root", Int_t top=10){
+void MakePlots(Int_t top=20){
   //
   //
   //
   ctop=top;
-  PInit(log,out);
   gStyle->SetOptStat(0);
   //
   // Top users
@@ -70,16 +80,13 @@ void MakePlots(const char *log="syswatch.log", const char *out="syswatch.root",
   //
   // Reports per detector
   //
-  fout->mkdir("cpuDetector");
-  fout->mkdir("VMDetector");
-  //
-  fout->cd("VMDetector");
-  TopVMDetector();
-  //
   fout->cd();
-  fout->cd("cpuDetector");
+  for (Int_t idet=0; idet<kNDetectors;idet++){
+    fout->cd();
+    fout->mkdir(fgkDetectorName[idet]);
+  }
+  TopVMDetector();
   TopCPUDetector();
-
   //
   fout->Close();
   ctop=top;
@@ -99,7 +106,7 @@ void TopVM(){
   //
   printf("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n\n");
   printf("TOP Virtual memory user\n");
-  tree->Scan("deltaVM:sname",cutVM,"colsize=20");
+  //tree->Scan("deltaVM:sname",cutVM,"colsize=20");
   printf("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n\n");
   //
   tree->Draw("deltaVM:sname>>hhh","1"+cutVM,"*");  
@@ -147,7 +154,7 @@ void TopCPU(){
   //
   printf("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n");
   printf("/n/n/nTOP CPU user\n");
-  tree->Scan("deltaT:sname",cutDT,"colsize=20");
+  //tree->Scan("deltaT:sname",cutDT,"colsize=20");
   printf("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n");
   //
   tree->Draw("deltaT:sname>>hhh","id2<3"+cutDT,"*"); 
@@ -173,7 +180,9 @@ void TopVMDetector(){
   //
   //detector part
   //
-  for (Int_t idet=0; idet<12; idet++){
+  for (Int_t idet=0; idet<kNDetectors; idet++){
+    fout->cd();
+    fout->cd(fgkDetectorName[idet]);
     char cdet[100];
     char cdvm[100];
     sprintf(cdet,"id0==%d",idet);
@@ -181,32 +190,31 @@ void TopVMDetector(){
     sprintf(expr,"deltaVM:sname>>hhh");  
     //
     Float_t thDVM = TopUsage(tree,"deltaVM",cdet,ctop);
-    sprintf(cdvm,"%s&&deltaT>%f",cdet, thDVM);
-
-
+    sprintf(cdvm,"%s&&deltaVM>%f",cdet, thDVM);
     //
-    tree->Draw(expr,cdvm,"*"); 
+    tree->Draw(expr,cdvm,"GOFF"); 
     his2 = (TH2F*)(tree->GetHistogram())->Clone("xxx");
     delete tree->GetHistogram();
     his2->SetYTitle("Delta Virtual Memory (MBy)");
     his2->SetMarkerStyle(22);
     his2->SetMarkerSize(1); 
-    his2->Draw("l*");
+    //his2->Draw("l*");
     his2->Write(Form("DVMvsName_%d",idet));
     delete his2;
     //
     //    
     sprintf(expr,"VM:sname>>hhh");
-    tree->Draw(expr,cdvm,"*"); 
+    tree->Draw(expr,cdvm,"goff"); 
     his2 = (TH2F*)(tree->GetHistogram())->Clone("yyy");
     delete tree->GetHistogram();
     his2->SetYTitle("Delta Virtual Memory (MBy)");
     his2->SetMarkerStyle(22);
     his2->SetMarkerSize(1); 
-    his2->Draw("l*");
+    //his2->Draw("l*");
     his2->Write(Form("VMvsName_%d",idet));     
     delete his2;
   }
+  fout->cd();
 }
 
 
@@ -220,7 +228,9 @@ void TopCPUDetector(){
   //
   // CPU
   //
-  for (Int_t idet=0; idet<12; idet++){
+  for (Int_t idet=0; idet<kNDetectors; idet++){
+    fout->cd();
+    fout->cd(fgkDetectorName[idet]);
     char cdet[100];
     char cdtime[100];
     sprintf(cdet,"id0==%d",idet);
@@ -230,20 +240,47 @@ void TopCPUDetector(){
     Float_t thDT = TopUsage(tree,"deltaT",cdet,ctop);
     sprintf(cdtime,"%s&&deltaT>%f",cdet, thDT);
     //
-    tree->Draw(expr,cdtime,"*"); 
+    tree->Draw(expr,cdtime,"goff"); 
     his2 = (TH2F*)(tree->GetHistogram())->Clone("dtsname");
     delete tree->GetHistogram();
     his2->SetYTitle("Delta CPU time(sec)");
     his2->SetMarkerStyle(22);
     his2->SetMarkerSize(1); 
     his2->GetXaxis()->SetLabelSize(0.03);
-    his2->Draw("l*");
+    //his2->Draw("l*");
     his2->Write(Form("CPUvsName_%d",idet));
     delete his2;
   }
+  fout->cd();
+}
+
+void SumDetector(){
+  //
+  // Sum - detector information
+  //
+  FILE * pFile;
+  pFile = fopen (sumFile,"w");
+  char cdet[100];
+  char expr[100];
+  sprintf(cdet,"id0>=0&&id2>=0");
+  Double_t sumdTAll  = SumUsage(tree,"deltaT",cdet);
+  Double_t sumdVMAll = SumUsage(tree,"deltaVM",cdet);
+  printf("%s%s%s%s%s\n","Det/C:","sumDt/F:","sumDvm/F:","fracDt/F:","fracDvm/F");
+  printf("%s\t%f\t%f\t%f\t%f\t\n","all", sumdTAll,sumdVMAll,100.,100.);
+  fprintf(pFile,"%s%s%s%s%s\n","Det/C:","sumDt/F:","sumDvm/F:","fracDt/F:","fracDvm/F");
+  fprintf(pFile,"%s\t%f\t%f\t%f\t%f\t\n","all", sumdTAll,sumdVMAll,100.,100.);
+  for (Int_t idet=0; idet<kNDetectors; idet++){
+    sprintf(cdet,"id0==%d&&id2>=0",idet);
+    sprintf(expr,"deltaT:sname>>hhh");  
+    Double_t sumdT  = SumUsage(tree,"deltaT",cdet);
+    Double_t sumdVM = SumUsage(tree,"deltaVM",cdet);
+    printf("%s\t%f\t%f\t%f\t%f\t\n",fgkDetectorName[idet], sumdT,sumdVM,100.*sumdT/sumdTAll, 100.*sumdVM/sumdVMAll); 
+    fprintf(pFile,"%s\t%f\t%f\t%f\t%f\t\n",fgkDetectorName[idet], sumdT,sumdVM,100.*sumdT/sumdTAll, 100.*sumdVM/sumdVMAll); 
+  }
+  fclose (pFile);
 }
 
+
 
 
 
@@ -272,3 +309,14 @@ Float_t TopUsage(TTree* tree, const char *exp, const char*cut, Int_t order){
   delete [] index;
   return val;
 }
+
+
+Double_t SumUsage(TTree* tree, const char *exp, const char*cut){
+  //
+  // return sum of usage
+  //
+  Int_t  entries = tree->Draw(Form("%s",exp),cut,"goff");
+  if (entries==0) return 0;
+  Double_t mean = TMath::Mean(entries, tree->GetV1());
+  return entries*mean;
+}