Changes for report #56707: On the proper usage of syswatch.log (Marian)
[u/mrichter/AliRoot.git] / STEER / AliSysInfo.cxx
index 21ec4c6..0dcb6b8 100644 (file)
 // Origin:  marian.ivanov@cern.ch
 //
 //  Make   a log file for the CPU and Memory usage
+//
+//  Principles:
+//  Snapshots of the system information are writen to the text log files.
+//  Text files were chosen in order to get the ouptu also in case code 
+//  is crashing. 
+//  Following information is stored in the log file:
+//  TTimeStamp stamp;
+//  CpuInfo_t  cpuInfo;
+//  MemInfo_t  memInfo;
+//  ProcInfo_t procInfo;
+// 
+//  Root TSystem is used to retrieve this information:
+//  gSystem->GetCpuInfo(&cpuInfo, 10);
+//  gSystem->GetMemInfo(&memInfo);
+//  gSystem->GetProcInfo(&procInfo);
+//  for details see:
+//  http://root.cern.ch/root/html/TUnixSystem.html
+//  http://root.cern.ch/root/html/ProcInfo_t.html
+//  http://root.cern.ch/root/html/MemInfo_t.html
+//  http://root.cern.ch/root/html/CpuInfo_t.html
+//  -------------------------------------------------------------------
+//  class CpuInfo_t
+//   Float_t   fIdle   cpu idle percentage
+//   Float_t   fLoad15m        cpu load average over 15 m
+//   Float_t   fLoad1m cpu load average over 1 m
+//   Float_t   fLoad5m cpu load average over 5 m
+//   Float_t   fSys    cpu sys load in percentage
+//   Float_t   fTotal  cpu user+sys load in percentage
+//   Float_t   fUser   cpu user load in percentage
+
+//  -------------------------------------------------------------------
+//  class ProcInfo_t:
+//  Float_t    fCpuSys system time used by this process in seconds
+//  Float_t    fCpuUser        user time used by this process in seconds
+//  Long_t     fMemResident    resident memory used by this process in KB
+//  Long_t     fMemVirtual     virtual memory used by this process in KB
+//  -------------------------------------------------------------------
+
+
+//  The information from the AliSysInfo can be used as measurement
+//  of the code quality. Be aware of the limitation induced by
+//  using only system info described in the  AliSysInfo::Test() function
+// 
+//  The example usage of the AliSysInfo is shown in the
+//  AliSysInfo::Test() example.
+//  
+//  
+//
 //  
-//  The typical usage:
+//  The typical usage in the AliRoot code:
 //  Make a set of stamps in the code in the place of interest
 //  e.g. 
 //
@@ -53,12 +101,14 @@ AliSysInfo::AliSysInfo():
     TObject(),
     fSysWatch(0),
     fTimer(0),
-    fMemStat(0)
+    fMemStat(0),
+    fCallBackFunc(0),
+    fNCallBack(0)
 {
   fTimer = new TStopwatch;
   fSysWatch  = new fstream("syswatch.log", ios_base::out|ios_base::trunc);
 
-  //hname/C:sname/C:sec/I:mI.fMemUsed/F:mI.fSwapUsed/F:pI.fMemResident/F:pI.fMemVirtual/F:cI.fUser/F:cI.fSys/F:cI.fCpuUser/F:pI.fCpuSys/F
+  //hname/C:sname/C:sec/D:mI.fMemUsed/F:mI.fSwapUsed/F:pI.fMemResident/F:pI.fMemVirtual/F:cI.fUser/F:cI.fSys/F:cI.fCpuUser/F:pI.fCpuSys/F
 
 
   (*fSysWatch) <<"hname"<<"/C:"               // hname - hostname  
@@ -66,29 +116,29 @@ AliSysInfo::AliSysInfo():
                <<"id0"<<"/I:"                // 0 id
                <<"id1"<<"/I:"                // 1 id
                <<"id2"<<"/I:"                // 1 id
-               <<"first"<<"/I:"              // first stamp
+               <<"first"<<"/D:"              // first stamp
     //
-              <<"stampSec"<<"/I:"         // time  - time stamp in seconds
-              <<"mi.fMemUsed"<<"/F:"       // system info 
-              <<"mi.fSwapUsed"<<"/F:"      //
-              <<"cI.fUser"<<"/F:"         //
-              <<"cI.fSys"<<"/F:"         //
+              <<"stampSec"<<"/D:"         // time  - time stamp in seconds
+              <<"mi.fMemUsed"<<"/D:"       // system info 
+              <<"mi.fSwapUsed"<<"/D:"      //
+              <<"cI.fUser"<<"/D:"         //
+              <<"cI.fSys"<<"/D:"         //
     // 
-              <<"pI.fMemResident"<<"/F:"  // process info
-              <<"pI.fMemVirtual"<<"/F:"   //    
-              <<"pI.fCpuUser"<<"/F:"      //
-              <<"pI.fCpuSys"<<"/F:"       //
+              <<"pI.fMemResident"<<"/D:"  // process info
+              <<"pI.fMemVirtual"<<"/D:"   //    
+              <<"pI.fCpuUser"<<"/D:"      //
+              <<"pI.fCpuSys"<<"/D:"       //
     //
-              <<"stampOldSec"<<"/I:"         // time  - time stamp in seconds
-              <<"miOld.fMemUsed"<<"/F:"       // system info - previous
-              <<"miOld.fSwapUsed"<<"/F:"      //
-              <<"cIOld.fUser"<<"/F:"         //
-              <<"cIOld.fSys"<<"/F:"         //
+              <<"stampOldSec"<<"/D:"         // time  - time stamp in seconds
+              <<"miOld.fMemUsed"<<"/D:"       // system info - previous
+              <<"miOld.fSwapUsed"<<"/D:"      //
+              <<"cIOld.fUser"<<"/D:"         //
+              <<"cIOld.fSys"<<"/D:"         //
     // 
-              <<"pIOld.fMemResident"<<"/F:"  // process info -previous
-              <<"pIOld.fMemVirtual"<<"/F:"   //    
-              <<"pIOld.fCpuUser"<<"/F:"      //
-              <<"pIOld.fCpuSys"<<"/F"       //
+              <<"pIOld.fMemResident"<<"/D:"  // process info -previous
+              <<"pIOld.fMemVirtual"<<"/D:"   //    
+              <<"pIOld.fCpuUser"<<"/D:"      //
+              <<"pIOld.fCpuSys"<<"/D"       //
               << endl;
   
 }
@@ -119,10 +169,13 @@ void AliSysInfo::AddStamp(const char *sname, Int_t id0, Int_t id1, Int_t id2){
   gSystem->GetCpuInfo(&cpuInfo, 10);
   gSystem->GetMemInfo(&memInfo);
   gSystem->GetProcInfo(&procInfo);
+  //  procInfo.fMemVirtual/=1024;  //size in MBy
+  //procInfo.fMemResident/=1024;  //size in MBy
+
   const char * hname = gSystem->HostName();
 
   static Int_t entry=0;
-  static Int_t first=stamp.GetSec();
+  static Double_t  first=stamp.GetSec()+stamp.GetNanoSec()/1000000000.;
   //
   static TTimeStamp stampOld;
   static CpuInfo_t  cpuInfoOld;
@@ -130,42 +183,46 @@ void AliSysInfo::AddStamp(const char *sname, Int_t id0, Int_t id1, Int_t id2){
   static ProcInfo_t procInfoOld;  
 
 
-  (*(Instance()->fSysWatch)) << hname   <<"\t"               // hname - hostname  
-               << sname    <<"\t"              // stamp name
-               << id0      <<"\t"
-               << id1      <<"\t"
-               << id2      <<"\t"
-               << first    <<"\t"              // first stamp               
-               //
-              << stamp.GetSec()<<"\t"         // time  - time stamp in seconds
-              << memInfo.fMemUsed<<"\t"       // system info 
-              << memInfo.fSwapUsed<<"\t"      //
-              << cpuInfo.fUser <<"\t"         //
-              << cpuInfo.fSys  <<"\t"         //
-               // 
-              << procInfo.fMemResident<<"\t"  // process info
-              << procInfo.fMemVirtual<<"\t"   //    
-              << procInfo.fCpuUser<<"\t"      //
-              << procInfo.fCpuSys<<"\t"       //
+  (*(Instance()->fSysWatch)) 
+    << hname   <<"\t"               // hname - hostname  
+    << sname    <<"\t"              // stamp name
+    << id0      <<"\t"
+    << id1      <<"\t"
+    << id2      <<"\t"
+    <<setprecision(15)<< first    <<"\t"              // first stamp               
     //
-              << stampOld.GetSec()<<"\t"         // time  - time stamp in seconds
-              << memInfoOld.fMemUsed<<"\t"       // system info - previous
-              << memInfoOld.fSwapUsed<<"\t"      //
-              << cpuInfoOld.fUser <<"\t"         //
-              << cpuInfoOld.fSys  <<"\t"         //
-               // 
-              << procInfoOld.fMemResident<<"\t"  // process info -previous
-              << procInfoOld.fMemVirtual<<"\t"   //    
-              << procInfoOld.fCpuUser<<"\t"      //
-              << procInfoOld.fCpuSys<<"\t"       //
-              << endl;
+    <<setprecision(15)<< stamp.GetSec()+stamp.GetNanoSec()/1000000000.<<"\t"         // time  - time stamp in seconds
+    << memInfo.fMemUsed<<"\t"       // system info 
+    << memInfo.fSwapUsed<<"\t"      //
+    << cpuInfo.fUser <<"\t"         //
+    << cpuInfo.fSys  <<"\t"         //
+    // 
+    <<setprecision(15)<< procInfo.fMemResident/1024.<<"\t"  // process info
+    <<setprecision(15)<< procInfo.fMemVirtual/1024.<<"\t"   //    
+    << procInfo.fCpuUser<<"\t"      //
+    << procInfo.fCpuSys<<"\t"       //
+    //
+    <<setprecision(15)<< stampOld.GetSec()+stampOld.GetNanoSec()/1000000000.<<"\t"         // time  - time stamp in seconds
+    << memInfoOld.fMemUsed<<"\t"       // system info - previous
+    << memInfoOld.fSwapUsed<<"\t"      //
+    << cpuInfoOld.fUser <<"\t"         //
+    << cpuInfoOld.fSys  <<"\t"         //
+    // 
+    <<setprecision(15)<< procInfoOld.fMemResident/1024.<<"\t"  // process info -previous
+    <<setprecision(15)<< procInfoOld.fMemVirtual/1024.<<"\t"   //    
+    << procInfoOld.fCpuUser<<"\t"      //
+    << procInfoOld.fCpuSys<<"\t"       //
+    << endl;
+
   stampOld   = stamp;
   cpuInfoOld = cpuInfo;
   memInfoOld = memInfo;
   procInfoOld= procInfo;
 
   //  if (fInstance->fMemStat) fInstance->fMemStat->AddStamps(sname);
+  for (Int_t icallback=0; icallback<Instance()->fNCallBack; icallback++){
+    Instance()->fCallBackFunc[icallback](sname);
+  }
   entry++;
 }
 
@@ -176,8 +233,8 @@ TTree * AliSysInfo::MakeTree(const char *lname){
   tree->ReadFile(lname);
   tree->SetAlias("deltaT","stampSec-stampOldSec");
   tree->SetAlias("T","stampSec-first");
-  tree->SetAlias("deltaVM","(pI.fMemVirtual-pIOld.fMemVirtual)*0.001");
-  tree->SetAlias("VM","pI.fMemVirtual*0.001");
+  tree->SetAlias("deltaVM","(pI.fMemVirtual-pIOld.fMemVirtual)");
+  tree->SetAlias("VM","pI.fMemVirtual");
   return tree;
 }
 
@@ -210,3 +267,57 @@ void AliSysInfo::CloseMemStat(){
   //if (Instance()->fMemStat  == TMemStatManager::GetInstance()) Instance()->fMemStat->Close();
   //Instance()->fMemStat=0;
 }
+
+
+
+void AliSysInfo::AddCallBack(StampCallback_t callback){
+  //
+  // add cal back function
+  //
+  AliSysInfo *info =  Instance();
+  if (!info->fCallBackFunc)
+    info->fCallBackFunc = new StampCallback_t[100];
+  info->fCallBackFunc[info->fNCallBack]=callback;
+  info->fNCallBack++;
+}
+
+
+
+TTree*  AliSysInfo::Test(){
+  //
+  // Test example for AliSysInfo:
+  // 1. Make huge memory leak
+  // 2. Slow down execution
+  /*
+    To use the test:
+    TTree * tree = AliSysInfo::Test();
+    // Make alias what we set as input
+    tree->SetAlias("deltaVMIn","(id0*100000+id1*10000+id2*1000)/1000000.")
+    tree->SetAlias("deltaVM","(pI.fMemVirtual-pIOld.fMemVirtual)");
+    tree->SetAlias("deltaTIn","(id1+id0*10)");
+    tree->SetAlias("deltaT","stampSec-stampOldSec");
+    //
+    tree->SetMarkerStyle(23); tree->SetMarkerSize(0.5);
+    // Memory usage
+    tree->Draw("deltaVM:deltaVMIn","Entry$>0");
+    // or alternative
+    tree->Draw("deltaVM:deltaVMIn","Entry$>0","prof");
+    //
+    // draw time usage
+    tree->Draw("deltaT:deltaTIn","Entry$>0"); 
+  */
+  //
+  // The delta of VM as obtained from the AliSysInfo starts to be proportional
+  // to  the input allocation after 0.12 MBy (and it is system dependent) 
+  // Bellow these limit the deltaVM can be used only in mean.
+  // (compare first and  profile histogram)
+  for (Int_t id0=0; id0<5; id0++)
+    for (Int_t id1=1; id1<10; id1++)
+      for (Int_t id2=0; id2<20; id2++){
+       new Char_t[id2*1000+id1*10000+id0*100000];  // emulate memory leak
+       gSystem->Sleep(id1+id0*10);         // emulate CPU usage 
+       AliSysInfo::AddStamp("Leak",id0,id1,id2);
+      }
+  TTree * tree = AliSysInfo::MakeTree("syswatch.log");
+  return tree;  
+}