]> git.uio.no Git - u/mrichter/AliRoot.git/blame - PWGLF/FORWARD/analysis2/doc/Trains.C
Various fixes, and possibility to set trigger efficiency
[u/mrichter/AliRoot.git] / PWGLF / FORWARD / analysis2 / doc / Trains.C
CommitLineData
7541582f 1#error This file is not for compilation
2/**
3 * @page train_setup_doc Using the TrainSetup facility
4 *
5 * @tableofcontents
6 *
7 * @section train_setup_overview Overview
8 *
9 * Users should define a class that derives from TrainSetup. The class
10 * should implement the member function TrainSetup::CreateTasks to add
11 * needed tasks to the train.
12 *
13 * @code
14 * // MyTrain.C
15 * class MyTrain : public TrainSetup
16 * {
17 * public:
18 * MyTrain(const char* name="MyTrain")
19 * : TrainSetup(name),
20 * fParameter(0)
21 * {
22 * // SetType(kAOD); // AOD input
23 * // SetType(kESD); // ESD input
24 * // Input unspecified - can be set later
25 * }
26 * protected:
27 * void CreateTasks(EMode mode, Bool_t par, AliAnalysisManager* mgr)
28 * {
29 * AliAnalysisManager::SetCommonFileName("my_analysis.root");
30 * LoadLibrary("MyAnalysis", par, true);
31 * Bool_t mc = mgr->GetMCtruthEventHandler() != 0;
32 * gROOT->Macro(Form("AddTaskMyAnalysis.C(%f)",fParameter));
33 * }
34 * const char* ClassName() const { return "MyTrain"; }
35 * void MakeOptions(Runner& r)
36 * {
37 * TrainSetup::MakeOptions(r);
38 * r.Add(new Option("parameter", "My parameter", "VALUE"));
39 * }
40 * void SetOptions(Runner& r)
41 * {
42 * TrainSetup::SetOptions(r);
43 * Option* param = r.FindOption("parameter");
44 * if (param) fParameter = param->AsDouble();
45 * }
46 * Double_t fParameter;
47 * };
48 * @endcode
49 *
50 * @section train_setup_exec Execution of the train
51 *
52 * A user defined TrainSetup class can then be run like
53 *
54 * @code
55 * Root> .x RunTrain.C("<class>", "<name>", "<options>", "<runs>", nEvents)
56 * @endcode
57 *
58 * or using the program @b runTrain
59 *
66cf95f2 60 * @code
61 * > runTrain --class=<class> --name=<name> [<options>]
62 * @endcode
7541582f 63 *
64 * Here,
65 *
66 * - &lt;class&gt; is the name of the user defined class deriving from
67 * TrainSetup.
68 *
69 * - &lt;name&gt; is an arbitrary name to give to the train. Note, an
70 * @e escaped @e name will be generated from this, which replaces
71 * all spaces and the like with '_' and (optionally) with the date
72 * and time appended.
73 *
74 * - &lt;options&gt; is a list of options. For RunTrain this is a
75 * comma separated list of options in the form
76 * &lt;option&gt;=&lt;value&gt; for value options and &lt;option&gt;
77 * for flags (booleans). For @c runTrain, the options are of the
78 * traditional Unix long type: @c --&lt;option&gt;=&lt;value&gt; and
79 * @c --&lt;option&gt;. The exact list of options for a given train
80 * can be listed by passing the option @b help.
81 *
82 * See also ::RunTrain and ::main
83 *
84 * In both cases, a new sub-directory called @e escaped @e name of the
85 * train is created, and various files are copied there - depending on
86 * the mode of execution.
87 *
88 * For local analysis, no additional files are copied there, but the
89 * output will be put there.
90 *
91 * For PROOF analysis, the needed PAR files are copied there and
92 * expanded. The output of the job may end up in this directory if so
93 * instructed.
94 *
95 * For Grid analysis, various JDL and steering scripts are copied to
96 * this directory.
97 *
98 * In all cases, a file named @c rerun.C (and for @b runTrain:
99 * rerun.sh) is generated in this sub-directory. It contains the
100 * setting used for the train and can easily be used to run merging
101 * and terminate as needed.
102 *
66cf95f2 103 * @section train_setup_proof_spec PROOF specifics
104 *
105 * Local and Grid jobs are in a sense very similar. That is, the
106 * individual Grid jobs are very much like Local jobs, in that they
107 * always produce output files, albeit not after Terminate.
108 *
109 * PROOF jobs are very different. In a PROOF analysis, each slave
110 * only produces in memory output which is then sent via net
111 * connections (sockets) to the master. One therefore needs to be
112 * very careful about output object ownership and the like.
113 *
114 * Another major difference is that output files are generated within
115 * the PROOF cluster, and are generally not accessible from the
116 * outside. For plain PROOF clusters in a local area network or
117 * so-called <i>Lite</i> session, it is generally not a problem since
118 * the files are accessible on the LAN or local machine for Lite
119 * sessions. However, for large scale analysis farms (AAFs), the
120 * workers and masters are generally on an inaccessible sub-net, and
121 * there's no direct access to the produced files. Now, for normal
122 * output files, like histogram files, etc. there are provisions for
123 * this, which means the final merged output is sent back to the
124 * client. Special output, such as AODs, are however not merged nor
125 * sent back to the user by default. There are two ways to deal with this:
126 *
127 * <ol>
128 * <li> Register the output tree as a data set on the cluster. This is useful if you need to process the results again on the cluster.</li>
129 * <li> Send the output to a (possibly custom) XRootd server. This is useful if you need to process the output outside of the cluster</li>
130 * </ol>
131 *
132 * The first mode is specified by passing the option
133 * <tt>dsname=</tt><i>&lt;name&gt;</i> in the cluster URI. The created
134 * dataset will normally be made in
135 * <tt>/default/</tt><i>&lt;user&gt;</i><tt>/</tt><i>&lt;name&gt;</i>. If the
136 * <tt>=</tt><i>&lt;name&gt;</i> part is left out, the <i>escaped name</i> of
137 * the job will be used.
138 *
139 * The second mode is triggered by passing the option
140 * <tt>storage=<i>URI</i></tt> to the train setup. The <i>URI</i>
141 * should be of the form
142 *
143 * @code
144 * rootd://<host>[:<port>]/<path>
145 * @endcode
146 *
147 * where <i>&lt;host&gt;</i> is the name of a machine accessible by
148 * the cluster, <i>&lt;port&gt;</i> is an optional port number (e.g.,
149 * if different from 1093), and <i>&lt;path&gt;</i> is an absolute
150 * path on <i>&lt;host&gt;</i>.
151 *
152 * The XRootd process should be started (optionally by the user) on
153 * <i>&lt;host&gt;</i> as
154 *
155 * @code
156 * xrootd -p <port> <path>
157 * @endcode
158 *
159 * When running jobs on AAFs, one can use the Grid handler to set-up
160 * aspects of the job. However, sometimes it's desirable to leave the
161 * Grid handler out. To do that, pass the option <tt>plain</tt> in
162 * the cluster URI.
163 *
7541582f 164 * @section train_setup_input Specifying the input
165 * @subsection train_setup_local Local data input
166 *
167 * For both ESD and AOD input for local jobs, one must specify the
168 * root of the sub-tree that holds the data. That is, if - for
169 * example - the data resides in a directory structure like
170 *
171 * <pre>
172 * /some/directory/&lt;run&gt;/&lt;seq&gt;/AliESDs.root
173 * </pre>
174 *
175 * then one should specify the input location like
176 *
177 * @code
178 * train->SetDataDir("/some/directory");
179 * @endcode
180 *
181 * <tt>/some/directory</tt> is then searched recursively for input files
182 * that match the pattern given by the analysis type (ESD:
183 * <tt>AliESDs.root</tt>, AOD: <tt>AliAOD.root</tt>). The found files
184 * are then chained together. If MC input is specified, then the
185 * companion files <tt>galice.root</tt>, <tt>Kinematics.root</tt>, and
186 * <tt>TrackRefs.root</tt> must be found in the same directories as
187 * the <tt>AliESDs.root</tt> files.
188 *
189 * @subsection train_setup_proof PROOF input.
190 *
191 * The input data for a PROOF based analysis can be specified as per a
66cf95f2 192 * Local job if the cluster used is local, in which case the data must
193 * be available to the slaves at the specified locations, or one can
194 * specify a data-set name via
7541582f 195 *
196 * @code
197 * train->SetDataSet("<data-set-name>");
198 * @endcode
199 *
66cf95f2 200 * @b Note: For AAFs using the Grid Handler one <i>must</i> use data sets.
201 *
7541582f 202 * @subsection train_setup_grid_esd Grid ESD input.
203 *
204 * Suppose the ESD files are stored on the Grid as
205 *
206 * <pre>
207 * /alice/data/&lt;year&gt;/&lt;period&gt;/&lt;run&gt;/ESDs/pass&lt;no&gt;/&lt;year&gt;&lt;run&gt;&lt;chunk&gt;.&lt;part&gt;/AliESDs.root
208 * </pre>
209 *
210 * where &lt;run&gt; is zero-padded by typically 3 '0's. One should
211 * specify the input location like
212 *
213 * @code
214 * train->SetDataDir("/alice/data/<year>/<period>");
215 * train->SetDataPattern("ESDs/pass<no>/&ast;/");
216 * train->AddRun(<run>);
217 * @endcode
218 *
219 * If a particular kind of pass is needed, say
220 * <tt>pass&lt;no&gt;_MUON</tt>, one should do
221 *
222 * @code
223 * train->SetDataPattern("ESDs/pass<no>_MUON/&ast;/");
224 * @endcode
225 *
226 * The AliEn analysis plug-in is then instructed to look for data files under
227 *
228 * <pre>
229 * /alice/data/&lt;year&gt;/&lt;period&gt;/&lt;run&gt;/ESDs/pass&lt;no&gt;/&nbsp;*&nbsp;/AliESDs.root
230 * </pre>
231 *
232 * for each added run.
233 *
234 * For simulation output, the files are generally stored like
235 *
236 * <pre>
237 * /alice/sim/&lt;year&gt;/&lt;prod&gt;/&lt;run&gt;/&lt;seq&gt;/AliESDs.root
238 * </pre>
239 *
240 * where &lt;run&gt; is generally @e not zero-padded. One should
241 * specify the input location like
242 *
243 * @code
244 * train->SetDataDir("/alice/sim/<year>/<prod>");
245 * train->SetDataPattern("*");
246 * train->AddRun(<run>);
247 * @endcode
248 *
249 *
250 * @subsection train_setup_grid_aod Grid AOD input
251 *
252 * Suppose your AOD files are placed in directories like
253 *
254 * <pre>
255 * /some/directory/&lt;run&gt;/&lt;seq&gt;/AliAOD.root
256 * </pre>
257 *
258 * where &lt;run&gt; is zero-padded by typically 3 '0's. One should
259 * then specify the input as
260 *
261 * @code
262 * train->SetDataDir("/some/directory");
263 * train->SetDataPattern("*");
264 * train->AddRun(<run>);
265 * @endcode
266 *
267 * The AliEn analysis plug-in is then instructed to look for data files under
268 *
269 * <pre>
270 * /some/directory/&lt;run&gt;/&nbsp;*&nbsp;/AliAOD.root
271 * </pre>
272 *
273 * for each added run.
274 *
275 * Suppose the AODs are in
276 *
277 * <pre>
278 * /alice/data/&lt;year&gt;/&lt;period&gt;/&lt;run&gt;/ESDs/pass&lt;no&gt;/AOD&lt;vers&gt;/&lt;seq&gt;/AliAOD.root
279 * </pre>
280 *
281 * @code
282 * train->SetDataDir("/alice/data/<year>/<period>");
283 * train->SetDataPattern("ESDs/pass<no>/AOD<vers>/&ast;/");
284 * train->AddRun(<run>);
285 * @endcode
286 *
287 * For simulation output, the files are generally stored like
288 *
289 * <pre>
290 * /alice/sim/&lt;year&gt;/&lt;prod&gt;/&lt;run&gt;/&lt;seq&gt;/AliAOD.root
291 * </pre>
292 *
293 * where &lt;run&gt; is generally @e not zero-padded. One should
294 * specify the input location like
295 *
296 * @code
297 * train->SetDataDir("/alice/sim/<year>/<prod>");
298 * train->SetDataPattern("*");
299 * train->AddRun(<run>);
300 * @endcode
301 *
302 * @section train_setup_other Other features
303 * @subsection train_setup_options Options interface
304 *
305 * If the train does not depend on additional options or parameters,
306 * the member functions TrainSetup::MakeOptions and
307 * TrainSetup::SetOptions can be left un-overloaded in the derived
308 * class. However, options defined in this way can be set through the
309 * command line of the program @b runTrain, and provide a great deal
310 * of flexibility. The Option class provides means of translating the
311 * passed string values to integers, doubles, booleans, and of course
312 * strings.
313 *
314 * @subsection train_setup_aux Auxiliary libraries, sources, and files
315 *
316 * Auxiliary libraries should be loaded using
317 *
318 * - TrainSetup::LoadLibrary(const char*,Bool_t,Bool_t)
319 *
320 * where the first argument is the name of the library, the second should
321 * be true if the library should be loaded as a PAR file (PROOF and
322 * Grid only), and the third argument should be true if the library should
323 * be loaded on the PROOF slaves/Grid workers too.
324 *
325 * If the train needs additional files, say a script for setting up
326 * the tasks, or some data file, it can be passed on to the
327 * PROOF/Grid workers using the member functions
328 *
329 * - TrainSetup::AddExtraFile(const char*)
330 * - TrainSetup::AddSource(const char*,bool)
331 *
332 * @subsection train_setup_overload Overloading the behaviour
333 *
334 * The base class TrainSetup tries to implement a sensible setup for a
335 * given type of analysis, but sometimes a particular train needs a
336 * bit of tweaking. One can therefore overload the following functions
337 *
338 * - TrainSetup::CreateGridHandler()
339 * - TrainSetup::CreateInputHandler(EType)
340 * - TrainSetup::CreateMCHandler(EType,bool)
341 * - TrainSetup::CreateOutputHandler(EType)
342 * - TrainSetup::CreatePhysicsSelection(Bool_t,AliAnalysisManager*)
343 * - TrainSetup::CreateCentralitySelection(Bool_t,AliAnalysisManager*)
344 *
345 * @section train_setup_scripts Tasks defined in scripts
346 *
347 * A task can even be defined in a script, like for example a task like
348 *
349 * @include MyAnalysis.C
350 *
351 * Our train set-up can then use the member function
352 * TrainSetup::MakeScriptPAR to make a PAR file of the script and use
353 * that to make a library loaded on the workers and then generate an
354 * object of our task defined in the script.
355 *
356 * @include MyTrain.C
357 *
358 * This can allow for fast development and testing of analysis tasks
359 * without having to wait for official tasks and builds of all of
360 * AliROOT
361 */
362//
363// EOF
364//