]>
Commit | Line | Data |
---|---|---|
7541582f | 1 | #error This file is not for compilation |
2 | /** | |
3 | * @page train_setup_doc Using the TrainSetup facility | |
4 | * | |
5 | * @tableofcontents | |
6 | * | |
7 | * @section train_setup_overview Overview | |
8 | * | |
9 | * Users should define a class that derives from TrainSetup. The class | |
10 | * should implement the member function TrainSetup::CreateTasks to add | |
11 | * needed tasks to the train. | |
12 | * | |
13 | * @code | |
14 | * // MyTrain.C | |
15 | * class MyTrain : public TrainSetup | |
16 | * { | |
17 | * public: | |
18 | * MyTrain(const char* name="MyTrain") | |
19 | * : TrainSetup(name), | |
20 | * fParameter(0) | |
21 | * { | |
22 | * // SetType(kAOD); // AOD input | |
23 | * // SetType(kESD); // ESD input | |
24 | * // Input unspecified - can be set later | |
25 | * } | |
26 | * protected: | |
27 | * void CreateTasks(EMode mode, Bool_t par, AliAnalysisManager* mgr) | |
28 | * { | |
29 | * AliAnalysisManager::SetCommonFileName("my_analysis.root"); | |
30 | * LoadLibrary("MyAnalysis", par, true); | |
31 | * Bool_t mc = mgr->GetMCtruthEventHandler() != 0; | |
32 | * gROOT->Macro(Form("AddTaskMyAnalysis.C(%f)",fParameter)); | |
33 | * } | |
34 | * const char* ClassName() const { return "MyTrain"; } | |
35 | * void MakeOptions(Runner& r) | |
36 | * { | |
37 | * TrainSetup::MakeOptions(r); | |
38 | * r.Add(new Option("parameter", "My parameter", "VALUE")); | |
39 | * } | |
40 | * void SetOptions(Runner& r) | |
41 | * { | |
42 | * TrainSetup::SetOptions(r); | |
43 | * Option* param = r.FindOption("parameter"); | |
44 | * if (param) fParameter = param->AsDouble(); | |
45 | * } | |
46 | * Double_t fParameter; | |
47 | * }; | |
48 | * @endcode | |
49 | * | |
50 | * @section train_setup_exec Execution of the train | |
51 | * | |
52 | * A user defined TrainSetup class can then be run like | |
53 | * | |
54 | * @code | |
55 | * Root> .x RunTrain.C("<class>", "<name>", "<options>", "<runs>", nEvents) | |
56 | * @endcode | |
57 | * | |
58 | * or using the program @b runTrain | |
59 | * | |
66cf95f2 | 60 | * @code |
61 | * > runTrain --class=<class> --name=<name> [<options>] | |
62 | * @endcode | |
7541582f | 63 | * |
64 | * Here, | |
65 | * | |
66 | * - <class> is the name of the user defined class deriving from | |
67 | * TrainSetup. | |
68 | * | |
69 | * - <name> is an arbitrary name to give to the train. Note, an | |
70 | * @e escaped @e name will be generated from this, which replaces | |
71 | * all spaces and the like with '_' and (optionally) with the date | |
72 | * and time appended. | |
73 | * | |
74 | * - <options> is a list of options. For RunTrain this is a | |
75 | * comma separated list of options in the form | |
76 | * <option>=<value> for value options and <option> | |
77 | * for flags (booleans). For @c runTrain, the options are of the | |
78 | * traditional Unix long type: @c --<option>=<value> and | |
79 | * @c --<option>. The exact list of options for a given train | |
80 | * can be listed by passing the option @b help. | |
81 | * | |
82 | * See also ::RunTrain and ::main | |
83 | * | |
84 | * In both cases, a new sub-directory called @e escaped @e name of the | |
85 | * train is created, and various files are copied there - depending on | |
86 | * the mode of execution. | |
87 | * | |
88 | * For local analysis, no additional files are copied there, but the | |
89 | * output will be put there. | |
90 | * | |
91 | * For PROOF analysis, the needed PAR files are copied there and | |
92 | * expanded. The output of the job may end up in this directory if so | |
93 | * instructed. | |
94 | * | |
95 | * For Grid analysis, various JDL and steering scripts are copied to | |
96 | * this directory. | |
97 | * | |
98 | * In all cases, a file named @c rerun.C (and for @b runTrain: | |
99 | * rerun.sh) is generated in this sub-directory. It contains the | |
100 | * setting used for the train and can easily be used to run merging | |
101 | * and terminate as needed. | |
102 | * | |
66cf95f2 | 103 | * @section train_setup_proof_spec PROOF specifics |
104 | * | |
105 | * Local and Grid jobs are in a sense very similar. That is, the | |
106 | * individual Grid jobs are very much like Local jobs, in that they | |
107 | * always produce output files, albeit not after Terminate. | |
108 | * | |
109 | * PROOF jobs are very different. In a PROOF analysis, each slave | |
110 | * only produces in memory output which is then sent via net | |
111 | * connections (sockets) to the master. One therefore needs to be | |
112 | * very careful of output object ownership and the like. | |
113 | * | |
114 | * Another major difference is that output files are generated within | |
115 | * the PROOF cluster, and are generally not accessible from the | |
116 | * outside. For plain PROOF clusters in a local area network or | |
117 | * so-called <i>Lite</i> session, it is generally not a problem since | |
118 | * the files are accessible on the LAN or local machine for Lite | |
119 | * sessions. However, for large scale analysis farms (AAFs), the | |
120 | * workers and masters are generally on an inaccessible sub-net, and | |
121 | * there's no direct access to the produced files. Now, for normal | |
122 | * output files, like histogram files, etc. there are provisions for | |
123 | * this, which means the final merged output is sent back to the | |
124 | * client. Special output, such as AODs, are however not merged nor | |
125 | * sent back to the user by default. There are two ways to deal with this: | |
126 | * | |
127 | * <ol> | |
128 | * <li> Register the output tree as a data set on the cluster. This is useful if you need to process the results again on the cluster.</li> | |
129 | * <li> Send the output to a (possibly custom) XRootd server. This is useful if you need to process the output outside of the cluster.</li> | |
130 | * </ol> | |
131 | * | |
132 | * The first mode is specified by passing the option | |
133 | * <tt>dsname=</tt><i><name></i> in the cluster URI. The created | |
134 | * dataset will normally be made in | |
135 | * <tt>/default/</tt><i><user></i><tt>/</tt><i><name></i>. If the | |
136 | * <tt>=</tt><i><name></i> part is left out, the <i>escaped name</i> of | |
137 | * the job will be used. | |
138 | * | |
139 | * The second mode is triggered by passing the option | |
140 | * <tt>storage=<i>URI</i></tt> to the train setup. The <i>URI</i> | |
141 | * should be of the form | |
142 | * | |
143 | * @code | |
144 | * rootd://<host>[:<port>]/<path> | |
145 | * @endcode | |
146 | * | |
147 | * where <i><host></i> is the name of a machine accessible by | |
148 | * the cluster, <i><port></i> is an optional port number (e.g., | |
149 | * if different from 1093), and <i><path></i> is an absolute | |
150 | * path on <i><host></i>. | |
151 | * | |
152 | * The XRootd process should be started (optionally by the user) on | |
153 | * <i><host></i> as | |
154 | * | |
155 | * @code | |
156 | * xrootd -p <port> <path> | |
157 | * @endcode | |
158 | * | |
159 | * When running jobs on AAFs, one can use the Grid handler to set-up | |
160 | * aspects of the job. However, sometimes it's desirable to leave the | |
161 | * Grid handler out. To do that, pass the option <tt>plain</tt> in | |
162 | * the cluster URI. | |
163 | * | |
7541582f | 164 | * @section train_setup_input Specifying the input |
165 | * @subsection train_setup_local Local data input | |
166 | * | |
167 | * For both ESD and AOD input for local jobs, one must specify the | |
168 | * root of the sub-tree that holds the data. That is, if - for | |
169 | * example - the data resides in a directory structure like | |
170 | * | |
171 | * <pre> | |
172 | * /some/directory/<run>/<seq>/AliESDs.root | |
173 | * </pre> | |
174 | * | |
175 | * then one should specify the input location like | |
176 | * | |
177 | * @code | |
178 | * train->SetDataDir("/some/directory"); | |
179 | * @endcode | |
180 | * | |
181 | * <tt>/some/directory</tt> is then searched recursively for input files | |
182 | * that match the pattern given by the analysis type (ESD: | |
183 | * <tt>AliESDs.root</tt>, AOD: <tt>AliAOD.root</tt>). The found files | |
184 | * are then chained together. If MC input is specified, then the | |
185 | * companion files <tt>galice.root</tt>, <tt>Kinematics.root</tt>, and | |
186 | * <tt>TrackRefs.root</tt> must be found in the same directories as | |
187 | * the <tt>AliESDs.root</tt> files. | |
188 | * | |
189 | * @subsection train_setup_proof PROOF input. | |
190 | * | |
191 | * The input data for a PROOF based analysis can be specified as per a | |
66cf95f2 | 192 | * Local job if the cluster used is local, in which case the data must |
193 | * be available to the slaves at the specified locations, or one can | |
194 | * specify a data-set name via | |
7541582f | 195 | * |
196 | * @code | |
197 | * train->SetDataSet("<data-set-name>"); | |
198 | * @endcode | |
199 | * | |
66cf95f2 | 200 | * @b Note: For AAFs using the Grid Handler one <i>must</i> use data sets. |
201 | * | |
7541582f | 202 | * @subsection train_setup_grid_esd Grid ESD input. |
203 | * | |
204 | * Suppose the ESD files are stored on the Grid as | |
205 | * | |
206 | * <pre> | |
207 | * /alice/data/<year>/<period>/<run>/ESDs/pass<no>/<year><run><chunk>.<part>/AliESDs.root | |
208 | * </pre> | |
209 | * | |
210 | * where <run> is zero-padded by typically 3 '0's. One should | |
211 | * specify the input location like | |
212 | * | |
213 | * @code | |
214 | * train->SetDataDir("/alice/data/<year>/<period>"); | |
215 | * train->SetDataPattern("ESDs/pass<no>/*/"); | |
216 | * train->AddRun(<run>); | |
217 | * @endcode | |
218 | * | |
219 | * If a particular kind of pass is needed, say | |
220 | * <tt>pass<no>_MUON</tt>, one should do | |
221 | * | |
222 | * @code | |
223 | * train->SetDataPattern("ESDs/pass<no>_MUON/*/"); | |
224 | * @endcode | |
225 | * | |
226 | * The AliEn analysis plug-in is then instructed to look for data files under | |
227 | * | |
228 | * <pre> | |
229 | * /alice/data/<year>/<period>/<run>/ESDs/pass<no>/ * /AliESDs.root | |
230 | * </pre> | |
231 | * | |
232 | * for each added run. | |
233 | * | |
234 | * For simulation output, the files are generally stored like | |
235 | * | |
236 | * <pre> | |
237 | * /alice/sim/<year>/<prod>/<run>/<seq>/AliESDs.root | |
238 | * </pre> | |
239 | * | |
240 | * where <run> is generally @e not zero-padded. One should | |
241 | * specify the input location like | |
242 | * | |
243 | * @code | |
244 | * train->SetDataDir("/alice/sim/<year>/<prod>"); | |
245 | * train->SetDataPattern("*"); | |
246 | * train->AddRun(<run>); | |
247 | * @endcode | |
248 | * | |
249 | * | |
250 | * @subsection train_setup_grid_aod Grid AOD input | |
251 | * | |
252 | * Suppose your AOD files are placed in directories like | |
253 | * | |
254 | * <pre> | |
255 | * /some/directory/<run>/<seq>/AliAOD.root | |
256 | * </pre> | |
257 | * | |
258 | * where <run> is zero-padded by typically 3 '0's. One should | |
259 | * then specify the input as | |
260 | * | |
261 | * @code | |
262 | * train->SetDataDir("/some/directory"); | |
263 | * train->SetDataPattern("*"); | |
264 | * train->AddRun(<run>); | |
265 | * @endcode | |
266 | * | |
267 | * The AliEn analysis plug-in is then instructed to look for data files under | |
268 | * | |
269 | * <pre> | |
270 | * /some/directory/<run>/ * /AliAOD.root | |
271 | * </pre> | |
272 | * | |
273 | * for each added run. | |
274 | * | |
275 | * Suppose the AODs are in | |
276 | * | |
277 | * <pre> | |
278 | * /alice/data/<year>/<period>/<run>/ESDs/pass<no>/AOD<vers>/<seq>/AliAOD.root | |
279 | * </pre> | |
280 | * | |
281 | * @code | |
282 | * train->SetDataDir("/alice/data/<year>/<period>"); | |
283 | * train->SetDataPattern("ESDs/pass<no>/AOD<vers>/*/"); | |
284 | * train->AddRun(<run>); | |
285 | * @endcode | |
286 | * | |
287 | * For simulation output, the files are generally stored like | |
288 | * | |
289 | * <pre> | |
290 | * /alice/sim/<year>/<prod>/<run>/<seq>/AliAOD.root | |
291 | * </pre> | |
292 | * | |
293 | * where <run> is generally @e not zero-padded. One should | |
294 | * specify the input location like | |
295 | * | |
296 | * @code | |
297 | * train->SetDataDir("/alice/sim/<year>/<prod>"); | |
298 | * train->SetDataPattern("*"); | |
299 | * train->AddRun(<run>); | |
300 | * @endcode | |
301 | * | |
302 | * @section train_setup_other Other features | |
303 | * @subsection train_setup_options Options interface | |
304 | * | |
305 | * If the train does not depend on additional options or parameters, | |
306 | * the member functions TrainSetup::MakeOptions and | |
307 | * TrainSetup::SetOptions can be left un-overloaded in the derived | |
308 | * class. However, options defined in this way can be set through the | |
309 | * command line of the program @b runTrain, and provide a great deal | |
310 | * of flexibility. The Option class provides means of translating the | |
311 | * passed string values to integers, doubles, booleans, and of course | |
312 | * strings. | |
313 | * | |
314 | * @subsection train_setup_aux Auxiliary libraries, sources, and files | |
315 | * | |
316 | * Auxiliary libraries should be loaded using | |
317 | * | |
318 | * - TrainSetup::LoadLibrary(const char*,Bool_t,Bool_t) | |
319 | * | |
320 | * where first argument is the name of the library, the second should | |
321 | * be true if the library should be loaded as a PAR file (PROOF and | |
322 | * Grid only), and the third argument should be true if the library should | |
323 | * be loaded on the PROOF slaves/Grid workers too. | |
324 | * | |
325 | * If the train needs additional files, say a script for setting up | |
326 | * the tasks, or some data file, it can be passed on to the | |
327 | * PROOF/Grid workers using the member functions | |
328 | * | |
329 | * - TrainSetup::AddExtraFile(const char*) | |
330 | * - TrainSetup::AddSource(const char*,bool) | |
331 | * | |
332 | * @subsection train_setup_overload Overloading the behaviour | |
333 | * | |
334 | * The base class TrainSetup tries to implement a sensible setup for a | |
335 | * given type of analysis, but sometimes a particular train needs a | |
336 | * bit of tweaking. One can therefore overload the following functions | |
337 | * | |
338 | * - TrainSetup::CreateGridHandler() | |
339 | * - TrainSetup::CreateInputHandler(EType) | |
340 | * - TrainSetup::CreateMCHandler(EType,bool) | |
341 | * - TrainSetup::CreateOutputHandler(EType) | |
342 | * - TrainSetup::CreatePhysicsSelection(Bool_t,AliAnalysisManager*) | |
343 | * - TrainSetup::CreateCentralitySelection(Bool_t,AliAnalysisManager*) | |
344 | * | |
345 | * @section train_setup_scripts Tasks defined in scripts | |
346 | * | |
347 | * A task can even be defined in a script, like for example a task like | |
348 | * | |
349 | * @include MyAnalysis.C | |
350 | * | |
351 | * Our train set-up can then use the member function | |
352 | * TrainSetup::MakeScriptPAR to make a PAR file of the script and use | |
353 | * that to make a library loaded on the workers and then generate an | |
354 | * object of our task defined in the script. | |
355 | * | |
356 | * @include MyTrain.C | |
357 | * | |
358 | * This can allow for fast development and testing of analysis tasks | |
359 | * without having to wait for official tasks and builds of all of | |
360 | * AliROOT. | |
361 | */ | |
362 | // | |
363 | // EOF | |
364 | // |