]> git.uio.no Git - u/mrichter/AliRoot.git/blame - PWGPP/QA/scripts/alienSync.sh
fixing the way the photon is isolated at mc level and adding a TH3 for the mc-truth...
[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / alienSync.sh
CommitLineData
d4ab9e58 1#!/bin/bash
2#
3# - script to sync a group of files on alien with a local cache
4# downloads only new and updated files
5# - by default it mirrors the directory structure in a specified local location
6# (the local cache location and paths can be manipulated.)
7# - needs a configured config file (by default alienSync.config)
8# and a working alien environment (token and at least $ALIEN_DIR or $ALIEN_ROOT set)
9#
10# origin: Mikolaj Krzewicki, mikolaj.krzewicki@cern.ch
11#
# Create directory $1 (including parents). When alienSyncFilesGroupOwnership is
# configured the directory is created under that group via sg, otherwise with a
# plain mkdir.
_alienSyncMkdir()
{
  if [[ -n ${alienSyncFilesGroupOwnership} ]]; then
    sg "${alienSyncFilesGroupOwnership}" "mkdir -p \"$1\""
  else
    mkdir -p "$1"
  fi
}

# Hand file $1 over to the group alienSyncFilesGroupOwnership; does nothing when
# no group is configured.
_alienSyncChgrp()
{
  [[ -z ${alienSyncFilesGroupOwnership} ]] && return 0
  chgrp "${alienSyncFilesGroupOwnership}" "$1"
}

# Append destination $1 to $localFileList unless the lookup copy of that list
# (kept in $tmp) already contains exactly that line.
_alienSyncRememberLocalFile()
{
  if ! grep -qxF -- "$1" "$tmp/${localFileList##*/}"; then
    echo "$1" >> "$localFileList"
  fi
}

# Synchronise a set of alien files with a local cache.
#
# Arguments: $1 - config file; it is sourced and has to define at least
#                 alienFindCommand, localPathPrefix and logOutputPath.
# Globals:   all working variables are deliberately left global: exitScript
#            reads lockFile and tmp, and the config hooks evaluated here
#            (alienFindCommand, destinationModifyCommand, postCommand,
#            executeEnd) may refer to any of them.
# Exit:      with no argument the usage is printed and the function returns;
#            otherwise the script is left via exit / exitScript
#            (0 on completion, 1 on a setup problem).
main()
{
  if [[ $# -lt 1 ]]; then
    echo "Usage: $0 configFile"
    return
  fi

  # try to load the config file
  # (no log file exists yet at this point, so the message goes to stdout only)
  if [[ ! -f $1 ]]; then
    echo "config file $1 not found, exiting..."
    exit 1
  fi
  source "$1"

  # do some accounting
  # logOutputPath hosts the log, the lock and all the lists: check it before its first use
  [[ -z $logOutputPath ]] && echo "logOutputPath not defined, exiting..." && exit 1
  [[ ! -d $logOutputPath ]] && echo "logOutputPath not available, creating..." && _alienSyncMkdir "$logOutputPath"
  [[ ! -d $logOutputPath ]] && echo "could not create log dir, exiting..." && exit 1
  dateString=$(date +%Y-%m-%d-%H-%M)
  logFile=$logOutputPath/alienSync-$dateString.log
  echo "$0 $*" | tee -a "$logFile"
  echo "" | tee -a "$logFile"
  echo "log: $logFile"

  #be nice and allow group members access as well (002 will create dirs with 775 and files with 664)
  umask 0002

  #lock
  #noclobber makes the creation atomic: two instances started at the same moment
  #cannot both get the lock
  lockFile=$logOutputPath/runningNow.lock
  if ! ( set -o noclobber; : > "$lockFile" ) 2>/dev/null; then
    if [[ -f $lockFile ]]; then
      echo "locked. Another process running? ($lockFile)" | tee -a "$logFile"
    else
      echo "unable to create lock. exiting..." | tee -a "$logFile"
    fi
    exit 1
  fi

  #redirect all output to a log
  #(append, so the header written above is not truncated away; fd 6 keeps the original stdout)
  if [[ $allOutputToLog -eq 1 ]]; then
    exec 6>&1
    exec 1>>"$logFile" 2>&1
  fi

  newFilesList=$logOutputPath/"newFiles.list"
  rm -f "$newFilesList"
  touch "$newFilesList"
  redoneFilesList=$logOutputPath/"redoneFiles.list"
  rm -f "$redoneFilesList"
  touch "$redoneFilesList"
  updatedFilesList="${logOutputPath}/updatedFiles.list"

  # check the config
  [[ -z $alienFindCommand ]] && echo "alienFindCommand not defined, exiting..." && exitScript 1
  [[ -z ${localPathPrefix} ]] && echo "localPathPrefix not defined, exiting..." && exitScript 1
  [[ -z $secondsToSuicide ]] && echo "setting default secondsToSuicide of 10 hrs..." && secondsToSuicide=$(( 10*3600 ))

  # init alien
  echo source $alienInitScript
  source "$alienInitScript" ""
  [[ -z $ALIEN_ROOT && -n $ALIEN_DIR ]] && ALIEN_ROOT=$ALIEN_DIR
  #alienUserName is left unquoted on purpose: when it is empty no argument is passed
  "$ALIEN_ROOT/api/bin/alien-token-init" $alienUserName

  #set a default timeout for grid access
  [[ -z $copyTimeout ]] && copyTimeout=600
  export GCLIENT_COMMAND_MAXWAIT=$copyTimeout

  localAlienDatabase=$logOutputPath/localAlienDatabase.list
  localFileList=$logOutputPath/localFile.list

  alienFileListCurrent=$logOutputPath/alienFileDatabase.list
  [[ ! -f $localFileList ]] && touch "$localFileList"
  candidateLocalFileDatabase=$logOutputPath/candidateLocalFileDatabase.list

  #here we produce the current alien file list
  if [[ -n ${useExistingAlienFileDatabase} && -f ${localAlienDatabase} ]]; then
    #we use the old one
    echo "using ${localAlienDatabase} instead of full alien search"
    echo cp -f ${localAlienDatabase} ${alienFileListCurrent}
    cp -f "${localAlienDatabase}" "${alienFileListCurrent}"
  else
    #we make a new one
    echo "eval $alienFindCommand > $alienFileListCurrent"
    eval "$alienFindCommand" > "$alienFileListCurrent"
  fi

  echo "number of files in the collection: $(wc -l "$alienFileListCurrent")"
  #create a list of candidate destination locations
  #this is in case there are more files on alien trying to get to the same local destination
  #in which case we take the one with the youngest ctime (later in code)
  if [[ -n ${destinationModifyCommand} ]]; then
    echo eval "cat $alienFileListCurrent | ${destinationModifyCommand} | sed \"s,^,${localPathPrefix},\" > ${candidateLocalFileDatabase}"
    eval "cat $alienFileListCurrent | ${destinationModifyCommand} | sed \"s,^,${localPathPrefix},\" > ${candidateLocalFileDatabase}"
  fi

  #logic is: if file list is missing we force the md5 recalculation
  if [[ ! -f $localAlienDatabase ]]; then
    forceLocalMD5recalculation=1
    echo "forcing local MD5 sum recalculation"
    cp -f "$alienFileListCurrent" "$localAlienDatabase"
  fi

  #since we grep through the files frequently, copy some stuff to tmpfs for fast access
  #(if no temp dir can be made the lookups fall back to the originals in logOutputPath)
  tmp=$(mktemp -d 2>/dev/null)
  if [[ -d $tmp ]]; then
    cp "$localAlienDatabase" "$tmp"
    cp "$localFileList" "$tmp"
    cp "$alienFileListCurrent" "$tmp"
    [[ -f ${candidateLocalFileDatabase} ]] && cp "${candidateLocalFileDatabase}" "${tmp}"
  else
    tmp=$logOutputPath
  fi

  echo "starting downloading:"
  lineNumber=0
  alienFileCounter=0
  localFileCounter=0
  downloadedFileCounter=0
  #each line of the list is: alien path, md5, ctime, size
  while read -r alienFile md5alien timestamp size
  do
    ((lineNumber++))

    #sometimes the md5 turns out empty and is then stored as a "." to avoid problems parsing
    [[ $md5alien == *.* ]] && md5alien=""

    [[ -n $timeStampInLog ]] && date
    [[ $SECONDS -ge $secondsToSuicide ]] && echo "$SECONDS seconds passed, exiting by suicide..." && break
    [[ "$alienFile" != "/"*"/"?* ]] && echo "WARNING: read line not path-like: $alienFile" && continue
    ((alienFileCounter++))
    destination=${localPathPrefix}/${alienFile}
    destination=${destination//\/\///} #remove double slashes
    [[ -n ${destinationModifyCommand} ]] && destination=$( eval "echo ${destination} | ${destinationModifyCommand}" )
    destinationdir=${destination%/*}
    [[ -n $softLinkName ]] && softlinktodestination=${destinationdir}/${softLinkName}
    tmpdestination="${destination}.aliensyncTMP"

    if [[ -n ${destinationModifyCommand} ]]; then
      #find the candidate in the database, in case there are more files trying to go to the same
      #place due to $destinationModifyCommand which alters the final path, find the one
      #with the largest ctime (3rd field in the database list) and check if that is the current one
      #if not - skip
      #this guy contains: index of the original entry, local file name, md5, ctime
      #(-F: the path is looked up literally, dots etc. are not regex operators)
      candidateDBrecord=($(grep -nF -- "${destination}" "$tmp/${candidateLocalFileDatabase##*/}" | sed "s/:/ /" | sort -rk4 | head -n1))
      originalEntryIndex=${candidateDBrecord[0]}
      [[ $lineNumber -ne $originalEntryIndex ]] && continue
    fi

    redownloading=""
    if [[ -f ${destination} ]]; then
      #if we want the soft links and they are not there for existing files, create them
      if [[ ! -h "$softlinktodestination" && -n $softLinkName ]]; then
        echo ln -s ${destination} $softlinktodestination
        ln -s "${destination}" "$softlinktodestination"
      fi
      ((localFileCounter++))

      #first record whose path field is exactly this alien file (a bare grep would
      #also pick up longer paths that merely contain it)
      localDBrecord=($(awk -v f="$alienFile" '$1 == f { print; exit }' "$tmp/${localAlienDatabase##*/}"))
      md5local=${localDBrecord[1]}

      #sometimes the md5 turns out empty and is then stored as a "." to avoid problems parsing
      [[ $md5local == *.* ]] && md5local=""

      if [[ $forceLocalMD5recalculation -eq 1 || -z $md5local ]]; then
        tmparrayMD5=($(md5sum "${destination}"))
        md5recalculated=${tmparrayMD5[0]}
        [[ "$md5local" != "$md5recalculated" ]] && echo "WARNING: local copy change ${destination}"
        md5local=${md5recalculated}
      fi
      if [[ "$md5local" == "$md5alien" && -n $md5alien ]]; then
        echo "OK ${destination} $md5alien"
        _alienSyncRememberLocalFile "${destination}"
        continue
      fi
      if [[ -z $md5alien ]]; then
        _alienSyncRememberLocalFile "${destination}"
        echo "WARNING: missing alien md5, leaving the local file as it is"
        continue
      fi
      echo "WARNING: md5 mismatch ${destination}"
      echo " $md5local $md5alien"
      redownloading=1
    fi

    [[ -f $tmpdestination ]] && echo "WARNING: stale $tmpdestination, removing" && rm "$tmpdestination"

    _alienSyncMkdir "${destinationdir}"
    [[ ! -d $destinationdir ]] && echo cannot access $destinationdir && continue

    export copyMethod
    export copyScript
    export copyTimeout
    export copyTimeoutHard
    echo copyFromAlien "$alienFile" "$tmpdestination"
    [[ $pretend -eq 1 ]] && continue
    copyFromAlien "$alienFile" "$tmpdestination"

    # if we didn't download remove the destination in case we tried to redownload
    # a corrupted file
    if [[ ! -f $tmpdestination ]]; then
      echo "file not downloaded"
      rm -f "${destination}"
      continue
    fi
    _alienSyncChgrp "$tmpdestination"

    downloadOK=0
    #verify the downloaded md5 if available, validate otherwise...
    if [[ -n $md5alien ]]; then
      if (echo "$md5alien $tmpdestination" | md5sum -c --status -); then
        echo "OK md5 after download"
        downloadOK=1
      else
        echo "tried to parse this: $md5alien $tmpdestination"
      fi
    else
      downloadOK=1
    fi

    #handle zip files - check the checksums
    if [[ $alienFile == *.zip* && $downloadOK -eq 1 ]]; then
      echo "checking integrity of zip archive $tmpdestination"
      unzip -t "$tmpdestination" || downloadOK=0
    fi

    if [[ $downloadOK -eq 1 ]]; then
      echo mv $tmpdestination ${destination}
      mv "$tmpdestination" "${destination}"
      _alienSyncChgrp "${destination}"
      ((downloadedFileCounter++))
      if [[ -n $softlinktodestination ]]; then
        echo ln -s ${destination} $softlinktodestination
        ln -s "${destination}" "$softlinktodestination"
      fi
      [[ -z $redownloading ]] && echo "${destination}" >> "$newFilesList"
      [[ -n $redownloading ]] && echo "${destination}" >> "$redoneFilesList"
      _alienSyncRememberLocalFile "${destination}"
      [[ -n ${postCommand} ]] && ( cd "${destinationdir}" && eval "${postCommand}" )
    else
      echo "download not validated, NOT moving to ${destination}..."
      rm -f "$tmpdestination"
      continue
    fi

    if [[ $unzipFiles -eq 1 ]]; then
      #the download has already been moved into place, so the archive to unpack is
      #${destination}; -o keeps unzip from prompting (the loop's stdin is the file list)
      echo unzip -o ${destination} -d $destinationdir
      unzip -o "${destination}" -d "$destinationdir"
    fi

    echo
  done < "${alienFileListCurrent}"

  #only replace the database if this run actually saw files
  [[ $alienFileCounter -gt 0 ]] && mv -f "$alienFileListCurrent" "$localAlienDatabase"

  echo ${0##*/} DONE

  #give stdout back to the caller
  if [[ $allOutputToLog -eq 1 ]]; then
    exec 1>&6 6>&-
  fi

  cat "${newFilesList}" "${redoneFilesList}" > "${updatedFilesList}"
  eval "${executeEnd}"

  echo alienFindCommand:
  echo " $alienFindCommand"
  echo
  echo "files on alien: $alienFileCounter"
  echo "local files before: $localFileCounter"
  echo "files downloaded: $downloadedFileCounter"
  echo
  echo "new files:"
  echo
  cat "$newFilesList"
  echo
  echo "redone files:"
  echo
  cat "$redoneFilesList"

  #sendMailTo is left unquoted on purpose: it may hold several recipients
  [[ -n $sendMailTo ]] && echo "$logFile" | mail -s "alienSync $alienFindCommand done" $sendMailTo

  exitScript 0
}
308
# Release the lock, drop the scratch directory and leave the script.
#
# Arguments: $1 - exit status (0 when omitted)
# Globals:   lockFile, tmp, logOutputPath (all read)
exitScript()
{
  echo
  echo removing $lockFile
  [[ -n $lockFile ]] && rm -f "$lockFile"
  # $tmp is only a throw-away directory when mktemp succeeded; main falls back to
  # tmp=$logOutputPath otherwise, and wiping that would destroy the logs and the
  # file databases. An unset $tmp (early exit) is skipped as well.
  if [[ -n $tmp && $tmp != "$logOutputPath" && ! $tmp -ef $logOutputPath ]]; then
    echo removing $tmp
    rm -rf "$tmp"
  fi
  exit "${1:-0}"
}
318
# Like a regular alien_find command; the output is one line per file:
#   <path without the alien:// prefix> <md5 or "."> <ctime as epoch seconds> <size>
#
# Arguments: passed on unchanged to "gbbox find" (base path, search pattern, ...)
# Globals:   ALIEN_ROOT (read), logOutputPath (read; set to "./" when empty),
#            alienFindMaxCollectionLength (read, optional page size, default 10000)
# Returns:   1 when the gbbox executable is missing, 0 otherwise
alien_find()
{
  local executable="$ALIEN_ROOT/api/bin/gbbox find"
  [[ ! -x ${executable% *} ]] && echo "### error, no $executable..." && return 1
  [[ -z $logOutputPath ]] && logOutputPath="./"

  local maxCollectionLength=${alienFindMaxCollectionLength:-10000}

  export GCLIENT_COMMAND_MAXWAIT=600
  export GCLIENT_COMMAND_RETRY=20
  export GCLIENT_SERVER_RESELECT=4
  export GCLIENT_SERVER_RECONNECT=2
  export GCLIENT_RETRY_DAMPING=1.2
  export GCLIENT_RETRY_SLEEPTIME=2

  local iterationNumber=0
  local numberOfFiles=$maxCollectionLength
  local offset nfields x field turl md5 ctime size
  local -a fields
  rm -f "$logOutputPath/alien_find.err"
  #keep asking for the next page as long as the previous one came back full
  while [[ $numberOfFiles -ge $maxCollectionLength && $iterationNumber -lt 100 ]]; do
    numberOfFiles=0
    # NOTE(review): pages after the first start at N*pageSize-1, i.e. one entry
    # before the page boundary - kept as it was, confirm against gbbox's -o semantics
    offset=$((maxCollectionLength*iterationNumber-1))
    [[ $offset -lt 0 ]] && offset=0
    #the listing is fed in through process substitution and not a pipe, so that the
    #loop runs in this shell and numberOfFiles is still set when the page is done
    #(with a pipe the count was lost and only the first page was ever fetched)
    while read -r -a fields; do
      nfields=${#fields[*]}
      turl=""
      md5=""
      ctime=""
      size=""
      #field 0 is skipped, as before
      for ((x=1; x<nfields; x++)); do
        field=${fields[x]}
        #the attribute values are taken apart as plain strings; the listing comes
        #from a remote service and must not be eval'ed
        case ${field} in
          md5=*) md5=${field#md5=} ;;
          turl=*) turl=${field#turl=} ;;
          size=*) size=${field#size=} ;;
          ctime=*)
            ctime=${field#ctime=}
            #a quoted time stamp contains a blank and was split in two by read
            if [[ ${ctime} == \"* && ( ${#ctime} -eq 1 || ${ctime} != *\" ) ]]; then
              ctime="${ctime} ${fields[x+1]}"
            fi
            ;;
        esac
      done
      md5=${md5//\"/}
      turl=${turl//\"/}
      size=${size//\"/}
      ctime=${ctime//\"/}
      ctime=$( date -d "${ctime}" +%s 2>/dev/null)
      #an empty md5 is stored as "." so the columns stay parseable
      [[ -z $md5 ]] && md5="."
      [[ -n "$turl" ]] && echo "${turl//"alien://"/} ${md5} ${ctime} ${size}" && ((numberOfFiles++))
    done < <("${executable% *}" find -x coll -l "${maxCollectionLength}" -o "${offset}" "$@" 2>>"$logOutputPath/alien_find.err")
    ((iterationNumber++))
  done
  return 0
}
374
# Split the search in sub searches in the subdirectories of the base path.
#
# Arguments: $1 - base path on alien
#            $2 - search term handed to alien_find
#            $3 - regular expression selecting the subdirectories (default: all)
# Outputs:   the combined alien_find output of all selected subdirectories
alien_find_split()
{
  local basePath=${1}
  local searchTerm=${2}
  local subPathSelection=${3}
  local subPath
  [[ -z ${subPathSelection} ]] && subPathSelection=".*"
  gbbox ls "${basePath}" 2>/dev/null | \
  while read -r subPath; do
    #subPathSelection stays unquoted so that it acts as a regular expression
    [[ ! ${subPath} =~ ${subPathSelection} ]] && continue
    #the search term is usually a wildcard such as *.root: quoted, it reaches
    #alien_find as typed and is not expanded against the current directory;
    #an empty term is dropped altogether
    alien_find "${basePath}/${subPath}" ${searchTerm:+"${searchTerm}"}
  done
}
388
# Find the xml collections and print the list of filenames and hashes, one line
# per entry: <path without the alien:// prefix> <md5> <ctime as epoch seconds>
#
# Arguments: $1 [$2] - handed to catCollections: either a single collection, or
#                      a base path plus a search pattern
listCollectionContents()
{
  local -a collectionArgs=() fields
  local nfields x field turl md5 ctime
  #empty arguments are dropped (catCollections decides by the number of arguments);
  #the others are passed quoted so a pattern like *.xml is not expanded locally
  [[ -n $1 ]] && collectionArgs+=("$1")
  [[ -n $2 ]] && collectionArgs+=("$2")
  while read -r -a fields; do
    nfields=${#fields[*]}
    turl=""
    md5=""
    ctime=""
    #field 0 is skipped, as before
    for ((x=1; x<nfields; x++)); do
      field=${fields[x]}
      #the attribute values are taken apart as plain strings; the collection
      #content comes from a remote service and must not be eval'ed
      case ${field} in
        md5=*) md5=${field#md5=} ;;
        turl=*) turl=${field#turl=} ;;
        ctime=*)
          ctime=${field#ctime=}
          #a quoted time stamp contains a blank and was split in two by read
          if [[ ${ctime} == \"* && ( ${#ctime} -eq 1 || ${ctime} != *\" ) ]]; then
            ctime="${ctime} ${fields[x+1]}"
          fi
          ;;
      esac
    done
    md5=${md5//\"/}
    turl=${turl//\"/}
    ctime=${ctime//\"/}
    ctime=$( date -d "${ctime}" +%s 2>/dev/null)
    [[ -n "$turl" ]] && echo "${turl//"alien://"/} ${md5} ${ctime}"
  done < <(catCollections "${collectionArgs[@]}" 2>/dev/null)
}
413
# Print the contents of collection(s).
#
# Arguments: either one argument - the collection to print,
#            or two - a base path and a search pattern; every path-like hit of
#            alien_find for them is printed.
#            Any other number of arguments prints nothing.
catCollections()
{
  local collection
  if [[ $# -eq 2 ]]; then
    #alien_find lines are "path md5 ctime size": only the path is the collection
    #name, the remaining columns are discarded
    while read -r collection _; do
      [[ $collection != "/"*"/"?* ]] && continue
      gbbox cat "$collection"
    done < <(alien_find "$1" "$2")
  elif [[ $# -eq 1 ]]; then
    gbbox cat "$1"
  fi
}
426
# Check whether the alien token is still good for a while, so that a new one is
# only requested if the old one expires soon.
#
# Arguments: $1 - minimum remaining lifetime in seconds (default 4000)
# Globals:   ALIEN_ROOT (read)
# Returns:   0 if the token is valid for at least that long, 1 otherwise
#            (also when ALIEN_ROOT is not set or no expiry date can be obtained)
haveAlienToken()
{
  local maxExpireTime=$1
  local now tokenExpirationTime secondsToExpire
  [[ -z $maxExpireTime ]] && maxExpireTime=4000
  [[ -z $ALIEN_ROOT ]] && echo "no ALIEN_ROOT!" && return 1
  now=$(date "+%s")
  tokenExpirationTime=$("$ALIEN_ROOT/api/bin/alien-token-info" | grep -m1 Expires)
  #no "Expires" line at all: there is no token to rely on
  [[ -z $tokenExpirationTime ]] && return 1
  #everything after the first colon is the date
  tokenExpirationTime=$(date -d "${tokenExpirationTime#*:}" "+%s") || return 1
  secondsToExpire=$(( tokenExpirationTime-now ))
  if [[ $secondsToExpire -lt $maxExpireTime ]]; then
    return 1
  else
    echo "token valid for another $secondsToExpire seconds"
    return 0
  fi
}
444
# Copy the file $1 to $2 using a specified method.
# The copy runs under a time limit to make sure the download processes will not
# hang forever: "timelimit" is used when it is installed, otherwise "timeout"
# from coreutils with the same soft/hard limits.
#
# Arguments: $1 - source on alien, with or without the alien:// prefix
#            $2 - local destination
# Globals:   copyMethod      (read)  "tfilecp" copies through the root macro
#                                    $copyScript, anything else uses alien_cp
#            copyScript      (read)  macro used by the tfilecp method
#            copyTimeout     (read; set to 600 when empty)  seconds until the copy is signalled
#            copyTimeoutHard (read; set to 1200 when empty) further seconds until it is killed
#            ALIEN_ROOT      (read)
# Returns:   the status of the copy command
copyFromAlien()
{
  [[ -z $copyTimeout ]] && copyTimeout=600
  [[ -z $copyTimeoutHard ]] && copyTimeoutHard=1200
  #normalise the source so it carries the alien:// prefix exactly once
  local src=${1//"alien://"/}
  src="alien://${src}"
  local dst=$2

  #the command is assembled as an array so that paths containing blanks survive
  local -a copyCommand
  if command -v timelimit >/dev/null 2>&1; then
    copyCommand=(timelimit -t "$copyTimeout" -T "$copyTimeoutHard")
  else
    copyCommand=(timeout -k "$copyTimeoutHard" "$copyTimeout")
  fi

  if [[ "$copyMethod" == "tfilecp" ]]; then
    copyCommand+=(root -b -q "$copyScript(\"$src\",\"$dst\")")
  else
    copyCommand+=("$ALIEN_ROOT/api/bin/alien_cp" "$src" "$dst")
  fi
  echo "${copyCommand[@]}"
  "${copyCommand[@]}"
}
467
# Script entry point: run main with the command-line arguments passed through unchanged.
468main "$@"