handle bash args properly using arrays
[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / alienSync.sh
CommitLineData
d4ab9e58 1#!/bin/bash
2#
3# - script to sync a group of files on alien with a local cache
4# downloads only new and updated files
5# - by default it mirrors the directory structure in a specified local location
6# (the local cache location and paths can be manipulated.)
7# - needs a configured config file (by default alienSync.config)
8# and a working alien environment (token and at least $ALIEN_DIR or $ALIEN_ROOT set)
9#
10# origin: Mikolaj Krzewicki, mikolaj.krzewicki@cern.ch
11#
main()
{
  # Sync a group of files found on alien with a local cache.
  #
  # Arguments: option=value pairs, handed unchanged to parseConfig
  #            (e.g. configFile=/path/to/config).
  # Flow:      parse config -> set up log dir, log file and lock -> build the
  #            current alien file list -> per file: compare md5 with the local
  #            copy, download to a temporary name, verify, move into place ->
  #            write the new/redone/updated lists -> exitScript.
  # Returns:   0 after printing the usage text when called without arguments,
  #            1 when the configuration is unusable; every other path leaves
  #            through exit or exitScript.
  if [[ $# -lt 1 ]]; then
    echo "Usage: ${0##*/} configFile=/path/to/config"
    echo "expert: ${0##*/} alienFindCommand=\"alien_find /some/path/ file\" [opt=value]"
    return
  fi

  # try to load the config file
  #[[ ! -f $1 ]] && echo "config file $1 not found, exiting..." | tee -a $logFile && exit 1
  if ! parseConfig "$@"; then return 1; fi

  # plain test-and-bail; this must not be an "if" (there is no then/fi)
  [[ -z ${alienFindCommand} ]] && echo "alienFindCommand not defined!" && return 1

  #if not set, use the default group
  [[ -z ${alienSyncFilesGroupOwnership} ]] && alienSyncFilesGroupOwnership=$(id -gn)

  # do some accounting
  [[ ! -d $logOutputPath ]] && echo "logOutputPath not available, creating..." && sg ${alienSyncFilesGroupOwnership} "mkdir -p $logOutputPath"
  [[ ! -d $logOutputPath ]] && echo "could not create log dir, exiting..." && exit 1
  dateString=$(date +%Y-%m-%d-%H-%M)
  logFile=$logOutputPath/alienSync-$dateString.log
  echo "$0 $*" | tee -a "$logFile"
  echo "" | tee -a "$logFile"
  echo log: $logFile

  #be nice and allow group members access as well (002 will create dirs with 775 and files with 664)
  umask 0002

  #lock
  lockFile=$logOutputPath/runningNow.lock
  [[ -f $lockFile ]] && echo "locked. Another process running? ($lockFile)" | tee -a "$logFile" && exit 1
  touch "$lockFile"
  [[ ! -f $lockFile ]] && echo "unable to create lock. exiting..." | tee -a "$logFile" && exit 1

  #redirect all output to a log
  if [[ $allOutputToLog -eq 1 ]]; then
    # fd 6 keeps the original stdout so it can be restored after the loop
    exec 6>&1
    # append: the log already holds the lines written with "tee -a" above
    exec 1>>"$logFile" 2>&1
  fi

  newFilesList=$logOutputPath/"newFiles.list"
  rm -f "$newFilesList"
  touch "$newFilesList"
  redoneFilesList=$logOutputPath/"redoneFiles.list"
  rm -f "$redoneFilesList"
  touch "$redoneFilesList"
  updatedFilesList="${logOutputPath}/updatedFiles.list"

  # check the config
  [[ -z $alienFindCommand ]] && echo "alienFindCommand not defined, exiting..." && exitScript 1
  [[ -z ${localPathPrefix} ]] && echo "localPathPrefix not defined, exiting..." && exitScript 1
  [[ -z $logOutputPath ]] && echo "logOutputPath not defined, exiting..." && exitScript 1
  [[ -z $secondsToSuicide ]] && echo "setting default secondsToSuicide of 10 hrs..." && secondsToSuicide=$(( 10*3600 ))

  # init alien
  [[ -z $ALIEN_ROOT && -n $ALIEN_DIR ]] && ALIEN_ROOT=$ALIEN_DIR
  #if ! haveAlienToken; then
  #  $ALIEN_ROOT/api/bin/alien-token-destroy
  # alienUserName stays unquoted on purpose: when empty no argument is passed
  $ALIEN_ROOT/api/bin/alien-token-init $alienUserName
  #fi
  #if ! haveAlienToken; then
  #  if [[ $allOutputToLog -eq 1 ]]; then
  #    exec 1>&6 6>&-
  #  fi
  #  echo "problems getting token! exiting..." | tee -a $logFile
  #  exitScript 1
  #fi
  #ls -ltr /tmp/gclient_env_$UID
  #cat /tmp/gclient_env_$UID
  source /tmp/gclient_env_$UID

  #set a default timeout for grid access
  [[ -z $copyTimeout ]] && copyTimeout=600
  export GCLIENT_COMMAND_MAXWAIT=$copyTimeout

  localAlienDatabase=$logOutputPath/localAlienDatabase.list
  localFileList=$logOutputPath/localFile.list

  alienFileListCurrent=$logOutputPath/alienFileDatabase.list
  [[ ! -f $localFileList ]] && touch "$localFileList"
  candidateLocalFileDatabase=$logOutputPath/candidateLocalFileDatabase.list

  #here we produce the current alien file list
  if [[ -n ${useExistingAlienFileDatabase} && -f ${localAlienDatabase} ]]; then
    #we use the old one
    echo "using ${localAlienDatabase} instead of full alien search"
    echo cp -f ${localAlienDatabase} ${alienFileListCurrent}
    cp -f "${localAlienDatabase}" "${alienFileListCurrent}"
  else
    #we make a new one
    echo "eval $alienFindCommand > $alienFileListCurrent"
    eval "$alienFindCommand" > "$alienFileListCurrent"
  fi

  echo "number of files in the collection: $(wc -l $alienFileListCurrent)"
  #create a list of candidate destination locations
  #this is in case there are more files on alien trying to get to the same local destination
  #in which case we take the one with the youngest ctime (later in code)
  if [[ -n ${destinationModifyCommand} ]]; then
    echo eval "cat $alienFileListCurrent | ${destinationModifyCommand} | sed \"s,^,${localPathPrefix},\" > ${candidateLocalFileDatabase}"
    eval "cat $alienFileListCurrent | ${destinationModifyCommand} | sed \"s,^,${localPathPrefix},\" > ${candidateLocalFileDatabase}"
  fi

  #logic is: if file list is missing we force the md5 recalculation
  [[ ! -f $localAlienDatabase ]] && forceLocalMD5recalculation=1 && echo "forcing local MD5 sum recalculation" && cp -f "$alienFileListCurrent" "$localAlienDatabase"

  #since we grep through the files frequently, copy some stuff to tmpfs for fast access
  tmp=$(mktemp -d 2>/dev/null)
  if [[ -d $tmp ]]; then
    cp "$localAlienDatabase" "$tmp"
    cp "$localFileList" "$tmp"
    cp "$alienFileListCurrent" "$tmp"
    [[ -f ${candidateLocalFileDatabase} ]] && cp "${candidateLocalFileDatabase}" "${tmp}"
  else
    # no scratch dir available: grep the originals in the log dir instead
    tmp=$logOutputPath
  fi

  echo "starting downloading:"
  lineNumber=0
  alienFileCounter=0
  localFileCounter=0
  downloadedFileCounter=0
  while read -r alienFile md5alien timestamp size
  do
    ((lineNumber++))

    #sometimes the md5 turns out empty and is then stored as a "." to avoid problems parsing
    [[ "$md5alien" =~ "." ]] && md5alien=""

    [[ -n $timeStampInLog ]] && date
    [[ $SECONDS -ge $secondsToSuicide ]] && echo "$SECONDS seconds passed, exiting by suicide..." && break
    [[ "$alienFile" != "/"*"/"?* ]] && echo "WARNING: read line not path-like: $alienFile" && continue
    ((alienFileCounter++))
    destination=${localPathPrefix}/${alienFile}
    destination=${destination//\/\///} #remove double slashes
    [[ -n ${destinationModifyCommand} ]] && destination=$( eval "echo ${destination} | ${destinationModifyCommand}" )
    destinationdir=${destination%/*}
    [[ -n $softLinkName ]] && softlinktodestination=${destinationdir}/${softLinkName}
    tmpdestination="${destination}.aliensyncTMP"

    if [[ -n ${destinationModifyCommand} ]]; then
      #find the candidate in the database, in case there are more files trying to go to the same
      #place due to $destinationModifyCommand which alters the final path, find the one
      #with the largest ctime (3rd field in the database list) and check if that is the current one
      #if not - skip
      #this guy contains: index of the original entry, local file name, md5, ctime
      candidateDBrecord=($(grep -n "${destination}" "$tmp/${candidateLocalFileDatabase##*/}" | sed "s/:/ /" | sort -rk4 | head -n1 ))
      originalEntryIndex=${candidateDBrecord[0]}
      [[ $lineNumber -ne $originalEntryIndex ]] && continue
    fi

    redownloading=""
    if [[ -f ${destination} ]]; then
      #soft link the downloaded file (maybe to provide a consistent link to the latest version)
      if [[ -n $softlinktodestination ]]; then
        echo ln -sf ${destination} ${softlinktodestination}
        ln -sf "${destination}" "${softlinktodestination}"
      fi
      ((localFileCounter++))

      # record layout is the same as the alien list: path md5 ctime size
      localDBrecord=($(grep "$alienFile" "$tmp/${localAlienDatabase##*/}"))
      md5local=${localDBrecord[1]}

      #sometimes the md5 turns out empty and is then stored as a "." to avoid problems parsing
      [[ "$md5local" =~ "." ]] && md5local=""

      if [[ $forceLocalMD5recalculation -eq 1 || -z $md5local ]]; then
        tmparrayMD5=($(md5sum "${destination}"))
        md5recalculated=${tmparrayMD5[0]}
        [[ "$md5local" != "$md5recalculated" ]] && echo "WARNING: local copy change ${destination}"
        md5local=${md5recalculated}
      fi
      if [[ "$md5local" == "$md5alien" && -n $md5alien ]]; then
        echo "OK ${destination} $md5alien"
        if ! grep -q "${destination}" "$tmp/${localFileList##*/}"; then
          echo ${destination} >> "$localFileList"
        fi
        continue
      fi
      if [[ -z $md5alien ]]; then
        if ! grep -q "${destination}" "$tmp/${localFileList##*/}"; then
          echo ${destination} >> "$localFileList"
        fi
        echo "WARNING: missing alien md5, leaving the local file as it is"
        continue
      fi
      echo "WARNING: md5 mismatch ${destination}"
      echo " $md5local $md5alien"
      redownloading=1
    fi

    [[ -f $tmpdestination ]] && echo "WARNING: stale $tmpdestination, removing" && rm "$tmpdestination"

    sg ${alienSyncFilesGroupOwnership} "mkdir -p ${destinationdir}"
    [[ ! -d $destinationdir ]] && echo cannot access $destinationdir && continue

    #check token
    #if ! haveAlienToken; then
    #  $ALIEN_ROOT/api/bin/alien-token-init $alienUserName
    #  #source /tmp/gclient_env_$UID
    #fi

    export copyMethod
    export copyScript
    export copyTimeout
    export copyTimeoutHard
    echo copyFromAlien "$alienFile" "$tmpdestination"
    [[ $pretend -eq 1 ]] && continue
    copyFromAlien "$alienFile" "$tmpdestination"
    chgrp ${alienSyncFilesGroupOwnership} "$tmpdestination"

    # if we didn't download remove the destination in case we tried to redownload
    # a corrupted file
    [[ ! -f $tmpdestination ]] && echo "file not downloaded" && rm -f "${destination}" && continue

    downloadOK=0
    #verify the downloaded md5 if available, validate otherwise...
    if [[ -n $md5alien ]]; then
      if (echo "$md5alien $tmpdestination" | md5sum -c --status -); then
        echo "OK md5 after download"
        downloadOK=1
      else
        echo "failed verifying md5 $md5alien of $tmpdestination"
      fi
    else
      downloadOK=1
    fi

    #handle zip files - check the checksums
    if [[ $alienFile =~ '.zip' && $downloadOK -eq 1 ]]; then
      echo "checking integrity of zip archive $tmpdestination"
      if unzip -t "$tmpdestination"; then
        downloadOK=1
      else
        downloadOK=0
      fi
    fi

    if [[ $downloadOK -eq 1 ]]; then
      echo mv $tmpdestination ${destination}
      mv "$tmpdestination" "${destination}"
      chgrp ${alienSyncFilesGroupOwnership} "${destination}"
      ((downloadedFileCounter++))
      if [[ -n $softlinktodestination ]]; then
        # -f: the link may already exist from an earlier version of the file
        echo ln -sf ${destination} $softlinktodestination
        ln -sf "${destination}" "$softlinktodestination"
      fi
      [[ -z $redownloading ]] && echo ${destination} >> "$newFilesList"
      [[ -n $redownloading ]] && echo ${destination} >> "$redoneFilesList"
      if ! grep -q "${destination}" "$tmp/${localFileList##*/}"; then
        echo ${destination} >> "$localFileList"
      fi
      [[ -n ${postCommand} ]] && ( cd "${destinationdir}" && eval "${postCommand}" )
    else
      echo "download not validated, NOT moving to ${destination}..."
      echo "removing $tmpdestination"
      rm -f "$tmpdestination"
      continue
    fi

    if [[ $unzipFiles -eq 1 ]]; then
      # the temporary file was moved to ${destination} above, so unpack that
      echo unzip ${destination} -d $destinationdir
      unzip "${destination}" -d "$destinationdir"
    fi

    echo
  done < "${alienFileListCurrent}"

  # the freshly produced list becomes the reference database for the next run
  [[ $alienFileCounter -gt 0 ]] && mv -f "$alienFileListCurrent" "$localAlienDatabase"

  echo ${0##*/} DONE

  if [[ $allOutputToLog -eq 1 ]]; then
    # restore the original stdout and close the helper descriptor
    exec 1>&6 6>&-
  fi

  cat "${newFilesList}" "${redoneFilesList}" > "${updatedFilesList}"
  eval "${executeEnd}"

  echo alienFindCommand:
  echo " $alienFindCommand"
  echo
  echo "files on alien: $alienFileCounter"
  echo "local files before: $localFileCounter"
  echo "files downloaded: $downloadedFileCounter"
  echo
  echo "new files:"
  echo
  cat "$newFilesList"
  echo
  echo "redone files:"
  echo
  cat "$redoneFilesList"

  # sendMailTo stays unquoted so that several recipients can be listed
  [[ -n $sendMailTo ]] && echo $logFile | mail -s "alienSync $alienFindCommand done" $sendMailTo

  exitScript 0
}
313
exitScript()
{
  # Clean up and terminate the script.
  #
  # Arguments: $1 - exit status (0 when omitted)
  # Globals:   lockFile, tmp, logOutputPath (read)
  #
  # Removes the lock file and the scratch directory, then exits.
  echo
  echo removing $lockFile
  [[ -n $lockFile ]] && rm -f "$lockFile"

  # main falls back to tmp=$logOutputPath when mktemp fails; in that case the
  # "scratch" directory is the log directory itself and must not be deleted.
  if [[ -n $tmp && $tmp != "$logOutputPath" ]]; then
    echo removing $tmp
    rm -rf "$tmp"
  fi
  exit "${1:-0}"
}
323
alien_find()
{
  # like a regular alien_find command
  # output is a list with md5sums and ctimes
  #
  # Arguments: passed on unchanged to "gbbox find" (search path, pattern, ...)
  # Globals:   ALIEN_ROOT (read), logOutputPath (read; set to "./" if empty),
  #            alienFindMaxCollectionLength (read, optional page size,
  #            default 10000), GCLIENT_* (exported)
  # Outputs:   one line per file on stdout: "<path> <md5> <ctime> <size>",
  #            md5 is "." when the catalogue has none, ctime is in epoch seconds;
  #            stderr of gbbox goes to $logOutputPath/alien_find.err
  # Returns:   1 when the gbbox executable is missing, 0 otherwise
  local gbbox="$ALIEN_ROOT/api/bin/gbbox"
  [[ ! -x ${gbbox} ]] && echo "### error, no ${gbbox} find..." && return 1
  [[ -z $logOutputPath ]] && logOutputPath="./"

  # page size of a single query; anything but a positive integer falls back
  local maxCollectionLength=${alienFindMaxCollectionLength:-10000}
  [[ $maxCollectionLength =~ ^[1-9][0-9]*$ ]] || maxCollectionLength=10000

  export GCLIENT_COMMAND_MAXWAIT=600
  export GCLIENT_COMMAND_RETRY=20
  export GCLIENT_SERVER_RESELECT=4
  export GCLIENT_SERVER_RECONNECT=2
  export GCLIENT_RETRY_DAMPING=1.2
  export GCLIENT_RETRY_SLEEPTIME=2

  local iterationNumber=0
  local numberOfFiles=$maxCollectionLength
  local offset nfields x field name value turl md5 ctime size
  local -a fields
  rm -f "$logOutputPath/alien_find.err"
  # keep asking for the next page as long as the previous one came back full
  while [[ $numberOfFiles -ge $maxCollectionLength && $iterationNumber -lt 100 ]]; do
    numberOfFiles=0
    # NOTE(review): the "-1" makes consecutive pages overlap by one entry if
    # the offset is zero-based; kept unchanged - confirm against gbbox find.
    offset=$((maxCollectionLength*iterationNumber-1))
    [[ $offset -lt 0 ]] && offset=0
    # The loop reads from a process substitution rather than a pipe so that
    # numberOfFiles is updated in this shell and the paging test above sees it.
    while read -r -a fields; do
      nfields=${#fields[*]}
      turl=""
      md5=""
      ctime=""
      size=""
      # field 0 is skipped (presumably the tag name of the xml entry)
      for ((x=1; x<nfields; x++)); do
        field=${fields[x]}
        name=${field%%=*}
        case "${name}" in
          md5|turl|size) ;;
          # the ctime value contains a space, so it continues in the next field
          ctime) field+=" ${fields[x+1]}" ;;
          *) continue ;;
        esac
        # take the text between the quotes; the data is never evaluated
        value=${field#*=}
        value=${value#\"}
        value=${value%%\"*}
        printf -v "${name}" '%s' "${value}"
      done
      ctime=$( date -d "${ctime}" +%s 2>/dev/null)
      [[ -z $md5 ]] && md5="."
      [[ -n "$turl" ]] && echo "${turl//"alien://"/} ${md5} ${ctime} ${size}" && ((numberOfFiles++))
    done < <("${gbbox}" find -x coll -l "${maxCollectionLength}" -o "${offset}" "$@" 2>>"$logOutputPath/alien_find.err")
    ((iterationNumber++))
  done
  return 0
}
379
alien_find_split()
{
  #split the search in sub searches in the subdirectories of the base path
  #
  # Arguments: $1 - base path whose entries are listed with "gbbox ls"
  #            $2 - search term handed to alien_find for every sub path
  #            $3 - regular expression selecting sub paths (default: ".*")
  # Outputs:   whatever alien_find prints for each selected sub path
  local basePath=${1}
  local searchTerm=${2}
  local subPathSelection=${3}
  local subPath
  [[ -z ${subPathSelection} ]] && subPathSelection=".*"
  # ${var:+"$var"} keeps an omitted argument omitted instead of passing ""
  gbbox ls ${basePath:+"${basePath}"} 2>/dev/null | \
    while read -r subPath; do
      # the regex must stay unquoted to be treated as a pattern
      [[ ! ${subPath} =~ ${subPathSelection} ]] && continue
      # quoted: the search term (e.g. *.root) must reach alien_find untouched
      alien_find "${basePath}/${subPath}" ${searchTerm:+"${searchTerm}"}
    done
}
393
listCollectionContents()
{
  #find the xml collections and print the list of filenames and hashes
  #
  # Arguments: $1 [$2] - handed to catCollections (at most the first two)
  # Outputs:   one line per collection entry carrying a turl:
  #            "<path> <md5> <ctime>", md5 is "." when missing (same
  #            convention as alien_find), ctime is in epoch seconds
  local nfields x field name value turl md5 ctime
  local -a fields
  while read -r -a fields; do
    nfields=${#fields[*]}
    turl=""
    md5=""
    ctime=""
    # field 0 is skipped (presumably the tag name of the xml entry)
    for ((x=1; x<nfields; x++)); do
      field=${fields[x]}
      name=${field%%=*}
      case "${name}" in
        md5|turl) ;;
        # the ctime value contains a space, so it continues in the next field
        ctime) field+=" ${fields[x+1]}" ;;
        *) continue ;;
      esac
      # take the text between the quotes; the data is never evaluated
      value=${field#*=}
      value=${value#\"}
      value=${value%%\"*}
      printf -v "${name}" '%s' "${value}"
    done
    ctime=$( date -d "${ctime}" +%s 2>/dev/null)
    # an empty md5 would shift the columns for whoever parses this list
    [[ -z $md5 ]] && md5="."
    [[ -n "$turl" ]] && echo "${turl//"alien://"/} ${md5} ${ctime}"
  # "${@:1:2}" forwards one or two arguments without inventing an empty one
  done < <(catCollections "${@:1:2}" 2>/dev/null)
}
418
catCollections()
{
  #print the contents of collection(s)
  #
  # Arguments: two - search path and search term: every collection found by
  #                  alien_find is printed with "gbbox cat"
  #            one - path of a single collection to print
  #            any other number of arguments prints nothing
  local collection
  if [[ $# -eq 2 ]]; then
    # alien_find prints "<path> <md5> <ctime> <size>"; only the path is a
    # file name, the remaining columns are dropped into "_"
    while read -r collection _; do
      [[ $collection != "/"*"/"?* ]] && continue
      gbbox cat "$collection"
    done < <(alien_find "$1" "$2")
  elif [[ $# -eq 1 ]]; then
    gbbox cat "$1"
  fi
}
431
haveAlienToken()
{
  #only get a new token if the old one expires soon
  #
  # Arguments: $1 - minimum remaining token lifetime in seconds (default 4000)
  # Globals:   ALIEN_ROOT (read)
  # Returns:   0 when the token is valid for at least $1 more seconds (the
  #            remaining time is printed), 1 otherwise
  local maxExpireTime=$1
  [[ -z $maxExpireTime ]] && maxExpireTime=4000
  [[ -z $ALIEN_ROOT ]] && echo "no ALIEN_ROOT!" && return 1

  local now tokenExpirationTime secondsToExpire
  now=$(date "+%s")
  tokenExpirationTime=$("$ALIEN_ROOT/api/bin/alien-token-info" | grep Expires)
  # no "Expires" line means there is no token to speak of
  [[ -z $tokenExpirationTime ]] && return 1
  # everything after the first colon is the expiration date
  tokenExpirationTime=$(date -d "${tokenExpirationTime#*:}" "+%s") || return 1
  secondsToExpire=$(( tokenExpirationTime-now ))
  if [[ $secondsToExpire -lt $maxExpireTime ]]; then
    return 1
  else
    echo "token valid for another $secondsToExpire seconds"
    return 0
  fi
}
449
copyFromAlien()
{
  #copy the file $1 to $2 using a specified method
  #uses the "timeout" command to make sure the
  #download processes will not hang forever.
  #
  # Arguments: $1 - source on alien, with or without the "alien://" prefix
  #            $2 - local destination
  # Globals:   copyMethod  - "tfilecp" runs the root macro $copyScript,
  #                          anything else uses alien_cp
  #            copyTimeout - seconds handed to timeout (default 600)
  #            copyTimeoutHard - gets a default of 1200 here but is not used
  #                          by this function
  #            ALIEN_ROOT, copyScript (read)
  # Returns:   the exit status of the timeout command
  [[ -z $copyTimeout ]] && copyTimeout=600
  [[ -z $copyTimeoutHard ]] && copyTimeoutHard=1200
  # normalise the source so that it carries the prefix exactly once
  local src=${1//"alien://"/}
  src="alien://${src}"
  local dst=$2
  if [[ "$copyMethod" == "tfilecp" ]]; then
    echo "timeout $copyTimeout root -b -q $copyScript(\"$src\",\"$dst\")"
    timeout "$copyTimeout" root -b -q "$copyScript(\"$src\",\"$dst\")"
  else
    echo "timeout $copyTimeout $ALIEN_ROOT/api/bin/alien_cp $src $dst"
    # quoted so that paths with blanks or glob characters stay one argument
    timeout "$copyTimeout" "$ALIEN_ROOT/api/bin/alien_cp" "$src" "$dst"
  fi
}
469
parseConfig()
{
  # Set up the configuration from defaults, config file and arguments.
  #
  # Arguments: option=value pairs; configFile=/path names a file to source
  # Globals:   configFile, alienFindCommand, secondsToSuicide,
  #            localPathPrefix, logOutputPath, unzipFiles, allOutputToLog
  #            (reset to defaults), plus every option given (exported)
  # Returns:   1 when the config file is missing or an argument is not of
  #            the form option=value
  # Precedence: defaults < config file < command-line options.

  #config file
  configFile=""
  alienFindCommand=""
  secondsToSuicide=$(( 10*3600 ))
  localPathPrefix="${PWD}"
  logOutputPath="${PWD}/alienSyncLogs"
  unzipFiles=0
  allOutputToLog=1

  local -a args=("$@")
  local opt var value

  #first, check if the config file is configured
  #is yes - source it so that other options can override it
  #if any
  for opt in "${args[@]}"; do
    # anchored match and plain assignment: the option text is never evaluated,
    # and a value that merely contains "configFile=" is not mistaken for it
    if [[ ${opt} == configFile=* ]]; then
      configFile=${opt#configFile=}
      [[ ! -f ${configFile} ]] && echo "configFile ${configFile} not found, exiting..." && return 1
      echo "using config file: ${configFile}"
      source "${configFile}"
      break
    fi
  done

  #then, parse the options as they override the options from file
  for opt in "${args[@]}"; do
    if [[ ! "${opt}" =~ .*=.* ]]; then
      echo "badly formatted option ${opt}, should be: option=value, stopping..."
      return 1
    fi
    # split at the first "=" so that values may contain "=" themselves
    var="${opt%%=*}"
    value="${opt#*=}"
    echo "${var} = ${value}"
    export "${var}=${value}"
  done
}
508
# Script entry point: hand every command-line argument over to main.
main "${@}"