3 # - script to sync a group of files on alien with a local cache
4 # downloads only new and updated files
5 # - by default it mirrors the directory structure in a specified local location
6 # (the local cache location and paths can be manipulated.)
7 # - needs a configured config file (by default alienSync.config)
8 # and a working alien environment (token and at least $ALIEN_DIR or $ALIEN_ROOT set)
10 # origin: Mikolaj Krzewicki, mikolaj.krzewicki@cern.ch
12 if [ ${BASH_VERSINFO} -lt 4 ]; then
13 echo "bash version >= 4 needed, you have ${BASH_VERSION}, exiting..."
19 if [[ $# -lt 1 ]]; then
20 echo "Usage: ${0##*/} configFile=/path/to/config"
21 echo "expert: ${0##*/} alienFindCommand=\"alien_find /some/path/ file\" [opt=value]"
22 echo " ${0##*/} alienFindCommand=\"alien_find /some/path/ file\" localPathPrefix=\${PWD}"
24 echo "by default files are downloaded to current dir, or \${alienSync_localPathPrefix}, if set."
25 echo "At least specify alienFindCommand, either on command line or in the configFile."
26 echo "the logs go by default to localPathPrefix/alienSyncLogs"
30 #be nice and allow group members access as well (002 will create dirs with 775 and files with 664)
33 # try to load the config file
34 #[[ ! -f $1 ]] && echo "config file $1 not found, exiting..." | tee -a $logFile && exit 1
# Start-up sequence: read the options, make sure the log directory exists,
# open a timestamped log file and take the run lock.
# parseConfig receives all command-line arguments; a failure there (or a
# missing alienFindCommand) leaves the enclosing function with status 1.
35 if ! parseConfig "$@"; then return 1; fi
37 [[ -z ${alienFindCommand} ]] && echo "alienFindCommand not defined!" && return 1
39 #if not set, use the default group
40 [[ -z ${alienSyncFilesGroupOwnership} ]] && alienSyncFilesGroupOwnership=$(id -gn)
# Create the log directory on demand and hand it to the configured group;
# the second test catches the case where mkdir did not succeed.
43 [[ ! -d $logOutputPath ]] && echo "logOutputPath not available, creating..." && mkdir -p $logOutputPath && chgrp ${alienSyncFilesGroupOwnership} ${logOutputPath}
44 [[ ! -d $logOutputPath ]] && echo "could not create log dir, exiting..." && exit 1
# One log file per run, named with minute resolution; the first entry is the
# command line that was used, followed by an empty line.
45 dateString=$(date +%Y-%m-%d-%H-%M)
46 logFile=$logOutputPath/alienSync-$dateString.log
47 echo "$0 $@"|tee -a $logFile
48 echo ""|tee -a $logFile
# Lock handling: an existing lock file aborts the run unless allowConcurrent
# is 1. The last test expects the lock file to exist by then.
# NOTE(review): the command that creates the lock file is not visible in this
# view - confirm it sits between these two tests.
52 lockFile=$logOutputPath/runningNow.lock
53 [[ -f $lockFile && ${allowConcurrent} -ne 1 ]] && echo "locked. Another process running? ($lockFile)" | tee -a $logFile && exit 1
55 [[ ! -f $lockFile ]] && echo "unable to create lock. exiting..." | tee -a $logFile && exit 1
57 #redirect all output to a log
58 if [[ $allOutputToLog -eq 1 ]]; then
63 newFilesList=$logOutputPath/"newFiles.list"
66 redoneFilesList=$logOutputPath/"redoneFiles.list"
67 rm -f $redoneFilesList
68 touch $redoneFilesList
69 updatedFilesList="${logOutputPath}/updatedFiles.list"
70 failedDownloadList="${logOutputPath}/failedDownload.list"
71 touch ${failedDownloadList}
75 [[ -z $alienFindCommand ]] && echo "alienFindCommand not defined, exiting..." && exitScript 1
76 [[ -z ${localPathPrefix} ]] && echo "localPathPrefix not defined, exiting..." && exitScript 1
77 [[ -z $logOutputPath ]] && echo "logOutputPath not defined, exiting..." && exitScript 1
78 [[ -z $secondsToSuicide ]] && echo "setting default secondsToSuicide of 10 hrs..." && secondsToSuicide=$(( 10*3600 ))
81 [[ -z $ALIEN_ROOT && -n $ALIEN_DIR ]] && ALIEN_ROOT=$ALIEN_DIR
82 #if ! haveAlienToken; then
83 # $ALIEN_ROOT/api/bin/alien-token-destroy
84 $ALIEN_ROOT/api/bin/alien-token-init $alienUserName
86 #if ! haveAlienToken; then
87 # if [[ $allOutputToLog -eq 1 ]]; then
90 # echo "problems getting token! exiting..." | tee -a $logFile
93 #ls -ltr /tmp/gclient_env_$UID
94 #cat /tmp/gclient_env_$UID
95 source /tmp/gclient_env_$UID
97 #set a default timeout for grid access
98 [[ -z $copyTimeout ]] && copyTimeout=600
99 export GCLIENT_COMMAND_MAXWAIT=$copyTimeout
101 localAlienDatabase=$logOutputPath/localAlienDatabase.list
102 localFileList=$logOutputPath/localFile.list
104 alienFileListCurrent=$logOutputPath/alienFileDatabase.list
105 [[ ! -f $localFileList ]] && touch $localFileList
106 candidateLocalFileDatabase=$logOutputPath/candidateLocalFileDatabase.list
108 #here we produce the current alien file list
109 if [[ -n ${useExistingAlienFileDatabase} && -f ${localAlienDatabase} ]]; then
111 echo "using ${localAlienDatabase} instead of full alien search"
112 echo cp -f ${localAlienDatabase} ${alienFileListCurrent}
113 cp -f ${localAlienDatabase} ${alienFileListCurrent}
116 echo "eval $alienFindCommand > $alienFileListCurrent"
117 eval "$alienFindCommand" > $alienFileListCurrent
120 echo "number of files in the collection: $(wc -l $alienFileListCurrent)"
121 #create a list of candidate destination locations
122 #this is in case there are more files on alien trying to get to the same local destination
123 #in which case we take the one with the youngest ctime (later in code)
124 if [[ -n ${destinationModifyCommand} ]]; then
125 echo eval "cat $alienFileListCurrent | ${destinationModifyCommand} | sed \"s,^,${localPathPrefix},\" > ${candidateLocalFileDatabase}"
126 eval "cat $alienFileListCurrent | ${destinationModifyCommand} | sed \"s,^,${localPathPrefix},\" > ${candidateLocalFileDatabase}"
129 #logic is: if file list is missing we force the md5 recalculation
130 [[ ! -f $localAlienDatabase ]] && forceLocalMD5recalculation=1 && echo "forcing local MD5 sum recalculation" && cp -f $alienFileListCurrent $localAlienDatabase
132 #since we grep through the files frequently, copy some stuff to tmpfs for fast access
133 tmp=$(mktemp -d 2>/dev/null)
134 if [[ -d $tmp ]]; then
135 cp $localAlienDatabase $tmp
136 cp $localFileList $tmp
137 cp $alienFileListCurrent $tmp
138 [[ -f ${candidateLocalFileDatabase} ]] && cp ${candidateLocalFileDatabase} ${tmp}
143 echo "starting downloading:"
147 downloadedFileCounter=0
# Main loop: one iteration per line of the current alien file list. Each line
# is split on whitespace into the alien path, its md5, a timestamp and a size.
148 while read -r alienFile md5alien timestamp size
152 #sometimes the md5 turns out empty and is then stored as a "." to avoid problems parsing
# The right-hand side is quoted, so this is a literal match: any md5 value
# containing a dot is treated as "no md5 available".
153 [[ "$md5alien" =~ "." ]] && md5alien=""
155 [[ -n $timeStampInLog ]] && date
# SECONDS is bash's running time counter for this shell; stop processing
# further files once the configured run-time budget is used up.
156 [[ $SECONDS -ge $secondsToSuicide ]] && echo "$SECONDS seconds passed, exiting by suicide..." && break
# Skip anything that does not look like an absolute path with at least one
# further "/" followed by a non-empty name.
157 [[ "$alienFile" != "/"*"/"?* ]] && echo "WARNING: read line not path-like: $alienFile" && continue
158 ((alienFileCounter++))
# Local destination = prefix + alien path, optionally rewritten by piping it
# through the user-supplied destinationModifyCommand.
159 destination=${localPathPrefix}/${alienFile}
160 destination=${destination//\/\///} #remove double slashes
161 [[ -n ${destinationModifyCommand} ]] && destination=$( eval "echo ${destination} | ${destinationModifyCommand}" )
162 destinationdir=${destination%/*}
163 [[ -n $softLinkName ]] && softlinktodestination=${destinationdir}/${softLinkName}
# Downloads go to this temporary name first; the final name is only used
# further down, after the download has been validated.
164 tmpdestination="${destination}.aliensyncTMP"
166 #if we allow concurrent running (DANGEROUS) check if somebody is already trying to process this file
167 if [[ -f ${tmpdestination} && ${allowConcurrent} -eq 1 ]]; then
168 echo "$tmpdestination exists - concurrent donwload? skipping..."
172 if [[ -n ${destinationModifyCommand} ]]; then
173 #find the candidate in the database, in case there are more files trying to go to the same
174 #place due to $destinationModifyCommand which alters the final path, find the one
175 #with the largest ctime (3rd field in the database list) and check if that is the current one
177 #echo grep -n ${destination} $candidateLocalFileDatabase | sed "s/:/ /" | sort -rk4
178 #grep -n ${destination} $candidateLocalFileDatabase| sed "s/:/ /" | sort -rk4
179 #this guy contains: index of the original entry, local file name, md5, ctime
# Look up every candidate entry that maps to this destination. grep -n
# prefixes each hit with its line number, sed turns the first ":" into a
# space so the line number becomes field 1, and the sort/head pair keeps the
# entry that sorts last on field 4 onwards.
# NOTE(review): ${destination} is used as an unanchored regular expression,
# so a path that is a prefix of another path also matches; grep -F with an
# anchored comparison may be intended - confirm.
# NOTE(review): sort -rk4 compares as text, not numerically (no -n) - confirm
# this is acceptable for the ctime values stored in the list.
180 candidateDBrecord=($(grep -n ${destination} $tmp/${candidateLocalFileDatabase##*/}| sed "s/:/ /" | sort -rk4|head -n1 ))
181 originalEntryIndex=${candidateDBrecord[0]}
# Only the winning entry is processed; every other line mapping to the same
# destination is skipped.
# NOTE(review): lineNumber is not assigned in any line visible here - confirm
# it is maintained per loop iteration.
182 [[ $lineNumber -ne $originalEntryIndex ]] && continue
186 if [[ -f ${destination} ]]; then
187 #soft link the downloaded file (maybe to provide a consistent link to the latest version)
188 if [[ -n $softlinktodestination ]]; then
189 echo ln -sf ${destination} ${softlinktodestination}
190 ln -sf ${destination} ${softlinktodestination}
192 ((localFileCounter++))
194 localDBrecord=($(grep $alienFile $tmp/${localAlienDatabase##*/}))
195 md5local=${localDBrecord[1]}
197 #sometimes the md5 turns out empty and is then stored as a "." to avoid problems parsing
198 [[ "$md5local" =~ "." ]] && md5local=""
200 if [[ $forceLocalMD5recalculation -eq 1 || -z $md5local ]]; then
201 md5recalculated=$(checkMD5sum ${destination})
202 [[ "$md5local" != "$md5recalculated" ]] && echo "WARNING: local copy change ${destination}"
203 md5local=${md5recalculated}
205 if [[ "$md5local" == "$md5alien" && -n $md5alien ]]; then
206 echo "OK ${destination} $md5alien"
207 if ! grep -q ${destination} $tmp/${localFileList##*/}; then
208 echo ${destination} >> $localFileList
212 if [[ -z $md5alien ]]; then
213 if ! grep -q ${destination} $tmp/${localFileList##*/}; then
214 echo ${destination} >> $localFileList
216 echo "WARNING: missing alien md5, leaving the local file as it is"
219 echo "WARNING: md5 mismatch ${destination}"
220 echo " $md5local $md5alien"
224 [[ -f $tmpdestination ]] && echo "WARNING: stale $tmpdestination, removing" && rm $tmpdestination
226 mkdir -p ${destinationdir} && chgrp ${alienSyncFilesGroupOwnership} ${destinationdir}
227 [[ ! -d $destinationdir ]] && echo cannot access $destinationdir && continue
230 #if ! haveAlienToken; then
231 # $ALIEN_ROOT/api/bin/alien-token-init $alienUserName
232 # #source /tmp/gclient_env_$UID
238 export copyTimeoutHard
239 echo copyFromAlien "$alienFile" "$tmpdestination"
240 [[ $pretend -eq 1 ]] && continue
241 copyFromAlien $alienFile $tmpdestination
242 chgrp ${alienSyncFilesGroupOwnership} $tmpdestination
244 # if we didn't download remove the destination in case we tried to redownload
246 [[ ! -f $tmpdestination ]] && echo "file not downloaded" && rm -f ${destination} && continue
249 #verify the downloaded md5 if available, validate otherwise...
250 if [[ -n $md5alien ]]; then
251 md5recalculated=$(checkMD5sum ${tmpdestination})
252 if [[ ${md5alien} == ${md5recalculated} ]]; then
253 echo "OK md5 after download"
256 echo "failed verifying md5 $md5alien of $tmpdestination"
262 #handle zip files - check the checksums
263 if [[ $alienFile =~ '.zip' && $downloadOK -eq 1 ]]; then
264 echo "checking integrity of zip archive $tmpdestination"
265 if unzip -t $tmpdestination; then
# Outcome handling for one file. With downloadOK set to 1 the temporary file
# is promoted to its final name, group ownership is fixed and the download
# counter is advanced.
272 if [[ $downloadOK -eq 1 ]]; then
273 echo mv $tmpdestination ${destination}
274 mv $tmpdestination ${destination}
275 chgrp ${alienSyncFilesGroupOwnership} ${destination}
276 ((downloadedFileCounter++))
# NOTE(review): this uses plain "ln -s", while the branch for files that
# already exist locally uses "ln -sf". Without -f the link is not replaced
# when one already exists - confirm which behaviour is wanted.
277 if [[ -n $softlinktodestination ]]; then
278 echo ln -s ${destination} $softlinktodestination
279 ln -s ${destination} $softlinktodestination
# First-time downloads and re-downloads are recorded in separate lists.
281 [[ -z $redownloading ]] && echo ${destination} >> $newFilesList
282 [[ -n $redownloading ]] && echo ${destination} >> $redoneFilesList
# The membership test reads the copy of the local file list under $tmp,
# while the append goes to the real list.
# NOTE(review): no visible line refreshes the copy, so the test reflects the
# state at start-up - confirm.
283 if ! grep -q ${destination} $tmp/${localFileList##*/}; then
284 echo ${destination} >> $localFileList
# The user hook runs in a subshell inside the destination directory, so the
# working directory of the script itself is not changed.
286 [[ -n ${postCommand} ]] && ( cd ${destinationdir}; eval "${postCommand}" )
# A file that finally succeeded is dropped from the failed-download list.
# NOTE(review): tmpUpdatedFailed is a relative name, i.e. it is written to
# whatever the current directory is - confirm that is always writable.
287 if grep -q ${alienFile} ${failedDownloadList}; then
288 echo "removing ${alienFile} from ${failedDownloadList}"
289 grep -v ${alienFile} ${failedDownloadList} >tmpUpdatedFailed
290 mv tmpUpdatedFailed ${failedDownloadList}
# Presumably the failure branch (the line separating it from the success
# branch is not visible here): discard the temporary file and remember the
# alien path as failed.
293 echo "download not validated, NOT moving to ${destination}..."
294 echo "removing $tmpdestination"
295 rm -f $tmpdestination
296 echo ${alienFile} >> ${failedDownloadList}
300 [[ -f $tmpdestination ]] && echo "WARNING: tmpdestination should not still be here! removing..." && rm -r ${tmpdestination}
302 if [[ $unzipFiles -eq 1 ]]; then
303 echo unzip -o ${destination} -d ${destinationdir}
304 unzip -o ${destination} -d ${destinationdir}
308 done < ${alienFileListCurrent}
310 [[ $alienFileCounter -gt 0 ]] && mv -f $alienFileListCurrent $localAlienDatabase
314 if [[ $allOutputToLog -eq 1 ]]; then
318 cat ${newFilesList} ${redoneFilesList} > ${updatedFilesList}
320 echo alienFindCommand:
321 echo " $alienFindCommand"
323 echo "files on alien: $alienFileCounter"
324 echo "local files before: $localFileCounter"
325 echo "files downloaded: $downloadedFileCounter"
337 #output the list of failed files to stdout, so the cronjob can mail it
338 echo '###############################'
339 echo "failed to download from alien:"
341 local tmpfailed=$(mktemp)
342 [[ "$(cat ${failedDownloadList} | wc -l)" -gt 0 ]] && sort ${failedDownloadList} | uniq -c | awk 'BEGIN{print "#tries\t file" }{print $1 "\t " $2}' | tee ${tmpfailed}
344 [[ -n ${MAILTO} ]] && echo $logFile | mail -s "alienSync ${alienFindCommand} done" ${MAILTO}
348 echo '###############################'
349 echo "eval ${executeEnd}"
358 echo removing $lockFile
367 # like a regular alien_find command
368 # output is a list with md5 sums and ctimes
369 executable="$ALIEN_ROOT/api/bin/gbbox find"
370 [[ ! -x ${executable% *} ]] && echo "### error, no $executable..." && return 1
371 [[ -z $logOutputPath ]] && logOutputPath="./"
373 maxCollectionLength=10000
375 export GCLIENT_COMMAND_MAXWAIT=600
376 export GCLIENT_COMMAND_RETRY=20
377 export GCLIENT_SERVER_RESELECT=4
378 export GCLIENT_SERVER_RECONNECT=2
379 export GCLIENT_RETRY_DAMPING=1.2
380 export GCLIENT_RETRY_SLEEPTIME=2
383 numberOfFiles=$maxCollectionLength
384 rm -f $logOutputPath/alien_find.err
# Query the catalogue in pages of maxCollectionLength entries, continuing
# while the previous page was full and iterationNumber is below 100. For
# every result line carrying a turl= field one line is printed:
#   <path without alien:// prefix> <md5> <ctime as epoch seconds> <size>
# Errors from the find executable are appended to alien_find.err.
# NOTE(review): the inner "while read" is the last stage of a pipeline, which
# bash runs in a subshell unless lastpipe is enabled. The numberOfFiles
# increment at the bottom then never reaches this outer loop condition -
# confirm that paging beyond the first page actually works.
385 while [[ $numberOfFiles -ge $maxCollectionLength && $iterationNumber -lt 100 ]]; do
# Offset of the current page; clamped to 0 for the first one.
387 offset=$((maxCollectionLength*iterationNumber-1));
388 [[ $offset -lt 0 ]] && offset=0;
389 $executable -x coll -l ${maxCollectionLength} -o ${offset} "$@" 2>>$logOutputPath/alien_find.err \
390 | while read -a fields;
392 nfields=${#fields[*]}
# Scan the whitespace-separated tokens of the line for the attributes of
# interest (md5=, turl=, ctime=, size=).
397 for ((x=1;x<=${nfields};x++)); do
398 field=${fields[${x}]}
399 if [[ "${field}" == "md5="* ]]; then
402 if [[ "${field}" == "turl="* ]]; then
# The token is evaluated together with the one following it, so the value
# may span two tokens (presumably a date and a time for ctime - confirm).
# NOTE(review): eval runs text received from the catalogue as shell code.
405 if [[ "${field}" == "ctime="* ]]; then
406 eval ${field}" "${fields[((x+1))]}
408 if [[ "${field}" == "size="* ]]; then
409 eval ${field}" "${fields[((x+1))]}
# Convert ctime to epoch seconds; date errors are discarded, which leaves
# ctime empty when the value cannot be parsed.
412 ctime=$( date -d "${ctime}" +%s 2>/dev/null)
# An empty md5 is written as "." so the output keeps a fixed column count.
413 [[ -z $md5 ]] && md5="."
414 [[ -n "$turl" ]] && echo "${turl//"alien://"/} ${md5} ${ctime} ${size}" && ((numberOfFiles++))
416 ((iterationNumber++))
423 #split the search in sub searches in the subdirectories of the base path
426 subPathSelection=${3}
427 [[ -z ${subPathSelection} ]] && subPathSelection=".*"
428 gbbox ls ${basePath} 2>/dev/null | \
429 while read subPath; do
430 [[ ! ${subPath} =~ ${subPathSelection} ]] && continue
431 alien_find ${basePath}/${subPath} ${searchTerm}
435 listCollectionContents()
437 #find the xml collections and print the list of filenames and hashes
438 while read -a fields; do
439 nfields=${#fields[*]}
443 for ((x=1;x<=${nfields};x++)); do
444 field=${fields[${x}]}
445 if [[ "${field}" == "md5="* ]]; then
448 if [[ "${field}" == "turl="* ]]; then
451 if [[ "${field}" == "ctime="* ]]; then
452 eval "${field} ${fields[((x+1))]}"
455 ctime=$( date -d "${ctime}" +%s 2>/dev/null)
456 [[ -n "$turl" ]] && echo "${turl//"alien://"/} ${md5} ${ctime}"
457 done < <(catCollections $1 $2 2>/dev/null)
462 #print the contents of collection(s)
463 if [[ $# -eq 2 ]]; then
464 while read collection; do
465 [[ $collection != "/"*"/"?* ]] && continue
466 gbbox cat $collection
467 done < <(alien_find $1 $2)
468 elif [[ $# -eq 1 ]]; then
475 #only get a new token if the old one expires soon
# Token lifetime check. maxExpireTime (default 4000) is the minimum number of
# seconds the token must still be valid; ALIEN_ROOT is required to locate the
# alien-token-info tool, otherwise the function returns 1.
477 [[ -z $maxExpireTime ]] && maxExpireTime=4000
478 [[ -z $ALIEN_ROOT ]] && echo "no ALIEN_ROOT!" && return 1
# Take the "Expires" line of the token info, drop everything up to the first
# ":" and let date convert the remainder to epoch seconds.
480 tokenExpirationTime=$($ALIEN_ROOT/api/bin/alien-token-info|grep Expires)
481 tokenExpirationTime=$(date -d "${tokenExpirationTime#*:}" "+%s")
# NOTE(review): "now" is not assigned in any line visible here; presumably it
# holds the current time in epoch seconds - confirm.
482 secondsToExpire=$(( tokenExpirationTime-now ))
483 if [[ $secondsToExpire -lt $maxExpireTime ]]; then
486 echo "token valid for another $secondsToExpire seconds"
493 #copy the file $1 to $2 using a specified method
494 #uses the "timeout" command to make sure the
495 #download processes will not hang forever.
# Copy one file from alien. $1 is the source; a leading "alien://" scheme is
# removed from it. Timeouts fall back to 600 s (copyTimeout) and 1200 s
# (copyTimeoutHard) when not configured.
# NOTE(review): copyTimeoutHard is defaulted but not used in the lines
# visible here, and "dst" is not assigned in any visible line (presumably
# taken from $2) - confirm.
497 [[ -z $copyTimeout ]] && copyTimeout=600
498 [[ -z $copyTimeoutHard ]] && copyTimeoutHard=1200
499 src=${1//"alien://"/}
# copyMethod "tfilecp": run the macro named by copyScript through
# "root -b -q", passing source and destination as quoted string arguments.
502 if [[ "$copyMethod" == "tfilecp" ]]; then
# Each command is echoed before it is run. When the "timeout" utility is
# available the copy is wrapped in it so a hanging transfer is terminated
# after copyTimeout seconds; otherwise the copy runs unguarded.
503 if which timeout &>/dev/null; then
504 echo timeout $copyTimeout root -b -q "$copyScript(\"$src\",\"$dst\")"
505 timeout $copyTimeout root -b -q "$copyScript(\"$src\",\"$dst\")"
507 echo root -b -q "$copyScript(\"$src\",\"$dst\")"
508 root -b -q "$copyScript(\"$src\",\"$dst\")"
# Any other copyMethod: use alien_cp from the alien installation, with the
# same optional timeout wrapper.
511 if which timeout &>/dev/null; then
512 echo timeout $copyTimeout $ALIEN_ROOT/api/bin/alien_cp $src $dst
513 timeout $copyTimeout $ALIEN_ROOT/api/bin/alien_cp $src $dst
515 echo $ALIEN_ROOT/api/bin/alien_cp $src $dst
516 $ALIEN_ROOT/api/bin/alien_cp $src $dst
526 secondsToSuicide=$(( 10*3600 ))
527 localPathPrefix="${PWD}"
528 #define alienSync_localPathPrefix in your env to have a default central location
529 [[ -n ${alienSync_localPathPrefix} ]] && localPathPrefix=${alienSync_localPathPrefix}
535 #first, check if the config file is configured
536 #if yes - source it so that other options can override it
538 for opt in "${args[@]}"; do
539 if [[ ${opt} =~ configFile=.* ]]; then
541 [[ ! -f ${configFile} ]] && echo "configFile ${configFile} not found, exiting..." && return 1
542 echo "using config file: ${configFile}"
543 source "${configFile}"
548 #then, parse the options as they override the options from file
# Second pass over the arguments: every argument must have the form
# name=value. Each pair is echoed and exported under its own name, so values
# given on the command line override the ones sourced from the config file.
549 for opt in "${args[@]}"; do
550 if [[ ! "${opt}" =~ .*=.* ]]; then
# Report the offending argument itself. ${var} is only assigned further down,
# so at this point it would be empty or left over from the previous argument.
551 echo "badly formatted option ${opt}, should be: option=value, stopping..."
# Name is everything before the first "=", value everything after it (the
# value may itself contain "=" characters).
554 local var="${opt%%=*}"
555 local value="${opt#*=}"
556 echo "${var} = ${value}"
557 export ${var}="${value}"
560 #things that by default depend on other variables should be set here, after the dependencies
561 [[ -z ${logOutputPath} ]] && logOutputPath="${localPathPrefix}/alienSyncLogs"
# MD5 helper body: returns 1 when ${file} is not a regular file. Otherwise
# the first available tool is used - md5sum, then md5 - and its output is
# split on whitespace into the local array "tmp".
# NOTE(review): the assignment of "file" and the lines that print the hash
# are not visible here; md5sum and md5 format their output differently, so
# confirm that the right array element is picked in each branch.
568 [[ ! -f ${file} ]] && return 1
569 if which md5sum &>/dev/null; then
570 local tmp=($(md5sum ${file}))
572 elif which md5 &>/dev/null; then
573 local tmp=($(md5 ${file}))