3 # - script to sync a group of files on alien with a local cache
4 # downloads only new and updated files
5 # - by default it mirrors the directory structure in a specified local location
6 # (the local cache location and paths can be manipulated.)
7 # - needs a configured config file (by default alienSync.config)
8 # and a working alien environment (token and at least $ALIEN_DIR or $ALIEN_ROOT set)
10 # origin: Mikolaj Krzewicki, mikolaj.krzewicki@cern.ch
14 if [[ $# -lt 1 ]]; then
15 echo "Usage: ${0##*/} configFile=/path/to/config"
16 echo "expert: ${0##*/} alienFindCommand=\"alien_find /some/path/ file\" [opt=value]"
20 # try to load the config file
21 #[[ ! -f $1 ]] && echo "config file $1 not found, exiting..." | tee -a $logFile && exit 1
22 if ! parseConfig "$@"; then return 1; fi
24 if [[ -z ${alienFindCommand} ]] && echo "alienFindCommand not defined!" && return 1
26 #if not set, use the default group
27 [[ -z ${alienSyncFilesGroupOwnership} ]] && alienSyncFilesGroupOwnership=$(id -gn)
30 [[ ! -d $logOutputPath ]] && echo "logOutputPath not available, creating..." && sg ${alienSyncFilesGroupOwnership} "mkdir -p $logOutputPath"
31 [[ ! -d $logOutputPath ]] && echo "could not create log dir, exiting..." && exit 1
32 dateString=$(date +%Y-%m-%d-%H-%M)
33 logFile=$logOutputPath/alienSync-$dateString.log
34 echo "$0 $@"|tee -a $logFile
35 echo ""|tee -a $logFile
38 #be nice and allow group members access as well (002 will create dirs with 775 and files with 664)
42 lockFile=$logOutputPath/runningNow.lock
43 [[ -f $lockFile ]] && echo "locked. Another process running? ($lockFile)" | tee -a $logFile && exit 1
45 [[ ! -f $lockFile ]] && echo "unable to create lock. exiting..." | tee -a $logFile && exit 1
47 #redirect all output to a log
48 if [[ $allOutputToLog -eq 1 ]]; then
53 newFilesList=$logOutputPath/"newFiles.list"
56 redoneFilesList=$logOutputPath/"redoneFiles.list"
57 rm -f $redoneFilesList
58 touch $redoneFilesList
59 updatedFilesList="${logOutputPath}/updatedFiles.list"
62 [[ -z $alienFindCommand ]] && echo "alienFindCommand not defined, exiting..." && exitScript 1
63 [[ -z ${localPathPrefix} ]] && echo "localPathPrefix not defined, exiting..." && exitScript 1
64 [[ -z $logOutputPath ]] && echo "logOutputPath not defined, exiting..." && exitScript 1
65 [[ -z $secondsToSuicide ]] && echo "setting default secondsToSuicide of 10 hrs..." && secondsToSuicide=$(( 10*3600 ))
68 [[ -z $ALIEN_ROOT && -n $ALIEN_DIR ]] && ALIEN_ROOT=$ALIEN_DIR
69 #if ! haveAlienToken; then
70 # $ALIEN_ROOT/api/bin/alien-token-destroy
71 $ALIEN_ROOT/api/bin/alien-token-init $alienUserName
73 #if ! haveAlienToken; then
74 # if [[ $allOutputToLog -eq 1 ]]; then
77 # echo "problems getting token! exiting..." | tee -a $logFile
80 #ls -ltr /tmp/gclient_env_$UID
81 #cat /tmp/gclient_env_$UID
82 source /tmp/gclient_env_$UID
84 #set a default timeout for grid access
85 [[ -z $copyTimeout ]] && copyTimeout=600
86 export GCLIENT_COMMAND_MAXWAIT=$copyTimeout
88 localAlienDatabase=$logOutputPath/localAlienDatabase.list
89 localFileList=$logOutputPath/localFile.list
91 alienFileListCurrent=$logOutputPath/alienFileDatabase.list
92 [[ ! -f $localFileList ]] && touch $localFileList
93 candidateLocalFileDatabase=$logOutputPath/candidateLocalFileDatabase.list
95 #here we produce the current alien file list
96 if [[ -n ${useExistingAlienFileDatabase} && -f ${localAlienDatabase} ]]; then
98 echo "using ${localAlienDatabase} instead of full alien search"
99 echo cp -f ${localAlienDatabase} ${alienFileListCurrent}
100 cp -f ${localAlienDatabase} ${alienFileListCurrent}
103 echo "eval $alienFindCommand > $alienFileListCurrent"
104 eval "$alienFindCommand" > $alienFileListCurrent
107 echo "number of files in the collection: $(wc -l $alienFileListCurrent)"
108 #create a list of candidate destination locations
109 #this handles the case where several files on alien map to the same local destination,
110 #in which case we take the one with the most recent ctime (later in code)
111 if [[ -n ${destinationModifyCommand} ]]; then
112 echo eval "cat $alienFileListCurrent | ${destinationModifyCommand} | sed \"s,^,${localPathPrefix},\" > ${candidateLocalFileDatabase}"
113 eval "cat $alienFileListCurrent | ${destinationModifyCommand} | sed \"s,^,${localPathPrefix},\" > ${candidateLocalFileDatabase}"
116 #logic is: if file list is missing we force the md5 recalculation
117 [[ ! -f $localAlienDatabase ]] && forceLocalMD5recalculation=1 && echo "forcing local MD5 sum recalculation" && cp -f $alienFileListCurrent $localAlienDatabase
119 #since we grep through the files frequently, copy some stuff to tmpfs for fast access
120 tmp=$(mktemp -d 2>/dev/null)
121 if [[ -d $tmp ]]; then
122 cp $localAlienDatabase $tmp
123 cp $localFileList $tmp
124 cp $alienFileListCurrent $tmp
125 [[ -f ${candidateLocalFileDatabase} ]] && cp ${candidateLocalFileDatabase} ${tmp}
130 echo "starting downloading:"
134 downloadedFileCounter=0
135 while read -r alienFile md5alien timestamp size
139 #sometimes the md5 turns out empty and is then stored as a "." to avoid problems parsing
140 [[ "$md5alien" =~ "." ]] && md5alien=""
142 [[ -n $timeStampInLog ]] && date
143 [[ $SECONDS -ge $secondsToSuicide ]] && echo "$SECONDS seconds passed, exiting by suicide..." && break
144 [[ "$alienFile" != "/"*"/"?* ]] && echo "WARNING: read line not path-like: $alienFile" && continue
145 ((alienFileCounter++))
146 destination=${localPathPrefix}/${alienFile}
147 destination=${destination//\/\///} #remove double slashes
148 [[ -n ${destinationModifyCommand} ]] && destination=$( eval "echo ${destination} | ${destinationModifyCommand}" )
149 destinationdir=${destination%/*}
150 [[ -n $softLinkName ]] && softlinktodestination=${destinationdir}/${softLinkName}
151 tmpdestination="${destination}.aliensyncTMP"
153 if [[ -n ${destinationModifyCommand} ]]; then
154 #find the candidate in the database, in case there are more files trying to go to the same
155 #place due to $destinationModifyCommand which alters the final path, find the one
156 #with the largest ctime (3rd field in the database list) and check if that is the current one
158 #echo grep -n ${destination} $candidateLocalFileDatabase | sed "s/:/ /" | sort -rk4
159 #grep -n ${destination} $candidateLocalFileDatabase| sed "s/:/ /" | sort -rk4
160 #this record contains: index of the original entry, local file name, md5, ctime
161 candidateDBrecord=($(grep -n ${destination} $tmp/${candidateLocalFileDatabase##*/}| sed "s/:/ /" | sort -rk4|head -n1 ))
162 originalEntryIndex=${candidateDBrecord[0]}
163 [[ $lineNumber -ne $originalEntryIndex ]] && continue
167 if [[ -f ${destination} ]]; then
168 #soft link the downloaded file (maybe to provide a consistent link to the latest version)
169 if [[ -n $softlinktodestination ]]; then
170 echo ln -sf ${destination} ${softlinktodestination}
171 ln -sf ${destination} ${softlinktodestination}
173 ((localFileCounter++))
175 localDBrecord=($(grep $alienFile $tmp/${localAlienDatabase##*/}))
176 md5local=${localDBrecord[1]}
178 #sometimes the md5 turns out empty and is then stored as a "." to avoid problems parsing
179 [[ "$md5local" =~ "." ]] && md5local=""
181 if [[ $forceLocalMD5recalculation -eq 1 || -z $md5local ]]; then
182 tmparrayMD5=($(md5sum ${destination}))
183 md5recalculated=${tmparrayMD5[0]}
184 [[ "$md5local" != "$md5recalculated" ]] && echo "WARNING: local copy change ${destination}"
185 md5local=${md5recalculated}
187 if [[ "$md5local" == "$md5alien" && -n $md5alien ]]; then
188 echo "OK ${destination} $md5alien"
189 if ! grep -q ${destination} $tmp/${localFileList##*/}; then
190 echo ${destination} >> $localFileList
194 if [[ -z $md5alien ]]; then
195 if ! grep -q ${destination} $tmp/${localFileList##*/}; then
196 echo ${destination} >> $localFileList
198 echo "WARNING: missing alien md5, leaving the local file as it is"
201 echo "WARNING: md5 mismatch ${destination}"
202 echo " $md5local $md5alien"
206 [[ -f $tmpdestination ]] && echo "WARNING: stale $tmpdestination, removing" && rm $tmpdestination
208 sg ${alienSyncFilesGroupOwnership} "mkdir -p ${destinationdir}"
209 [[ ! -d $destinationdir ]] && echo cannot access $destinationdir && continue
212 #if ! haveAlienToken; then
213 # $ALIEN_ROOT/api/bin/alien-token-init $alienUserName
214 # #source /tmp/gclient_env_$UID
220 export copyTimeoutHard
221 echo copyFromAlien "$alienFile" "$tmpdestination"
222 [[ $pretend -eq 1 ]] && continue
223 copyFromAlien $alienFile $tmpdestination
224 chgrp ${alienSyncFilesGroupOwnership} $tmpdestination
226 # if we didn't download remove the destination in case we tried to redownload
228 [[ ! -f $tmpdestination ]] && echo "file not downloaded" && rm -f ${destination} && continue
231 #verify the downloaded md5 if available, validate otherwise...
232 if [[ -n $md5alien ]]; then
233 if (echo "$md5alien $tmpdestination"|md5sum -c --status -); then
234 echo "OK md5 after download"
237 echo "failed verifying md5 $md5alien of $tmpdestination"
243 #handle zip files - check the checksums
244 if [[ $alienFile =~ '.zip' && $downloadOK -eq 1 ]]; then
245 echo "checking integrity of zip archive $tmpdestination"
246 if unzip -t $tmpdestination; then
253 if [[ $downloadOK -eq 1 ]]; then
254 echo mv $tmpdestination ${destination}
255 mv $tmpdestination ${destination}
256 chgrp ${alienSyncFilesGroupOwnership} ${destination}
257 ((downloadedFileCounter++))
258 if [[ -n $softlinktodestination ]]; then
259 echo ln -s ${destination} $softlinktodestination
260 ln -s ${destination} $softlinktodestination
262 [[ -z $redownloading ]] && echo ${destination} >> $newFilesList
263 [[ -n $redownloading ]] && echo ${destination} >> $redoneFilesList
264 if ! grep -q ${destination} $tmp/${localFileList##*/}; then
265 echo ${destination} >> $localFileList
267 [[ -n ${postCommand} ]] && ( cd ${destinationdir}; eval "${postCommand}" )
269 echo "download not validated, NOT moving to ${destination}..."
270 echo "removing $tmpdestination"
271 rm -f $tmpdestination
275 if [[ $unzipFiles -eq 1 ]]; then
276 echo unzip $tmpdestination -d $destinationdir
277 unzip $tmpdestination -d $destinationdir
281 done < ${alienFileListCurrent}
283 [[ $alienFileCounter -gt 0 ]] && mv -f $alienFileListCurrent $localAlienDatabase
287 if [[ $allOutputToLog -eq 1 ]]; then
291 cat ${newFilesList} ${redoneFilesList} > ${updatedFilesList}
294 echo alienFindCommand:
295 echo " $alienFindCommand"
297 echo "files on alien: $alienFileCounter"
298 echo "local files before: $localFileCounter"
299 echo "files downloaded: $downloadedFileCounter"
309 [[ -n $sendMailTo ]] && echo $logFile | mail -s "alienSync $alienFindCommand done" $sendMailTo
317 echo removing $lockFile
326 # like a regular alien_find command
327 # output is a list with md5sums and ctimes
328 executable="$ALIEN_ROOT/api/bin/gbbox find"
329 [[ ! -x ${executable% *} ]] && echo "### error, no $executable..." && return 1
330 [[ -z $logOutputPath ]] && logOutputPath="./"
332 maxCollectionLength=10000
334 export GCLIENT_COMMAND_MAXWAIT=600
335 export GCLIENT_COMMAND_RETRY=20
336 export GCLIENT_SERVER_RESELECT=4
337 export GCLIENT_SERVER_RECONNECT=2
338 export GCLIENT_RETRY_DAMPING=1.2
339 export GCLIENT_RETRY_SLEEPTIME=2
342 numberOfFiles=$maxCollectionLength
343 rm -f $logOutputPath/alien_find.err
344 while [[ $numberOfFiles -ge $maxCollectionLength && $iterationNumber -lt 100 ]]; do
346 offset=$((maxCollectionLength*iterationNumber-1));
347 [[ $offset -lt 0 ]] && offset=0;
348 $executable -x coll -l ${maxCollectionLength} -o ${offset} "$@" 2>>$logOutputPath/alien_find.err \
349 | while read -a fields;
351 nfields=${#fields[*]}
356 for ((x=1;x<=${nfields};x++)); do
357 field=${fields[${x}]}
358 if [[ "${field}" == "md5="* ]]; then
361 if [[ "${field}" == "turl="* ]]; then
364 if [[ "${field}" == "ctime="* ]]; then
365 eval ${field}" "${fields[((x+1))]}
367 if [[ "${field}" == "size="* ]]; then
368 eval ${field}" "${fields[((x+1))]}
371 ctime=$( date -d "${ctime}" +%s 2>/dev/null)
372 [[ -z $md5 ]] && md5="."
373 [[ -n "$turl" ]] && echo "${turl//"alien://"/} ${md5} ${ctime} ${size}" && ((numberOfFiles++))
375 ((iterationNumber++))
382 #split the search in sub searches in the subdirectories of the base path
385 subPathSelection=${3}
386 [[ -z ${subPathSelection} ]] && subPathSelection=".*"
387 gbbox ls ${basePath} 2>/dev/null | \
388 while read subPath; do
389 [[ ! ${subPath} =~ ${subPathSelection} ]] && continue
390 alien_find ${basePath}/${subPath} ${searchTerm}
394 listCollectionContents()
396 #find the xml collections and print the list of filenames and hashes
397 while read -a fields; do
398 nfields=${#fields[*]}
402 for ((x=1;x<=${nfields};x++)); do
403 field=${fields[${x}]}
404 if [[ "${field}" == "md5="* ]]; then
407 if [[ "${field}" == "turl="* ]]; then
410 if [[ "${field}" == "ctime="* ]]; then
411 eval "${field} ${fields[((x+1))]}"
414 ctime=$( date -d "${ctime}" +%s 2>/dev/null)
415 [[ -n "$turl" ]] && echo "${turl//"alien://"/} ${md5} ${ctime}"
416 done < <(catCollections $1 $2 2>/dev/null)
421 #print the contents of collection(s)
422 if [[ $# -eq 2 ]]; then
423 while read collection; do
424 [[ $collection != "/"*"/"?* ]] && continue
425 gbbox cat $collection
426 done < <(alien_find $1 $2)
427 elif [[ $# -eq 1 ]]; then
434 #only get a new token if the old one expires soon
436 [[ -z $maxExpireTime ]] && maxExpireTime=4000
437 [[ -z $ALIEN_ROOT ]] && echo "no ALIEN_ROOT!" && return 1
439 tokenExpirationTime=$($ALIEN_ROOT/api/bin/alien-token-info|grep Expires)
440 tokenExpirationTime=$(date -d "${tokenExpirationTime#*:}" "+%s")
441 secondsToExpire=$(( tokenExpirationTime-now ))
442 if [[ $secondsToExpire -lt $maxExpireTime ]]; then
445 echo "token valid for another $secondsToExpire seconds"
452 #copy the file $1 to $2 using a specified method
453 #uses the "timeout" command to make sure the
454 #download processes will not hang forever.
456 [[ -z $copyTimeout ]] && copyTimeout=600
457 [[ -z $copyTimeoutHard ]] && copyTimeoutHard=1200
458 src=${1//"alien://"/}
461 if [[ "$copyMethod" == "tfilecp" ]]; then
462 echo timeout $copyTimeout root -b -q "$copyScript(\"$src\",\"$dst\")"
463 timeout $copyTimeout root -b -q "$copyScript(\"$src\",\"$dst\")"
465 echo timeout $copyTimeout $ALIEN_ROOT/api/bin/alien_cp $src $dst
466 timeout $copyTimeout $ALIEN_ROOT/api/bin/alien_cp $src $dst
475 secondsToSuicide=$(( 10*3600 ))
476 localPathPrefix="${PWD}"
477 logOutputPath="${PWD}/alienSyncLogs"
483 #first, check if the config file is configured
484 #if yes - source it so that other options can override it
486 for opt in "${args[@]}"; do
487 if [[ ${opt} =~ configFile=.* ]]; then
489 [[ ! -f ${configFile} ]] && echo "configFile ${configFile} not found, exiting..." && return 1
490 echo "using config file: ${configFile}"
491 source "${configFile}"
496 #then, parse the options as they override the options from file
497 for opt in "${args[@]}"; do
498 if [[ ! "${opt}" =~ .*=.* ]]; then
499 echo "badly formatted option ${var}, should be: option=value, stopping..."
502 local var="${opt%%=*}"
503 local value="${opt#*=}"
504 echo "${var} = ${value}"
505 export ${var}="${value}"