3 # - script to sync a group of files on alien with a local cache
4 # downloads only new and updated files
5 # - by default it mirrors the directory structure in a specified local location
6 # (the local cache location and paths can be manipulated.)
7 # - needs a configured config file (by default alienSync.config)
8 # and a working alien environment (token and at least $ALIEN_DIR or $ALIEN_ROOT set)
10 # origin: Mikolaj Krzewicki, mikolaj.krzewicki@cern.ch
# Start-up: the config file path is required as the first argument.
14 if [[ $# -lt 1 ]]; then
15 echo Usage: $0 configFile
19 # try to load the config file
# NOTE(review): logFile is assigned only further down; unless the environment
# already provides it, "tee -a" receives no file name on this line - confirm.
20 [[ ! -f $1 ]] && echo "config file $1 not found, exiting..." | tee -a $logFile && exit 1
23 #if not set, use the default group
24 [[ -z ${alienSyncFilesGroupOwnership} ]] && alienSyncFilesGroupOwnership=$(id -gn)
# The log directory is created through sg, i.e. under the configured group;
# the second test aborts the run if the directory still does not exist.
27 [[ ! -d $logOutputPath ]] && echo "logOutputPath not available, creating..." && sg ${alienSyncFilesGroupOwnership} "mkdir -p $logOutputPath"
28 [[ ! -d $logOutputPath ]] && echo "could not create log dir, exiting..." && exit 1
# One log file per run; its name carries a timestamp with minute resolution.
29 dateString=$(date +%Y-%m-%d-%H-%M)
30 logFile=$logOutputPath/alienSync-$dateString.log
# Record the invocation (script name and arguments) at the top of the log.
31 echo "$0 $@"|tee -a $logFile
32 echo ""|tee -a $logFile
35 #be nice and allow group members access as well (002 will create dirs with 775 and files with 664)
# A lock file in the log directory guards against concurrent runs:
# refuse to start when it already exists, and give up when it is still
# missing at the point where it should have been created.
39 lockFile=$logOutputPath/runningNow.lock
40 [[ -f $lockFile ]] && echo "locked. Another process running? ($lockFile)" | tee -a $logFile && exit 1
42 [[ ! -f $lockFile ]] && echo "unable to create lock. exiting..." | tee -a $logFile && exit 1
44 #redirect all output to a log
45 if [[ $allOutputToLog -eq 1 ]]; then
# Bookkeeping lists kept in the log directory: destinations downloaded for the
# first time, destinations downloaded again, and the concatenation of both
# (written at the end of the run).
50 newFilesList=$logOutputPath/"newFiles.list"
53 redoneFilesList=$logOutputPath/"redoneFiles.list"
# The list of re-downloaded files starts out empty on every run.
54 rm -f $redoneFilesList
55 touch $redoneFilesList
56 updatedFilesList="${logOutputPath}/updatedFiles.list"
# Mandatory settings: leave through exitScript when one of them is empty.
# NOTE(review): logOutputPath has already been used above to build the log,
# lock and list paths, so the emptiness check on it comes late - confirm.
59 [[ -z $alienFindCommand ]] && echo "alienFindCommand not defined, exiting..." && exitScript 1
60 [[ -z ${localPathPrefix} ]] && echo "localPathPrefix not defined, exiting..." && exitScript 1
61 [[ -z $logOutputPath ]] && echo "logOutputPath not defined, exiting..." && exitScript 1
# Run-time limit in seconds (compared against $SECONDS in the download loop).
62 [[ -z $secondsToSuicide ]] && echo "setting default secondsToSuicide of 10 hrs..." && secondsToSuicide=$(( 10*3600 ))
# Fall back to ALIEN_DIR when ALIEN_ROOT is not set.
65 [[ -z $ALIEN_ROOT && -n $ALIEN_DIR ]] && ALIEN_ROOT=$ALIEN_DIR
# Token handling: the haveAlienToken guards are commented out, so
# alien-token-init is run for $alienUserName on every invocation.
66 #if ! haveAlienToken; then
67 # $ALIEN_ROOT/api/bin/alien-token-destroy
68 $ALIEN_ROOT/api/bin/alien-token-init $alienUserName
70 #if ! haveAlienToken; then
71 # if [[ $allOutputToLog -eq 1 ]]; then
74 # echo "problems getting token! exiting..." | tee -a $logFile
77 #ls -ltr /tmp/gclient_env_$UID
78 #cat /tmp/gclient_env_$UID
# Pull the per-user client environment file into this shell.
# NOTE(review): presumably written by alien-token-init - confirm.
79 source /tmp/gclient_env_$UID
81 #set a default timeout for grid access
# copyTimeout defaults to 600 and is exported as GCLIENT_COMMAND_MAXWAIT.
82 [[ -z $copyTimeout ]] && copyTimeout=600
83 export GCLIENT_COMMAND_MAXWAIT=$copyTimeout
# Working files, all in the log directory:
#   localAlienDatabase         - listing kept from the previous run
#   localFileList              - destinations known to be present locally
#   alienFileListCurrent       - listing produced for this run
#   candidateLocalFileDatabase - current listing rewritten to local destinations
85 localAlienDatabase=$logOutputPath/localAlienDatabase.list
86 localFileList=$logOutputPath/localFile.list
88 alienFileListCurrent=$logOutputPath/alienFileDatabase.list
89 [[ ! -f $localFileList ]] && touch $localFileList
90 candidateLocalFileDatabase=$logOutputPath/candidateLocalFileDatabase.list
92 #here we produce the current alien file list
# Either reuse the stored listing (when useExistingAlienFileDatabase is set and
# the stored listing exists) or run the configured find command.
93 if [[ -n ${useExistingAlienFileDatabase} && -f ${localAlienDatabase} ]]; then
95 echo "using ${localAlienDatabase} instead of full alien search"
96 echo cp -f ${localAlienDatabase} ${alienFileListCurrent}
97 cp -f ${localAlienDatabase} ${alienFileListCurrent}
# alienFindCommand comes from the configuration and is executed with eval,
# its standard output becoming the current listing.
100 echo "eval $alienFindCommand > $alienFileListCurrent"
101 eval "$alienFindCommand" > $alienFileListCurrent
104 echo "number of files in the collection: $(wc -l $alienFileListCurrent)"
105 #create a list of candidate destination locations
106 #this is in case there are more files on alien trying to get to the same local destination
107 #in which case we take the one with the youngest ctime (later in code)
# Every listing line is piped through destinationModifyCommand and then gets
# localPathPrefix prepended by sed.
# NOTE(review): the download loop matches grep line numbers from this file
# against positions in the listing, which assumes destinationModifyCommand
# emits exactly one output line per input line - confirm.
108 if [[ -n ${destinationModifyCommand} ]]; then
109 echo eval "cat $alienFileListCurrent | ${destinationModifyCommand} | sed \"s,^,${localPathPrefix},\" > ${candidateLocalFileDatabase}"
110 eval "cat $alienFileListCurrent | ${destinationModifyCommand} | sed \"s,^,${localPathPrefix},\" > ${candidateLocalFileDatabase}"
113 #logic is: if file list is missing we force the md5 recalculation
# With no stored listing, the current one is copied in its place.
114 [[ ! -f $localAlienDatabase ]] && forceLocalMD5recalculation=1 && echo "forcing local MD5 sum recalculation" && cp -f $alienFileListCurrent $localAlienDatabase
116 #since we grep through the files frequently, copy some stuff to tmpfs for fast access
# mktemp -d creates a fresh private directory; whether it lives on tmpfs
# depends on how the host's temporary directory is set up.
# NOTE(review): the copies are snapshots - lines appended to $localFileList
# later in the run do not reach the copy that the grep checks read.
117 tmp=$(mktemp -d 2>/dev/null)
118 if [[ -d $tmp ]]; then
119 cp $localAlienDatabase $tmp
120 cp $localFileList $tmp
121 cp $alienFileListCurrent $tmp
122 [[ -f ${candidateLocalFileDatabase} ]] && cp ${candidateLocalFileDatabase} ${tmp}
# Main loop: walk the current listing line by line and bring every entry's
# local copy up to date.
127 echo "starting downloading:"
131 downloadedFileCounter=0
# Each listing line is split into: alien path, md5, timestamp, size.
132 while read -r alienFile md5alien timestamp size
136 #sometimes the md5 turns out empty and is then stored as a "." to avoid problems parsing
# The quoted right-hand side is matched literally, so any md5 field that
# contains a dot is treated as "no md5".
137 [[ "$md5alien" =~ "." ]] && md5alien=""
139 [[ -n $timeStampInLog ]] && date
# SECONDS is bash's count of seconds since the shell started; leave the loop
# once the configured run-time limit is reached.
140 [[ $SECONDS -ge $secondsToSuicide ]] && echo "$SECONDS seconds passed, exiting by suicide..." && break
# Only accept entries that start with "/" and have at least one more "/"
# followed by a character.
141 [[ "$alienFile" != "/"*"/"?* ]] && echo "WARNING: read line not path-like: $alienFile" && continue
142 ((alienFileCounter++))
# Default destination: localPathPrefix followed by the alien path, optionally
# rewritten by destinationModifyCommand (run through eval).
143 destination=${localPathPrefix}/${alienFile}
144 destination=${destination//\/\///} #remove double slashes
145 [[ -n ${destinationModifyCommand} ]] && destination=$( eval "echo ${destination} | ${destinationModifyCommand}" )
146 destinationdir=${destination%/*}
147 [[ -n $softLinkName ]] && softlinktodestination=${destinationdir}/${softLinkName}
# Downloads land under a temporary name next to the final destination.
148 tmpdestination="${destination}.aliensyncTMP"
150 if [[ -n ${destinationModifyCommand} ]]; then
151 #find the candidate in the database, in case there are more files trying to go to the same
152 #place due to $destinationModifyCommand which alters the final path, find the one
153 #with the largest ctime (3rd field in the database list) and check if that is the current one
155 #echo grep -n ${destination} $candidateLocalFileDatabase | sed "s/:/ /" | sort -rk4
156 #grep -n ${destination} $candidateLocalFileDatabase| sed "s/:/ /" | sort -rk4
157 #this guy contains: index of the original entry, local file name, md5, ctime
# grep -n prefixes the line number; sed turns the first ":" into a space so the
# number becomes field 1 and ctime field 4; the reverse sort on field 4 puts
# the winning record first.
# NOTE(review): the destination is used as an unanchored regular expression,
# so longer paths containing it (or regex metacharacters) can add matches.
# NOTE(review): sort has no -n here, so the comparison is not numeric - confirm
# that this is acceptable for the ctime values.
158 candidateDBrecord=($(grep -n ${destination} $tmp/${candidateLocalFileDatabase##*/}| sed "s/:/ /" | sort -rk4|head -n1 ))
159 originalEntryIndex=${candidateDBrecord[0]}
# Skip this entry unless it is the winning candidate for its destination.
# NOTE(review): lineNumber is presumably the 1-based index of the current
# listing line - confirm where it is incremented.
160 [[ $lineNumber -ne $originalEntryIndex ]] && continue
# A local copy exists: compare its md5 with the one from the listing.
164 if [[ -f ${destination} ]]; then
165 #soft link the downloaded file (maybe to provide a consistent link to the latest version)
166 if [[ -n $softlinktodestination ]]; then
167 echo ln -sf ${destination} ${softlinktodestination}
168 ln -sf ${destination} ${softlinktodestination}
170 ((localFileCounter++))
# Look the entry up in the snapshot of the stored listing; after word
# splitting, element 1 is the md5 recorded there.
172 localDBrecord=($(grep $alienFile $tmp/${localAlienDatabase##*/}))
173 md5local=${localDBrecord[1]}
175 #sometimes the md5 turns out empty and is then stored as a "." to avoid problems parsing
176 [[ "$md5local" =~ "." ]] && md5local=""
# Recompute the md5 from the file itself when forced or when none is recorded;
# the warning also fires when there was simply no recorded value.
178 if [[ $forceLocalMD5recalculation -eq 1 || -z $md5local ]]; then
179 tmparrayMD5=($(md5sum ${destination}))
180 md5recalculated=${tmparrayMD5[0]}
181 [[ "$md5local" != "$md5recalculated" ]] && echo "WARNING: local copy change ${destination}"
182 md5local=${md5recalculated}
# Local and alien md5 agree (and the alien md5 is not empty): the file is up to
# date; make sure it is listed in localFileList.
184 if [[ "$md5local" == "$md5alien" && -n $md5alien ]]; then
185 echo "OK ${destination} $md5alien"
186 if ! grep -q ${destination} $tmp/${localFileList##*/}; then
187 echo ${destination} >> $localFileList
# No md5 available from alien: the local file cannot be checked and is kept.
191 if [[ -z $md5alien ]]; then
192 if ! grep -q ${destination} $tmp/${localFileList##*/}; then
193 echo ${destination} >> $localFileList
195 echo "WARNING: missing alien md5, leaving the local file as it is"
# Both md5 values are known and differ.
198 echo "WARNING: md5 mismatch ${destination}"
199 echo " $md5local $md5alien"
# Download preparation: drop a leftover temporary file and create the
# destination directory under the configured group.
203 [[ -f $tmpdestination ]] && echo "WARNING: stale $tmpdestination, removing" && rm $tmpdestination
205 sg ${alienSyncFilesGroupOwnership} "mkdir -p ${destinationdir}"
206 [[ ! -d $destinationdir ]] && echo cannot access $destinationdir && continue
209 #if ! haveAlienToken; then
210 # $ALIEN_ROOT/api/bin/alien-token-init $alienUserName
211 # #source /tmp/gclient_env_$UID
217 export copyTimeoutHard
218 echo copyFromAlien "$alienFile" "$tmpdestination"
# In pretend mode the copy command is only printed, never executed.
219 [[ $pretend -eq 1 ]] && continue
220 copyFromAlien $alienFile $tmpdestination
221 chgrp ${alienSyncFilesGroupOwnership} $tmpdestination
223 # if we didn't download remove the destination in case we tried to redownload
225 [[ ! -f $tmpdestination ]] && echo "file not downloaded" && rm -f ${destination} && continue
228 #verify the downloaded md5 if available, validate otherwise...
# md5sum -c reads the "<md5> <file>" line from stdin; --status keeps it silent
# and reports the result through the exit code only.
229 if [[ -n $md5alien ]]; then
230 if (echo "$md5alien $tmpdestination"|md5sum -c --status -); then
231 echo "OK md5 after download"
234 echo "failed verifying md5 $md5alien of $tmpdestination"
240 #handle zip files - check the checksums
# The quoted pattern is matched literally: any path containing ".zip" counts.
241 if [[ $alienFile =~ '.zip' && $downloadOK -eq 1 ]]; then
242 echo "checking integrity of zip archive $tmpdestination"
243 if unzip -t $tmpdestination; then
# Validated download: move it into place, fix the group, count it, refresh the
# soft link, record it in the new/redone lists and in localFileList, then run
# the optional postCommand from inside the destination directory (in a
# subshell, so the working directory of the loop is unchanged).
250 if [[ $downloadOK -eq 1 ]]; then
251 echo mv $tmpdestination ${destination}
252 mv $tmpdestination ${destination}
253 chgrp ${alienSyncFilesGroupOwnership} ${destination}
254 ((downloadedFileCounter++))
# NOTE(review): this uses "ln -s" while the existing-file branch above uses
# "ln -sf"; without -f the call fails when the link already exists - confirm.
255 if [[ -n $softlinktodestination ]]; then
256 echo ln -s ${destination} $softlinktodestination
257 ln -s ${destination} $softlinktodestination
259 [[ -z $redownloading ]] && echo ${destination} >> $newFilesList
260 [[ -n $redownloading ]] && echo ${destination} >> $redoneFilesList
261 if ! grep -q ${destination} $tmp/${localFileList##*/}; then
262 echo ${destination} >> $localFileList
264 [[ -n ${postCommand} ]] && ( cd ${destinationdir}; eval "${postCommand}" )
# Download could not be validated: discard the temporary file.
266 echo "download not validated, NOT moving to ${destination}..."
267 echo "removing $tmpdestination"
268 rm -f $tmpdestination
# NOTE(review): the statements above either move $tmpdestination to
# ${destination} or remove it, so unzipping $tmpdestination here looks like it
# should target ${destination} instead - confirm.
272 if [[ $unzipFiles -eq 1 ]]; then
273 echo unzip $tmpdestination -d $destinationdir
274 unzip $tmpdestination -d $destinationdir
278 done < ${alienFileListCurrent}
# Wrap-up: the current listing replaces the stored one only when at least one
# path-like entry was processed in this run.
280 [[ $alienFileCounter -gt 0 ]] && mv -f $alienFileListCurrent $localAlienDatabase
284 if [[ $allOutputToLog -eq 1 ]]; then
# updatedFiles.list is the new-files list followed by the redone-files list.
288 cat ${newFilesList} ${redoneFilesList} > ${updatedFilesList}
# Summary of the run.
291 echo alienFindCommand:
292 echo " $alienFindCommand"
294 echo "files on alien: $alienFileCounter"
295 echo "local files before: $localFileCounter"
296 echo "files downloaded: $downloadedFileCounter"
# The mail body is just the path of the log file, not its contents.
306 [[ -n $sendMailTo ]] && echo $logFile | mail -s "alienSync $alienFindCommand done" $sendMailTo
314 echo removing $lockFile
# Runs "gbbox find" page by page and prints one line per file:
#   <path without alien://> <md5 or "."> <ctime as epoch seconds> <size>
# All arguments are handed on unchanged to the find command.
323 # like a regular alien_find command
324 # output is a list with md5sums and ctimes
# The variable holds the binary plus its sub-command; "${executable% *}" strips
# the sub-command so that only the binary is tested for executability.
325 executable="$ALIEN_ROOT/api/bin/gbbox find"
326 [[ ! -x ${executable% *} ]] && echo "### error, no $executable..." && return 1
327 [[ -z $logOutputPath ]] && logOutputPath="./"
# Page size used for the -l option below.
329 maxCollectionLength=10000
# Client settings exported for the gbbox calls.
# NOTE(review): these exports are not local to the function; when it runs in
# the calling shell they replace a GCLIENT_COMMAND_MAXWAIT exported earlier
# from copyTimeout - confirm that this is intended.
331 export GCLIENT_COMMAND_MAXWAIT=600
332 export GCLIENT_COMMAND_RETRY=20
333 export GCLIENT_SERVER_RESELECT=4
334 export GCLIENT_SERVER_RECONNECT=2
335 export GCLIENT_RETRY_DAMPING=1.2
336 export GCLIENT_RETRY_SLEEPTIME=2
# Paging: keep asking for further pages while the previous one was full, with a
# hard cap of 100 iterations; stderr of all calls is collected in alien_find.err.
339 numberOfFiles=$maxCollectionLength
340 rm -f $logOutputPath/alien_find.err
341 while [[ $numberOfFiles -ge $maxCollectionLength && $iterationNumber -lt 100 ]]; do
# NOTE(review): from the second page on the offset is one below a multiple of
# the page size, so consecutive pages appear to overlap by one entry - confirm
# against the semantics of the -o option.
343 offset=$((maxCollectionLength*iterationNumber-1));
344 [[ $offset -lt 0 ]] && offset=0;
345 $executable -x coll -l ${maxCollectionLength} -o ${offset} "$@" 2>>$logOutputPath/alien_find.err \
346 | while read -a fields;
348 nfields=${#fields[*]}
# NOTE(review): the index runs from 1 to nfields inclusive, so word 0 is never
# looked at and the last index is one past the end (it expands to nothing).
353 for ((x=1;x<=${nfields};x++)); do
354 field=${fields[${x}]}
355 if [[ "${field}" == "md5="* ]]; then
358 if [[ "${field}" == "turl="* ]]; then
# For ctime and size the matching word is joined with the following word and
# the result is evaluated as shell code.
# NOTE(review): presumably because the ctime value contains a space; this runs
# eval on text received from the server - confirm that the input is trusted.
361 if [[ "${field}" == "ctime="* ]]; then
362 eval ${field}" "${fields[((x+1))]}
364 if [[ "${field}" == "size="* ]]; then
365 eval ${field}" "${fields[((x+1))]}
# Convert ctime to epoch seconds; on a parse error the message is discarded and
# ctime ends up empty.
368 ctime=$( date -d "${ctime}" +%s 2>/dev/null)
# An empty md5 is printed as "." so the output keeps a fixed number of fields.
369 [[ -z $md5 ]] && md5="."
# NOTE(review): this loop is the receiving end of a pipe, which bash runs in a
# subshell (unless lastpipe is active), so the increment of numberOfFiles is
# not seen by the condition of the surrounding while loop - confirm.
370 [[ -n "$turl" ]] && echo "${turl//"alien://"/} ${md5} ${ctime} ${size}" && ((numberOfFiles++))
372 ((iterationNumber++))
# Lists the entries directly below the base path and runs alien_find once per
# entry that matches the selection.
# NOTE(review): basePath and searchTerm are presumably taken from $1 and $2 -
# confirm.
379 #split the search in sub searches in the subdirectories of the base path
# Third argument: regular expression applied to each entry name; ".*" (match
# everything) when it is not given.
382 subPathSelection=${3}
383 [[ -z ${subPathSelection} ]] && subPathSelection=".*"
# Errors from the directory listing are discarded.
384 gbbox ls ${basePath} 2>/dev/null | \
385 while read subPath; do
386 [[ ! ${subPath} =~ ${subPathSelection} ]] && continue
387 alien_find ${basePath}/${subPath} ${searchTerm}
# Prints "<path without alien://> <md5> <ctime as epoch seconds>" for every
# entry found in the collection text delivered by catCollections.
# Arguments: $1 and $2 are handed on to catCollections.
391 listCollectionContents()
393 #find the xml collections and print the list of filenames and hashes
# Each input line is split into words and scanned for the md5=, turl= and
# ctime= markers.
394 while read -a fields; do
395 nfields=${#fields[*]}
# NOTE(review): the index runs from 1 to nfields inclusive, so word 0 is never
# looked at and the last index is one past the end (it expands to nothing).
399 for ((x=1;x<=${nfields};x++)); do
400 field=${fields[${x}]}
401 if [[ "${field}" == "md5="* ]]; then
404 if [[ "${field}" == "turl="* ]]; then
# The ctime word is joined with the following word before being evaluated.
# NOTE(review): presumably because the value contains a space - confirm.
407 if [[ "${field}" == "ctime="* ]]; then
408 eval "${field} ${fields[((x+1))]}"
# Convert ctime to epoch seconds; on a parse error ctime ends up empty.
411 ctime=$( date -d "${ctime}" +%s 2>/dev/null)
# NOTE(review): an empty md5 gets no "." placeholder on this line, so such an
# entry is printed with its fields shifted - confirm whether consumers of this
# output expect a fixed field count.
412 [[ -n "$turl" ]] && echo "${turl//"alien://"/} ${md5} ${ctime}"
# Input comes from a process substitution, so the loop itself runs in the
# current shell; error output of catCollections is discarded.
413 done < <(catCollections $1 $2 2>/dev/null)
418 #print the contents of collection(s)
# Two arguments: they are handed to alien_find, and every result line that
# looks like a path is printed with "gbbox cat".
419 if [[ $# -eq 2 ]]; then
# NOTE(review): "read" with a single variable stores the whole output line of
# alien_find (path plus the fields printed after it), and the unquoted
# expansion below passes all of those words to gbbox cat - confirm that only
# the path is meant to be passed.
420 while read collection; do
421 [[ $collection != "/"*"/"?* ]] && continue
422 gbbox cat $collection
423 done < <(alien_find $1 $2)
# A single argument is handled in a separate branch.
424 elif [[ $# -eq 1 ]]; then
# Checks how long the current token remains valid.
431 #only get a new token if the old one expires soon
# Threshold in seconds; 4000 unless maxExpireTime is already set.
433 [[ -z $maxExpireTime ]] && maxExpireTime=4000
434 [[ -z $ALIEN_ROOT ]] && echo "no ALIEN_ROOT!" && return 1
# Take the "Expires" line of alien-token-info, drop everything up to and
# including the first colon, and let date turn the rest into epoch seconds.
436 tokenExpirationTime=$($ALIEN_ROOT/api/bin/alien-token-info|grep Expires)
437 tokenExpirationTime=$(date -d "${tokenExpirationTime#*:}" "+%s")
# NOTE(review): "now" is presumably the current time in epoch seconds -
# confirm where it is assigned.
438 secondsToExpire=$(( tokenExpirationTime-now ))
# The remaining lifetime is compared against the threshold.
439 if [[ $secondsToExpire -lt $maxExpireTime ]]; then
442 echo "token valid for another $secondsToExpire seconds"
449 #copy the file $1 to $2 using a specified method
450 #uses the "timeout" command to make sure the
451 #download processes will not hang forever.
# Defaults in seconds, used when the variables are not set.
# NOTE(review): copyTimeoutHard receives a default here but does not appear to
# be passed to the timeout calls below - confirm.
453 [[ -z $copyTimeout ]] && copyTimeout=600
454 [[ -z $copyTimeoutHard ]] && copyTimeoutHard=1200
# Source path with every "alien://" occurrence removed.
# NOTE(review): dst is presumably set from $2 - confirm.
455 src=${1//"alien://"/}
# copyMethod "tfilecp": run the ROOT macro named by copyScript in batch mode
# with source and destination as string arguments; otherwise use alien_cp.
# Either command is printed first and then run under "timeout $copyTimeout".
458 if [[ "$copyMethod" == "tfilecp" ]]; then
459 echo timeout $copyTimeout root -b -q "$copyScript(\"$src\",\"$dst\")"
460 timeout $copyTimeout root -b -q "$copyScript(\"$src\",\"$dst\")"
462 echo timeout $copyTimeout $ALIEN_ROOT/api/bin/alien_cp $src $dst
463 timeout $copyTimeout $ALIEN_ROOT/api/bin/alien_cp $src $dst