]>
Commit | Line | Data |
---|---|---|
d4ab9e58 | 1 | #!/bin/bash |
2 | # | |
3 | # - script to sync a group of files on alien with a local cache | |
4 | # downloads only new and updated files | |
5 | # - by default it mirrors the directory structure in a specified local location | |
6 | # (the local cache location and paths can be manipulated.) | |
7 | # - needs a configured config file (by default alienSync.config) | |
8 | # and a working alien environment (token and at least $ALIEN_DIR or $ALIEN_ROOT set) | |
9 | # | |
10 | # origin: Mikolaj Krzewicki, mikolaj.krzewicki@cern.ch | |
11 | # | |
#######################################
# Sync a group of files on alien with a local cache.
#
# Sources the config file, takes a lock in $logOutputPath, produces the list
# of remote files (by evaluating $alienFindCommand, or by reusing the previous
# list), then walks that list and downloads every file that is missing
# locally or whose md5 differs from the one in the list.
#
# Arguments:
#   $1 - config file (sourced; it provides the variables used below)
# Config variables read here:
#   alienFindCommand, localPathPrefix, logOutputPath (mandatory)
#   alienSyncFilesGroupOwnership, alienInitScript, alienUserName,
#   allOutputToLog, secondsToSuicide, copyTimeout, copyMethod, copyScript,
#   copyTimeoutHard, useExistingAlienFileDatabase, destinationModifyCommand,
#   softLinkName, timeStampInLog, pretend, unzipFiles, postCommand,
#   executeEnd, sendMailTo
# Returns:
#   1 when called without a config file; every other path leaves the script
#   through exit / exitScript.
#######################################
main()
{
  if [[ $# -lt 1 ]]; then
    echo "Usage: $0 configFile"
    # a usage error is a failure
    return 1
  fi

  # try to load the config file
  if [[ ! -f $1 ]]; then
    # logFile is only appended to when it is already defined at this point
    echo "config file $1 not found, exiting..." | tee -a ${logFile:+"$logFile"}
    exit 1
  fi
  source "$1"

  # do some accounting
  if [[ ! -d $logOutputPath ]]; then
    echo "logOutputPath not available, creating..."
    sg "${alienSyncFilesGroupOwnership}" "mkdir -p $logOutputPath"
  fi
  if [[ ! -d $logOutputPath ]]; then
    echo "could not create log dir, exiting..."
    exit 1
  fi
  dateString=$(date +%Y-%m-%d-%H-%M)
  logFile=$logOutputPath/alienSync-$dateString.log
  echo "$0 $*" | tee -a "$logFile"
  echo "" | tee -a "$logFile"
  echo log: $logFile

  #be nice and allow group members access as well (002 will create dirs with 775 and files with 664)
  umask 0002

  #lock
  # the exits below must not depend on tee being able to write the log,
  # otherwise a second instance would run past the lock
  lockFile=$logOutputPath/runningNow.lock
  if [[ -f $lockFile ]]; then
    echo "locked. Another process running? ($lockFile)" | tee -a "$logFile"
    exit 1
  fi
  touch "$lockFile"
  if [[ ! -f $lockFile ]]; then
    echo "unable to create lock. exiting..." | tee -a "$logFile"
    exit 1
  fi

  #redirect all output to a log
  if [[ $allOutputToLog -eq 1 ]]; then
    # fd 6 keeps the original stdout so it can be restored for the summary
    exec 6>&1
    # append: the command line was already written to the log with tee -a
    exec 1>>"$logFile" 2>&1
  fi

  newFilesList=$logOutputPath/"newFiles.list"
  rm -f "$newFilesList"
  touch "$newFilesList"
  redoneFilesList=$logOutputPath/"redoneFiles.list"
  rm -f "$redoneFilesList"
  touch "$redoneFilesList"
  updatedFilesList="${logOutputPath}/updatedFiles.list"

  # check the config
  if [[ -z $alienFindCommand ]]; then
    echo "alienFindCommand not defined, exiting..."
    exitScript 1
  fi
  if [[ -z ${localPathPrefix} ]]; then
    echo "localPathPrefix not defined, exiting..."
    exitScript 1
  fi
  if [[ -z $logOutputPath ]]; then
    echo "logOutputPath not defined, exiting..."
    exitScript 1
  fi
  if [[ -z $secondsToSuicide ]]; then
    echo "setting default secondsToSuicide of 10 hrs..."
    secondsToSuicide=$(( 10*3600 ))
  fi

  # init alien
  echo source $alienInitScript
  # alienInitScript is left unquoted on purpose (NOTE(review): a config may
  # carry extra words here - confirm); the trailing "" hides main's own
  # positional parameters from the sourced script
  source $alienInitScript ""
  [[ -z $ALIEN_ROOT && -n $ALIEN_DIR ]] && ALIEN_ROOT=$ALIEN_DIR
  # the token is (re)initialised unconditionally; the haveAlienToken based
  # check is not used here
  "$ALIEN_ROOT"/api/bin/alien-token-init ${alienUserName:+"$alienUserName"}

  #set a default timeout for grid access
  [[ -z $copyTimeout ]] && copyTimeout=600
  export GCLIENT_COMMAND_MAXWAIT=$copyTimeout

  localAlienDatabase=$logOutputPath/localAlienDatabase.list
  localFileList=$logOutputPath/localFile.list

  alienFileListCurrent=$logOutputPath/alienFileDatabase.list
  [[ ! -f $localFileList ]] && touch "$localFileList"
  candidateLocalFileDatabase=$logOutputPath/candidateLocalFileDatabase.list

  #here we produce the current alien file list
  if [[ -n ${useExistingAlienFileDatabase} && -f ${localAlienDatabase} ]]; then
    #we use the old one
    echo "using ${localAlienDatabase} instead of full alien search"
    echo cp -f ${localAlienDatabase} ${alienFileListCurrent}
    cp -f "${localAlienDatabase}" "${alienFileListCurrent}"
  else
    #we make a new one
    echo "eval $alienFindCommand > $alienFileListCurrent"
    eval "$alienFindCommand" > "$alienFileListCurrent"
  fi

  echo "number of files in the collection: $(wc -l "$alienFileListCurrent")"
  #create a list of candidate destination locations
  #this is in case there are more files on alien trying to get to the same local destination
  #in which case we take the one with the youngest ctime (later in code)
  if [[ -n ${destinationModifyCommand} ]]; then
    echo eval "cat $alienFileListCurrent | ${destinationModifyCommand} | sed \"s,^,${localPathPrefix},\" > ${candidateLocalFileDatabase}"
    eval "cat $alienFileListCurrent | ${destinationModifyCommand} | sed \"s,^,${localPathPrefix},\" > ${candidateLocalFileDatabase}"
  fi

  #logic is: if file list is missing we force the md5 recalculation
  if [[ ! -f $localAlienDatabase ]]; then
    forceLocalMD5recalculation=1
    echo "forcing local MD5 sum recalculation"
    cp -f "$alienFileListCurrent" "$localAlienDatabase"
  fi

  #since we grep through the files frequently, copy some stuff to tmpfs for fast access
  # cacheDir is where the lists are looked up; tmp is only ever set to a
  # directory created here, because exitScript removes $tmp recursively
  local cacheDir
  tmp=$(mktemp -d 2>/dev/null)
  if [[ -n $tmp && -d $tmp ]]; then
    cp "$localAlienDatabase" "$tmp"
    cp "$localFileList" "$tmp"
    cp "$alienFileListCurrent" "$tmp"
    [[ -f ${candidateLocalFileDatabase} ]] && cp "${candidateLocalFileDatabase}" "${tmp}"
    cacheDir=$tmp
  else
    tmp=""
    cacheDir=$logOutputPath
  fi

  echo "starting downloading:"
  lineNumber=0
  alienFileCounter=0
  localFileCounter=0
  downloadedFileCounter=0
  while read -r alienFile md5alien timestamp size
  do
    ((lineNumber++))

    #sometimes the md5 turns out empty and is then stored as a "." to avoid problems parsing
    [[ "$md5alien" =~ "." ]] && md5alien=""

    [[ -n $timeStampInLog ]] && date
    if [[ $SECONDS -ge $secondsToSuicide ]]; then
      echo "$SECONDS seconds passed, exiting by suicide..."
      break
    fi
    if [[ "$alienFile" != "/"*"/"?* ]]; then
      echo "WARNING: read line not path-like: $alienFile"
      continue
    fi
    ((alienFileCounter++))
    destination=${localPathPrefix}/${alienFile}
    destination=${destination//\/\///} #remove double slashes
    [[ -n ${destinationModifyCommand} ]] && destination=$( eval "echo ${destination} | ${destinationModifyCommand}" )
    destinationdir=${destination%/*}
    [[ -n $softLinkName ]] && softlinktodestination=${destinationdir}/${softLinkName}
    tmpdestination="${destination}.aliensyncTMP"

    if [[ -n ${destinationModifyCommand} ]]; then
      #find the candidate in the database, in case there are more files trying to go to the same
      #place due to $destinationModifyCommand which alters the final path, find the one
      #with the largest ctime (3rd field in the database list) and check if that is the current one
      #if not - skip
      #this guy contains: index of the original entry, local file name, md5, ctime
      # -F: the path is a literal string, not a regular expression
      candidateDBrecord=($(grep -F -n -- "${destination}" "$cacheDir/${candidateLocalFileDatabase##*/}" | sed "s/:/ /" | sort -rk4 | head -n1))
      originalEntryIndex=${candidateDBrecord[0]}
      [[ $lineNumber -ne $originalEntryIndex ]] && continue
    fi

    redownloading=""
    if [[ -f ${destination} ]]; then
      #if we want the soft links and they are not there for existing files, create them
      if [[ ! -h "$softlinktodestination" && -n $softLinkName ]]; then
        echo ln -s ${destination} $softlinktodestination
        ln -s "${destination}" "$softlinktodestination"
      fi
      ((localFileCounter++))

      localDBrecord=($(grep -F -- "$alienFile" "$cacheDir/${localAlienDatabase##*/}"))
      md5local=${localDBrecord[1]}

      #sometimes the md5 turns out empty and is then stored as a "." to avoid problems parsing
      [[ "$md5local" =~ "." ]] && md5local=""

      if [[ $forceLocalMD5recalculation -eq 1 || -z $md5local ]]; then
        tmparrayMD5=($(md5sum "${destination}"))
        md5recalculated=${tmparrayMD5[0]}
        [[ "$md5local" != "$md5recalculated" ]] && echo "WARNING: local copy change ${destination}"
        md5local=${md5recalculated}
      fi
      if [[ "$md5local" == "$md5alien" && -n $md5alien ]]; then
        echo "OK ${destination} $md5alien"
        if ! grep -qF -- "${destination}" "$cacheDir/${localFileList##*/}"; then
          echo "${destination}" >> "$localFileList"
        fi
        continue
      fi
      if [[ -z $md5alien ]]; then
        if ! grep -qF -- "${destination}" "$cacheDir/${localFileList##*/}"; then
          echo "${destination}" >> "$localFileList"
        fi
        echo "WARNING: missing alien md5, leaving the local file as it is"
        continue
      fi
      echo "WARNING: md5 mismatch ${destination}"
      echo " $md5local $md5alien"
      redownloading=1
    fi

    if [[ -f $tmpdestination ]]; then
      echo "WARNING: stale $tmpdestination, removing"
      rm -f -- "$tmpdestination"
    fi

    sg "${alienSyncFilesGroupOwnership}" "mkdir -p ${destinationdir}"
    if [[ ! -d $destinationdir ]]; then
      echo cannot access $destinationdir
      continue
    fi

    export copyMethod
    export copyScript
    export copyTimeout
    export copyTimeoutHard
    echo copyFromAlien "$alienFile" "$tmpdestination"
    [[ $pretend -eq 1 ]] && continue
    copyFromAlien "$alienFile" "$tmpdestination"

    # if we didn't download remove the destination in case we tried to redownload
    # a corrupted file
    if [[ ! -f $tmpdestination ]]; then
      echo "file not downloaded"
      rm -f -- "${destination}"
      continue
    fi
    chgrp "${alienSyncFilesGroupOwnership}" "$tmpdestination"

    downloadOK=0
    #verify the downloaded md5 if available, validate otherwise...
    if [[ -n $md5alien ]]; then
      # "<md5><two spaces><file>" is the canonical md5sum --check line
      if printf '%s  %s\n' "$md5alien" "$tmpdestination" | md5sum -c --status -; then
        echo "OK md5 after download"
        downloadOK=1
      else
        echo "tried to parse this: $md5alien $tmpdestination"
      fi
    else
      downloadOK=1
    fi

    #handle zip files - check the checksums
    if [[ $alienFile =~ '.zip' && $downloadOK -eq 1 ]]; then
      echo "checking integrity of zip archive $tmpdestination"
      if unzip -t "$tmpdestination"; then
        downloadOK=1
      else
        downloadOK=0
      fi
    fi

    if [[ $downloadOK -eq 1 ]]; then
      echo mv $tmpdestination ${destination}
      mv "$tmpdestination" "${destination}"
      chgrp "${alienSyncFilesGroupOwnership}" "${destination}"
      ((downloadedFileCounter++))
      if [[ -n $softlinktodestination ]]; then
        echo ln -s ${destination} $softlinktodestination
        ln -s "${destination}" "$softlinktodestination"
      fi
      [[ -z $redownloading ]] && echo "${destination}" >> "$newFilesList"
      [[ -n $redownloading ]] && echo "${destination}" >> "$redoneFilesList"
      if ! grep -qF -- "${destination}" "$cacheDir/${localFileList##*/}"; then
        echo "${destination}" >> "$localFileList"
      fi
      [[ -n ${postCommand} ]] && ( cd "${destinationdir}" && eval "${postCommand}" )
    else
      echo "download not validated, NOT moving to ${destination}..."
      rm -f -- "$tmpdestination"
      continue
    fi

    if [[ $unzipFiles -eq 1 ]]; then
      # the download has been moved to its final name above, so that is the
      # archive to unpack; stdin is detached so that an unzip prompt cannot
      # consume lines of the file list this loop is reading
      echo unzip ${destination} -d $destinationdir
      unzip "${destination}" -d "$destinationdir" </dev/null
    fi

    echo
  done < "${alienFileListCurrent}"

  # the fresh list becomes the reference for the next run
  [[ $alienFileCounter -gt 0 ]] && mv -f "$alienFileListCurrent" "$localAlienDatabase"

  echo ${0##*/} DONE

  if [[ $allOutputToLog -eq 1 ]]; then
    # restore the original stdout for the summary
    exec 1>&6 6>&-
  fi

  cat "${newFilesList}" "${redoneFilesList}" > "${updatedFilesList}"
  eval "${executeEnd}"

  echo alienFindCommand:
  echo " $alienFindCommand"
  echo
  echo "files on alien: $alienFileCounter"
  echo "local files before: $localFileCounter"
  echo "files downloaded: $downloadedFileCounter"
  echo
  echo "new files:"
  echo
  cat "$newFilesList"
  echo
  echo "redone files:"
  echo
  cat "$redoneFilesList"

  # sendMailTo is left unquoted so that it can hold several recipients
  [[ -n $sendMailTo ]] && echo $logFile | mail -s "alienSync $alienFindCommand done" $sendMailTo

  exitScript 0
}
308 | ||
#######################################
# Release the lock, remove the scratch directory and leave the script.
# Globals read: lockFile, tmp, logOutputPath
# Arguments:
#   $1 - exit status (0 when omitted)
#######################################
exitScript()
{
  echo
  echo removing $lockFile
  [[ -n $lockFile ]] && rm -f -- "$lockFile"
  if [[ -n $tmp && -n $logOutputPath && $tmp -ef $logOutputPath ]]; then
    # tmp can point at the log directory when no scratch directory could be
    # created; removing it would destroy the logs and the file databases
    echo "keeping $tmp (it is the log directory)"
  else
    echo removing $tmp
    [[ -n $tmp ]] && rm -rf -- "$tmp"
  fi
  exit "${1:-0}"
}
318 | ||
#######################################
# Like a regular alien_find command: runs "gbbox find -x coll" page by page
# and prints one line per file: "<path> <md5> <ctime> <size>"
# (path = turl without "alien://", ctime converted with date +%s,
# md5 printed as "." when empty).
# Globals:
#   ALIEN_ROOT (read), logOutputPath (read; set to "./" when empty),
#   alienFindMaxCollectionLength (read, optional page length, default 10000),
#   GCLIENT_* (exported)
# Arguments:
#   passed on to "gbbox find"
# Outputs:
#   file list on stdout; stderr of gbbox goes to $logOutputPath/alien_find.err
# Returns:
#   1 when the gbbox executable is missing, 0 otherwise
#######################################
alien_find()
{
  local gbboxBinary="$ALIEN_ROOT/api/bin/gbbox"
  if [[ ! -x $gbboxBinary ]]; then
    echo "### error, no $gbboxBinary find..."
    return 1
  fi
  [[ -z $logOutputPath ]] && logOutputPath="./"

  local maxCollectionLength=${alienFindMaxCollectionLength:-10000}
  [[ $maxCollectionLength =~ ^[1-9][0-9]*$ ]] || maxCollectionLength=10000

  export GCLIENT_COMMAND_MAXWAIT=600
  export GCLIENT_COMMAND_RETRY=20
  export GCLIENT_SERVER_RESELECT=4
  export GCLIENT_SERVER_RECONNECT=2
  export GCLIENT_RETRY_DAMPING=1.2
  export GCLIENT_RETRY_SLEEPTIME=2

  local iterationNumber=0
  local numberOfFiles=$maxCollectionLength
  local offset x field name raw quote
  local turl md5 ctime size
  local -a fields
  rm -f -- "$logOutputPath/alien_find.err"
  # keep asking for the next page as long as the previous one came back full
  while [[ $numberOfFiles -ge $maxCollectionLength && $iterationNumber -lt 100 ]]; do
    numberOfFiles=0
    # NOTE(review): the "-1" is kept from the original code; whether gbbox
    # offsets make consecutive pages overlap by one entry needs confirming
    offset=$(( maxCollectionLength * iterationNumber - 1 ))
    [[ $offset -lt 0 ]] && offset=0
    # the loop is fed through process substitution, not a pipe, so that it
    # runs in this shell and numberOfFiles is visible to the paging condition
    while read -r -a fields; do
      turl=""
      md5=""
      ctime=""
      size=""
      # element 0 is skipped, as in the original loop
      for (( x = 1; x < ${#fields[@]}; x++ )); do
        field=${fields[x]}
        case $field in
          md5=* | turl=* | size=*)
            raw=${field#*=}
            ;;
          ctime=*)
            # date and time are separated by a blank, so the value spans two fields
            raw="${field#*=} ${fields[x+1]}"
            ;;
          *)
            continue
            ;;
        esac
        name=${field%%=*}
        # the value is taken literally (no eval): the text comes from the
        # catalogue and must never be executed
        quote=${raw:0:1}
        if [[ $quote == '"' || $quote == "'" ]]; then
          raw=${raw:1}
          # an unterminated quote means the value was split by a blank; skip it
          [[ $raw == *"$quote"* ]] || continue
          raw=${raw%%"$quote"*}
        fi
        printf -v "$name" '%s' "$raw"
      done
      ctime=$( date -d "${ctime}" +%s 2>/dev/null)
      [[ -z $md5 ]] && md5="."
      if [[ -n "$turl" ]]; then
        echo "${turl//"alien://"/} ${md5} ${ctime} ${size}"
        ((numberOfFiles++))
      fi
    done < <("$gbboxBinary" find -x coll -l "${maxCollectionLength}" -o "${offset}" "$@" 2>>"$logOutputPath/alien_find.err")
    ((iterationNumber++))
  done
  return 0
}
374 | ||
#######################################
# Split the search in sub searches in the subdirectories of the base path:
# lists the base path with "gbbox ls" and calls alien_find once per entry.
# Arguments:
#   $1 - base path on alien
#   $2 - search term handed to alien_find
#   $3 - regular expression selecting the sub paths (default ".*")
# Outputs:
#   whatever alien_find prints for the selected sub paths
#######################################
alien_find_split()
{
  local basePath=${1}
  local searchTerm=${2}
  local subPathSelection=${3}
  local subPath
  [[ -z ${subPathSelection} ]] && subPathSelection=".*"
  gbbox ls "${basePath}" 2>/dev/null |
    while read -r subPath; do
      [[ ! ${subPath} =~ ${subPathSelection} ]] && continue
      # the search term is quoted so the shell does not expand a pattern like
      # "*.root" against local files; stdin is detached so that the search
      # cannot consume the list of sub paths this loop is reading
      alien_find "${basePath}/${subPath}" ${searchTerm:+"${searchTerm}"} </dev/null
    done
}
388 | ||
#######################################
# Print the list of file names and hashes found in xml collection(s).
# The collection text comes from catCollections; every line that carries a
# turl attribute is printed as "<path> <md5> <ctime>" (path = turl without
# "alien://", ctime converted with date +%s, md5 printed as "." when empty).
# Arguments:
#   $1, $2 - handed to catCollections (one argument: a collection,
#            two arguments: search path and search term)
#######################################
listCollectionContents()
{
  local x field name raw quote
  local turl md5 ctime
  local -a fields
  while read -r -a fields; do
    turl=""
    md5=""
    ctime=""
    # element 0 is skipped, as in the original loop
    for (( x = 1; x < ${#fields[@]}; x++ )); do
      field=${fields[x]}
      case $field in
        md5=* | turl=*)
          raw=${field#*=}
          ;;
        ctime=*)
          # date and time are separated by a blank, so the value spans two fields
          raw="${field#*=} ${fields[x+1]}"
          ;;
        *)
          continue
          ;;
      esac
      name=${field%%=*}
      # the value is taken literally (no eval): collection contents must
      # never be executed
      quote=${raw:0:1}
      if [[ $quote == '"' || $quote == "'" ]]; then
        raw=${raw:1}
        # an unterminated quote means the value was split by a blank; skip it
        [[ $raw == *"$quote"* ]] || continue
        raw=${raw%%"$quote"*}
      fi
      printf -v "$name" '%s' "$raw"
    done
    ctime=$( date -d "${ctime}" +%s 2>/dev/null)
    # an empty md5 is stored as "." so that the columns of the list stay aligned
    [[ -z $md5 ]] && md5="."
    [[ -n "$turl" ]] && echo "${turl//"alien://"/} ${md5} ${ctime}"
  done < <(catCollections "$1" ${2:+"$2"} 2>/dev/null)
}
413 | ||
#######################################
# Print the contents of collection(s) with "gbbox cat".
# Arguments:
#   one argument  - the collection to print
#   two arguments - search path and search term for alien_find; every
#                   path-like entry it returns is printed
#######################################
catCollections()
{
  if [[ $# -eq 2 ]]; then
    local collection rest
    # alien_find prints "<path> <md5> <ctime> <size>"; only the path is the
    # collection name, the remaining columns go to $rest and are dropped
    while read -r collection rest; do
      [[ $collection != "/"*"/"?* ]] && continue
      # stdin is detached so that gbbox cannot consume the list being read
      gbbox cat "$collection" </dev/null
    done < <(alien_find "$1" ${2:+"$2"})
  elif [[ $# -eq 1 ]]; then
    gbbox cat "$1"
  fi
}
426 | ||
#######################################
# Tell whether the current alien token is still valid long enough, so that a
# new token is only needed when the old one expires soon.
# Globals read: ALIEN_ROOT
# Arguments:
#   $1 - minimum remaining validity in seconds (default 4000)
# Outputs:
#   "token valid for another N seconds" when the token is good
# Returns:
#   0 when the token is valid for at least $1 more seconds, 1 otherwise
#   (also when ALIEN_ROOT is empty or no expiry time can be found)
#######################################
haveAlienToken()
{
  local maxExpireTime=$1
  [[ -z $maxExpireTime ]] && maxExpireTime=4000
  if [[ -z $ALIEN_ROOT ]]; then
    echo "no ALIEN_ROOT!"
    return 1
  fi
  local now expiresLine tokenExpirationTime secondsToExpire
  now=$(date "+%s")
  expiresLine=$("$ALIEN_ROOT/api/bin/alien-token-info" | grep Expires)
  # without an "Expires" line there is nothing to evaluate: treat as no token
  [[ -z $expiresLine ]] && return 1
  # everything after the first colon of that line is the expiry date
  tokenExpirationTime=$(date -d "${expiresLine#*:}" "+%s") || return 1
  secondsToExpire=$(( tokenExpirationTime - now ))
  if [[ $secondsToExpire -lt $maxExpireTime ]]; then
    return 1
  else
    echo "token valid for another $secondsToExpire seconds"
    return 0
  fi
}
444 | ||
#######################################
# Copy the file $1 to $2 using a specified method.
# Uses the "timelimit" command to make sure the download processes will not
# hang forever.
# ("timelimit" prints a default message if it kills the command,
# "timeout" does not, but may be more compatible with more
# systems as it is a part of coreutils)
# Globals:
#   copyMethod (read; "tfilecp" runs root with $copyScript, anything else
#   runs $ALIEN_ROOT/api/bin/alien_cp), copyScript, ALIEN_ROOT (read),
#   copyTimeout, copyTimeoutHard (read; set to 600 / 1200 when empty)
# Arguments:
#   $1 - source on alien, with or without the "alien://" prefix
#   $2 - local destination
# Returns:
#   the exit status of the timelimit command
#######################################
copyFromAlien()
{
  [[ -z $copyTimeout ]] && copyTimeout=600
  [[ -z $copyTimeoutHard ]] && copyTimeoutHard=1200
  local src dst
  # normalise the source: strip any "alien://" and put exactly one in front
  src=${1//"alien://"/}
  src="alien://${src}"
  dst=$2
  if [[ "$copyMethod" == "tfilecp" ]]; then
    echo timelimit -t $copyTimeout -T $copyTimeoutHard root -b -q "$copyScript(\"$src\",\"$dst\")"
    timelimit -t "$copyTimeout" -T "$copyTimeoutHard" root -b -q "$copyScript(\"$src\",\"$dst\")"
  else
    echo timelimit -t $copyTimeout -T $copyTimeoutHard $ALIEN_ROOT/api/bin/alien_cp $src $dst
    # quoted so that paths containing blanks reach alien_cp as single arguments
    timelimit -t "$copyTimeout" -T "$copyTimeoutHard" "$ALIEN_ROOT/api/bin/alien_cp" "$src" "$dst"
  fi
}
467 | ||
# run the sync, handing over the command line arguments unchanged
main "${@}"