00001 #!/usr/local/bin/python
00002 #
00003 # Copyright 2005
00004 # by
00005 # The Board of Trustees of the
00006 # Leland Stanford Junior University.
00007 # All rights reserved.
00008 #
00009
# Module metadata.
# NOTE(review): the "$Date: ... $", "$Revision: ... $" and "$Name: ... $"
# strings look like revision-control keyword fields that are rewritten on
# check-in -- confirm before editing them by hand.
__facility__ = "Online"
__abstract__ = "GLAST LAT data export routines"
__author__ = "J. Panetta <panetta@slac.stanford.edu> SLAC - GLAST LAT I&T/Online"
# Keep only the second space-separated field of the keyword string, i.e. the
# calendar date without the time of day.
__date__ = ("$Date: 2006/01/31 17:24:49 $").split(' ')[1]
__version__ = "$Revision: 2.8 $"
__release__ = "$Name: R04-12-00 $"
__credits__ = "SLAC"
00017
00018 import LATTE.copyright_SLAC
00019
00020 import exceptions
00021 import md5
00022 import os
00023 import shutil
00024 import stat
00025 import sys
00026 import logging as log
00027 from LATTE.tools.FreeSpace import FreeSpace
00028 from LATTE.client.gOptions import Options
00029
class GExportException(Exception):
    """!@brief Exception raised by the Export system.

    The value given to the constructor is kept in the @c status attribute;
    the export code in this file passes a human readable message.
    """
    def __init__(self, status):
        """!@brief Remember what went wrong.

        @param status  Message (or any other object) describing the failure
        """
        # Initialise the base class as well so that the generic exception
        # machinery (args, pickling) sees the same value as self.status.
        Exception.__init__(self, status)
        self.status = status

    def __str__(self):
        """!@brief Return repr() of the status, so string messages print quoted.
        """
        return repr(self.status)
00038
00039
class DataExport:
    """!@brief Class to synch a file or set of files between one location and another.

    Transfers go in one direction only (source --> destination).  Copies are
    verified with md5 checksums and, when purging has been enabled with
    setPurge(), the source is deleted once the copy has been verified.
    """
    # Error codes: bit flags OR-ed together into the status word built by
    # __checkStat() and decoded by __statusParse().
    NO_ERROR = 0x0
    DIR_NOT_WRITABLE = 0x1
    NOT_EXTANT = 0x2

    DISK_FULL = 0x10

    # A disk for which FreeSpace() reports less than this is flagged DISK_FULL.
    # NOTE(review): assumes FreeSpace() returns a byte count -- confirm.
    DISK_FULL_LEVEL = 100*1024*1024  # 100 Megabytes

    def __init__(self):
        """!@brief Create an exporter that copies and does not purge.
        """
        self.__purgeFiles = False

        # Transfer strategy used by export(); the other valid value is
        # self.__rename.
        self.transfer = self.__copy

    def setPurge(self, purge):
        """!@brief Select whether source files are deleted after a verified copy.

        @param purge  True enables purging; any value that does not compare
                      equal to True disables it
        """
        self.__purgeFiles = (purge == True)

    def synch(self, source, dest):
        """!@brief Synch up two directory structures.

        Synch only goes in one direction: source --> destination.  Every
        directory found directly under source is exported to a directory of
        the same name under dest.  A failure to export one directory is
        logged and does not stop the remaining ones.

        @param source  Directory whose sub-directories are exported
        @param dest    Existing directory that receives the copies
        @exception GExportException if source or dest is not a directory
        """
        for label, path in (("Source", source), ("Destination", dest)):
            if not os.path.isdir(path):
                msg = "Export Error: %s %s is not a directory" % (label, path)
                raise GExportException(msg)

        # get a list of directories under source.  Do this only once, as it
        # can take time
        dirList = []
        for name in os.listdir(source):
            path = os.path.join(source, name)
            if os.path.isdir(path):
                dirList.append(path)

        for sourceDir in dirList:
            destDir = os.path.join(dest, os.path.basename(sourceDir))
            try:
                log.info("Exporting from %s to %s" % (sourceDir, destDir))
                self.export(sourceDir, destDir)
            except Exception:
                # sys.exc_info() is used instead of naming the exception in
                # the except clause because that spelling differs between
                # Python versions.
                e = sys.exc_info()[1]
                log.error("Failed exporting from %s to %s" % (sourceDir, destDir))
                log.error(" Exception: %s" % e)

    def export(self, source, dest):
        """!@brief Copy from source to dest.

        The transfer routine works out by itself whether source is a file or
        a directory.  On Windows NT/XP a purging export between two paths on
        the same drive is done as a plain move; everything else goes through
        copy / checksum / (optional) delete.

        @param source  Existing file or directory
        @param dest    Path that must not exist yet
        @exception GExportException if the checks on source or dest fail or
                   the checksums of the copy do not match
        """
        self.__checkDirs(source, dest)

        # Check for common disk on Windows XP/NT (*not* CE).
        if os.name == 'nt':
            if self.__purgeFiles and self.__sameDrive(source, dest):
                self.transfer = self.__rename
            else:
                self.transfer = self.__copy
        # On every other platform self.transfer keeps its current value,
        # which the constructor sets to self.__copy.

        self.transfer(source, dest)

    def md5sum(self, fname):
        """!@brief Returns an md5 hash for file fname.

        @param fname  Path of the file to hash
        @return Hex digest of the file content, the string "Directory" when
                fname is a directory, or 'Failed to open file' when the file
                cannot be opened
        """
        # md5hash from the ActiveState cookbook (Nick Vargish, 2004-01-30):
        # simple md5 hash utility for generating md5 checksums of files.
        # Modified slightly by Jim Panetta.
        try:
            if os.path.isdir(fname):
                return "Directory"
            f = open(fname, 'rb')
        except (IOError, OSError):
            return 'Failed to open file'
        # Close the file even when reading it fails half way through.
        try:
            return self.__sumfile(f)
        finally:
            f.close()

    def __copy(self, source, dest):
        """!@brief Transfer by copy / checksum / (optional) delete.

        @param source  Existing file or directory
        @param dest    Destination path
        @exception GExportException if the copy does not match the source
        """
        if os.path.isdir(source):
            shutil.copytree(source, dest)        # copy
            self.__compareTree(source, dest)     # checksum
            if self.__purgeFiles:                # delete
                self.__purgeTree(source)
        else:  # regular file. much simpler
            shutil.copy(source, dest)            # copy
            sourceMD5 = self.md5sum(source)      # checksum
            destMD5 = self.md5sum(dest)
            if sourceMD5 != destMD5:
                msg = "md5 sums don't match for %s :" % (source)
                msg += "\n source: %s, dest: %s" % (sourceMD5, destMD5)
                raise GExportException(msg)
            if self.__purgeFiles:                # delete
                self.__removeFile(source)

    def __removeFile(self, path):
        """!@brief Delete one file, clearing a read-only flag first.
        """
        os.chmod(path, stat.S_IWRITE)  # Just in case it is read-only
        os.remove(path)

    def __purgeTree(self, top):
        """!@brief Delete the directory top and everything below it.
        """
        # Bottom-up, so that every directory is empty by the time its parent
        # removes it.
        for root, dirs, files in os.walk(top, topdown=False):
            for name in files:
                self.__removeFile(os.path.join(root, name))
            for name in dirs:
                os.rmdir(os.path.join(root, name))
        os.rmdir(top)

    def __rename(self, source, dest):
        """!@brief Transfer by moving source to dest.  No purging needed.
        """
        shutil.move(source, dest)

    def __sameDrive(self, first, second):
        """!@brief Tell whether two paths carry the same, non-empty drive.

        The drive specifications of the absolute paths are compared without
        regard to case.  Paths for which no drive can be determined are never
        reported as sharing one, so they go through the checksummed copy.
        """
        firstDrive = os.path.splitdrive(os.path.abspath(first))[0].lower()
        secondDrive = os.path.splitdrive(os.path.abspath(second))[0].lower()
        return firstDrive != "" and firstDrive == secondDrive

    def __checkDirs(self, source, dest):
        """!@brief Validate source and destination before a transfer.

        @param source  Path that must exist
        @param dest    Path that must not exist, in a usable directory
        @exception GExportException describing the first failed check
        """
        # source and dest can't be the same.  Checked first: the "already
        # exists" test below would otherwise always fire before this one.
        samePath = (os.path.normcase(os.path.abspath(source)) ==
                    os.path.normcase(os.path.abspath(dest)))
        if samePath:
            msg = "Source and destination directories are the same. Export aborted"
            raise GExportException(msg)

        # check statuses
        sourceStatus = self.__checkStat(source)
        destStatus = self.__checkStat(dest)

        # don't care if source dir is on a full disk or if it isn't writable
        ignored = DataExport.DISK_FULL | DataExport.DIR_NOT_WRITABLE
        if sourceStatus & ~ignored:
            msg = "Export Error: Attempt to read from source %s encountered errors: " % (source)
            msg += self.__statusParse(sourceStatus)
            raise GExportException(msg)
        # Destination must not be extant.
        if not (destStatus & DataExport.NOT_EXTANT):
            msg = "Export Error: Destination %s already exists" % (dest)
            raise GExportException(msg)
        if destStatus & ~DataExport.NOT_EXTANT:
            msg = "Export Error: Attempt to write to destination %s encountered errors: " % (dest)
            msg += self.__statusParse(destStatus)
            raise GExportException(msg)

    def __compareTree(self, sourceDir, destDir):
        """!@brief Compare all files under sourceDir and destDir with md5.

        @param sourceDir Source directory
        @param destDir   Destination directory
        @exception GExportException if an entry is missing from either tree
                   or two checksums differ
        """
        # Resolve both trees before the first chdir: a relative destDir would
        # otherwise be looked up inside sourceDir.
        absSource = os.path.abspath(sourceDir)
        absDest = os.path.abspath(destDir)

        # scan the source/dest directories
        # chdir so that the source/dest keys are the same
        saveDir = os.getcwd()
        try:
            os.chdir(absSource)
            sourceMD5 = self.__treeWalk(os.walk("."), self.md5sum)

            os.chdir(absDest)
            destMD5 = self.__treeWalk(os.walk("."), self.md5sum)
        finally:
            # Never leave the process in another working directory.
            os.chdir(saveDir)

        # compare the checksums
        for name in sorted(sourceMD5):
            if name not in destMD5:
                msg = "Destination directory %s does not contain file %s from source directory %s" % \
                      (destDir, name, sourceDir)
                raise GExportException(msg)
            if sourceMD5[name] != destMD5[name]:
                msg = "md5 sums don't match for %s :" % (name)
                msg += "\n source: %s, dest: %s" % (sourceMD5[name], destMD5[name])
                raise GExportException(msg)

        extras = [name for name in sorted(destMD5) if name not in sourceMD5]
        if extras:
            msg = "Destination directory %s contains files not in source directory:" % (destDir)
            for name in extras:
                msg += "\n %s" % (name)
            raise GExportException(msg)

    def __treeWalk(self, tree, function):
        """!@brief Walk a tree, executing function() on every file.

        @param tree      Iterable of (root, dirs, files) as made by os.walk()
        @param function  Callable taking the path of one entry
        @return Dictionary mapping every directory and file path to the value
                returned by function, or to -1 when function raised
        """
        returnDict = {}
        for root, dirs, files in tree:
            for name in dirs + files:
                path = os.path.join(root, name)
                try:
                    retVal = function(path)
                except Exception:
                    retVal = -1
                returnDict[path] = retVal
        return returnDict

    def __checkStat(self, obj):
        """!@brief Check that a path exists, that its directory is writable
        and that the disk isn't full.

        @param obj File path
        @return Status: OR of NOT_EXTANT, DISK_FULL and DIR_NOT_WRITABLE,
                or NO_ERROR
        """
        status = DataExport.NO_ERROR

        # abspath first: the dirname of a bare relative name is '', which is
        # neither writable nor a usable argument for a free space query.
        parent = os.path.dirname(os.path.abspath(obj))

        if not os.path.exists(obj):
            status |= DataExport.NOT_EXTANT
        # Checked independently of NOT_EXTANT: a destination does not exist
        # yet, and that is exactly when a full disk matters.
        if os.path.isdir(parent) and FreeSpace(parent) < DataExport.DISK_FULL_LEVEL:
            status |= DataExport.DISK_FULL
        if not os.access(parent, os.W_OK):
            status |= DataExport.DIR_NOT_WRITABLE

        return status

    def __makeReadOnly(self, file):
        """!@brief Make a file read-only for user, group and others.

        @param file  Path of the file
        @return 0 on success, -1 if the mode could not be changed
        """
        try:
            os.chmod(file, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
        except Exception:
            return -1
        return 0

    def __statusParse(self, status):
        """!@brief Translate a status word into message lines.

        @param status  Status as returned by __checkStat()
        @return One line of text for every error bit that is set
        """
        msg = ""
        if status & DataExport.DIR_NOT_WRITABLE:
            msg += "\n Directory not writable"
        if status & DataExport.NOT_EXTANT:
            msg += "\n Directory does not exist"
        if status & DataExport.DISK_FULL:
            msg += "\n Disk is full"
        return msg

    def __sumfile(self, fobj):
        """!@brief Returns an md5 hash for an object with read() method.

        @param fobj  Object whose read(size) returns the data in chunks
        @return Hex digest of everything read from fobj
        """
        # hashlib superseded the md5 module; fall back to the md5 module
        # imported at the top of this file where hashlib is missing.
        try:
            import hashlib
            m = hashlib.md5()
        except ImportError:
            m = md5.new()
        while True:
            d = fobj.read(8096)
            if not d:
                break
            m.update(d)
        return m.hexdigest()
00292
00293
00294
def usage():
    """Return the help text for the command line interface of this module."""
    # The text is returned verbatim; the caller decides how to display it.
    return """
DataExport: Securely copy files from one directory to another.

Mandatory options:
--source Source directory
--destination Destination directory

Flags:
--purge Set if source files are to be deleted after copy.
"""
00306
if __name__ == "__main__":
    # Command line interface for the Data Export system.
    # Can be called by the user or by an automated process.

    # Options, first list mandatory, second list optional.
    # NOTE(review): the third list presumably holds the flags -- confirm
    # against LATTE.client.gOptions.
    options = Options(['source', 'destination'], [], ['purge'])
    try:
        options.parse()
    except Exception:
        # Show the help text, then let the original error propagate with its
        # own type and traceback instead of wrapping it in a bare Exception.
        options.usage(usage())
        raise

    source = options.source
    destination = options.destination
    # Single parenthesised argument: prints the same text whether print is a
    # statement or a function.
    print("source=%s, dest=%s" % (source, destination))

    exporter = DataExport()

    # Do we want file deletion?
    if options.purge is not None:
        exporter.setPurge(True)

    exporter.export(source, destination)