DataExport.py

Go to the documentation of this file.
00001 #!/usr/local/bin/python
00002 #
00003 #                               Copyright 2005
00004 #                                     by
00005 #                        The Board of Trustees of the
00006 #                     Leland Stanford Junior University.
00007 #                            All rights reserved.
00008 #
00009 
00010 __facility__ = "Online"
00011 __abstract__ = "GLAST LAT data export routines"
00012 __author__   = "J. Panetta <panetta@slac.stanford.edu> SLAC - GLAST LAT I&T/Online"
00013 __date__     = "2005/07/23 00:08:27"
00014 __updated__  = "$Date: 2006/01/20 03:06:58 $"
00015 __version__  = "$Revision: 1.10 $"
00016 __release__  = "$Name: HEAD $"
00017 __credits__  = "SLAC"
00018 
00019 import LICOS.copyright_SLAC
00020 
00021 import exceptions
00022 import md5
00023 import os
00024 import shutil
00025 import stat
00026 import sys
00027 import logging as log
00028 
00029 from   LICOS.util.FreeSpace import FreeSpace
00030 from   LICOS.util.gOptions  import Options
00031 
class GExportException(Exception):
  """!@brief Define an exception from the Export system.

  The value passed to the constructor is kept in the @c status attribute
  and is also handed to the base class, so @c args is populated like any
  standard exception.

  @param status Error description (a message string in this module).
  """
  def __init__(self, status):
    # Initialise the base class so that args, pickling and generic handlers
    # that inspect e.args see the status as well.
    Exception.__init__(self, status)
    self.status = status

  def __str__(self):
    # The textual form is the repr() of the status, as it has always been.
    return repr(self.status)
00041 
00042 
class DataExport:
  """!@brief Class to synch a file or set of files between one location and another.

  Data is copied, verified with md5 checksums and, if purging has been
  enabled with setPurge(), removed from the source afterwards.
  """
  # Error codes.  These are bit flags that are OR-ed together into a status word.
  NO_ERROR                      = 0x0
  DIR_NOT_WRITABLE              = 0x1
  NOT_EXTANT                    = 0x2

  DISK_FULL                     = 0x10

  # Threshold compared against the value returned by FreeSpace()
  # (presumably bytes -- confirm against LICOS.util.FreeSpace).
  DISK_FULL_LEVEL               = 100*1024*1024  # 100 Megabytes

  def __init__(self):
    self.__purgeFiles   = False

    self.transfer = self.__copy   # Other valid is self.__rename

  def setPurge(self, purge):
    """!@brief Enable or disable deletion of the source after a verified copy.

    @param purge Purging is enabled only when this compares equal to True.
    """
    self.__purgeFiles = bool(purge == True)

  def synch(self, source, dest):
    """!@brief Synch up two directory structures.

    Synch only goes in one direction:  source --> destination.
    Every directory found directly under source is exported to a directory
    of the same name under dest.  A failure on one directory is logged and
    does not stop the remaining directories from being processed.

    @param source Directory whose sub-directories are exported
    @param dest   Directory that receives the exported sub-directories
    """
    if not os.path.isdir(source):
      raise GExportException("Export Error: Source %s is not a directory" % (source))

    # get a list of directories under source.  Do this only once, as it can take time
    dirList = []
    for entry in os.listdir(source):
      path = os.path.join(source, entry)
      if os.path.isdir(path):
        dirList.append(path)

    for sourceDir in dirList:
      destDir = os.path.join(dest, os.path.basename(sourceDir))
      try:
        log.info( "Exporting from %s to %s" % ( sourceDir, destDir ) )
        # export() validates source and destination for each directory
        self.export(sourceDir, destDir)
      except Exception:
        # sys.exc_info() is used so the code runs on old and new interpreters
        log.error( "Failed exporting from %s to %s" % ( sourceDir, destDir ) )
        log.error( "  Exception: %s" % sys.exc_info()[1] )

  def export(self, source, dest):
    """!@brief Copy from source to dest.

    This routine will determine whether source is a file or a directory.
    GExportException is raised when the source/destination checks fail or
    when the checksums of the copy do not match.

    @param source File or directory to export
    @param dest   Destination path; it must not exist yet
    """

    self.__checkDirs(source, dest)

    # Check for common disk on Windows XP/NT (*not* CE).
    if os.name == 'nt':
      commonPath = os.path.commonprefix( [source, dest] )
      if len(commonPath) > 0 and self.__purgeFiles:
        self.transfer = self.__rename
      else:
        self.transfer = self.__copy
    # On other platforms self.transfer keeps its current value
    # (self.__copy unless a caller replaced it).

    self.transfer(source, dest)

  def md5sum(self, fname):
    """!@brief Returns an md5 hash for file fname.

    @param fname Path of the file to hash
    @return The hex digest, the string "Directory" when fname is a
            directory, or 'Failed to open file' when it cannot be opened.
    """
    # md5hash from ActiveState cookbook
    # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/266486
    #
    # 2004-01-30
    #
    # Nick Vargish
    #
    # Simple md5 hash utility for generating md5 checksums of files.
    # Modified slightly by Jim Panetta
    try:
      if os.path.isdir(fname):
        return "Directory"
      f = open(fname, 'rb')
    except (IOError, OSError, TypeError, ValueError):
      return 'Failed to open file'
    # Make sure the handle is released even if reading fails part way
    try:
      return self.__sumfile(f)
    finally:
      f.close()


  def __copy(self, source, dest):
    """!@brief Copy source to dest, verify the copy by checksum and
    delete the source if purging is enabled.
    """
    # copy/checksum/delete
    if os.path.isdir(source):
      shutil.copytree( source, dest )      # copy
      self.__compareTree( source, dest )   # checksum

      if self.__purgeFiles:                # delete
        self.__purgeTree(source)
    else:  # regular file.  much simpler
      shutil.copy( source, dest )         # copy
      sourceMD5 = self.md5sum(source)     # checksum
      destMD5   = self.md5sum(dest)
      if sourceMD5 != destMD5:
        msg = "md5 sums don't match for %s :" % (source)
        msg += "\n   source: %s, dest: %s" % ( sourceMD5, destMD5 )
        raise GExportException(msg)

      if self.__purgeFiles:               # delete
        self.__removeFile(source)

  def __removeFile(self, path):
    """!@brief Delete one file, clearing a read-only flag first."""
    # chmod follows symbolic links, so never touch the mode of a link target
    if not os.path.islink(path):
      os.chmod(path, stat.S_IWRITE) # Just in case it is read-only
    os.remove(path)

  def __purgeTree(self, top):
    """!@brief Delete the directory tree rooted at top, top included."""
    # Walk bottom-up: by the time a directory is visited all of its real
    # sub-directories have already been removed by their own visit, so only
    # files and links to directories (which os.walk does not descend into)
    # remain to be deleted before the directory itself.
    for root, dirs, files in os.walk(top, topdown=False):
      for name in files:
        self.__removeFile(os.path.join(root, name))
      for name in dirs:
        path = os.path.join(root, name)
        if os.path.islink(path):
          os.remove(path)
      os.rmdir(root)

  def __rename(self, source, dest):
    # just move it.  No purging needed
    shutil.move(source, dest)

  def __checkDirs(self, source, dest):
    """!@brief Validate source and destination before a transfer.

    Raises GExportException when the paths are identical, the source
    cannot be read, the destination already exists or the destination
    cannot be written.
    """
    # source and dest can't be the same.  Checked first: placed after the
    # other checks it could never be reached.
    if os.path.abspath(source) == os.path.abspath(dest):
      msg = "Source and destination directories are the same.  Export aborted"
      raise GExportException(msg)

    # check statuses
    sourceStatus = self.__checkStat(source)
    destStatus   = self.__checkStat(dest)

    # don't care if source dir is on a full disk or if it isn't writable
    if ( sourceStatus & ~(DataExport.DISK_FULL|DataExport.DIR_NOT_WRITABLE) ):
      msg = "Export Error: Attempt to read from source %s encountered errors: " % (source)
      msg += self.__statusParse(sourceStatus)
      raise GExportException(msg)
    # Destination must not be extant.
    if not ( destStatus & DataExport.NOT_EXTANT ):
      msg = "Export Error: Destination %s already exists" % (dest)
      raise GExportException(msg)
    if ( destStatus & ~DataExport.NOT_EXTANT ) :
      msg = "Export Error: Attempt to write to destination %s encountered errors: " % (dest)
      msg += self.__statusParse(destStatus)
      raise GExportException(msg)


  def __compareTree(self, sourceDir, destDir):
    """!@brief Compare all files under sourceDir and destDir with md5.

    Raises GExportException when a checksum differs or when either tree
    contains an entry the other one lacks.

    @param sourceDir Source directory
    @param destDir   Destination directory
    """
    # Resolve both paths before the first chdir; a relative destDir would
    # otherwise be looked up relative to sourceDir.
    sourceAbs = os.path.abspath(sourceDir)
    destAbs   = os.path.abspath(destDir)

    # scan the source/dest directories
    # chdir so that the source/dest keys are the same
    saveDir = os.getcwd()
    try:
      os.chdir(sourceAbs)
      sourceMD5 = self.__treeWalk( os.walk("."), self.md5sum )

      os.chdir(destAbs)
      destMD5   = self.__treeWalk( os.walk("."), self.md5sum )
    finally:
      # always give the caller its working directory back
      os.chdir(saveDir)

    # compare the checksums
    for name in sourceMD5.keys():
      if name not in destMD5:
        msg = "Destination directory %s does not contain file %s from source directory %s" % \
              ( destDir, name, sourceDir )
        raise GExportException(msg)
      if sourceMD5[name] != destMD5[name]:
        msg = "md5 sums don't match for %s :" % (name)
        msg += "\n   source: %s, dest: %s" % ( sourceMD5[name], destMD5[name])
        raise GExportException(msg)
      # matched: whatever is left in destMD5 at the end exists only in dest
      del destMD5[name]

    if destMD5:
      msg = "Destination directory %s contains files not in source directory:" % ( destDir )
      for name in destMD5.keys():
        msg += "\n    %s" % ( name )
      raise GExportException(msg)

  def __treeWalk(self, tree, function):
    """!@brief Walk a tree, executing function() on every file.

    @param tree     Iterable of (root, dirs, files) tuples as produced by os.walk
    @param function Callable invoked with the path of every directory and file
    @return Dictionary mapping each path to the value returned by function,
            or to -1 when function raised an exception
    """
    returnDict = {}
    for root, dirs, files in tree:
      for name in dirs + files:
        path = os.path.join(root, name)
        try:
          retVal = function(path)
        except Exception:
          retVal = -1
        returnDict[path] = retVal
    return returnDict

  def __checkStat(self, obj):
    """!@brief Check if a path exists, if its parent directory is writable
    and if the disk holding it is not full.

    @param obj File path

    @return Status, a combination of the NOT_EXTANT, DISK_FULL and
            DIR_NOT_WRITABLE flags (0 when none applies)
    """
    status = 0

    # abspath() first: dirname() of a bare relative name would be ""
    parent = os.path.dirname(os.path.abspath(obj))

    if not os.path.exists(obj):
      status |= DataExport.NOT_EXTANT
    # The free space is checked on the parent directory whether or not obj
    # exists, otherwise a full disk could never be reported for a
    # destination (which must not exist yet).
    if os.path.isdir(parent) and FreeSpace(parent) < DataExport.DISK_FULL_LEVEL:
      status |= DataExport.DISK_FULL
    if not os.access(parent, os.W_OK):
      status |= DataExport.DIR_NOT_WRITABLE

    return status

  def __makeReadOnly(self, file):
    """!@brief Make a file read-only for user, group and others.

    @param file Path of the file
    @return 0 on success, -1 if the mode could not be changed
    """
    status = 0
    try:
      os.chmod(file, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
    except OSError:
      status = -1
    return status

  def __statusParse(self, status):
    """!@brief Translate a status word into a human readable message.

    @param status Combination of the error code flags of this class
    @return One line of text per flag that is set ("" when none is set)
    """
    msg = ""
    if status & DataExport.DIR_NOT_WRITABLE:
      msg += "\n  Directory not writable"
    if status & DataExport.NOT_EXTANT:
      msg += "\n  Directory does not exist"
    if status & DataExport.DISK_FULL:
      msg += "\n  Disk is full"
    return msg

  def __sumfile(self, fobj):
    '''Returns an md5 hash for an object with read() method.'''
    # hashlib replaces the md5 module on newer interpreters; fall back to
    # md5 where hashlib does not exist.
    try:
      import hashlib
      m = hashlib.md5()
    except ImportError:
      import md5
      m = md5.new()
    while True:
      d = fobj.read(8096)
      if not d:
        break
      m.update(d)
    return m.hexdigest()
00299 
00300 
00301 
def usage():
  """!@brief Return the command line help text of the DataExport script."""
  helpLines = [
    "",
    "DataExport:  Securely copy files from one directory to another.",
    "",
    "Mandatory options:",
    "--source        Source directory",
    "--destination   Destination directory",
    "",
    "Flags:",
    "--purge         Set if source files are to be deleted after copy.",
    "",
  ]
  # The leading and trailing empty entries give the text its first and
  # last newline.
  return "\n".join(helpLines)
00313 
if __name__ == "__main__":
  # Command line interface for the Data Export system.
  # Can be called by the user or by an automated process.

  # Options, first list mandatory, second list optional
  # (the third list presumably holds the flags -- confirm against gOptions)
  options = Options(['source', 'destination'], [], ['purge'])
  try:
    options.parse()
  except Exception:
    # Show the help text, then let the original error propagate with its
    # traceback intact.
    options.usage(usage())
    raise

  source         = options.source
  destination    = options.destination
  print("source=%s, dest=%s" % ( source, destination))

  exporter = DataExport()

  # Do we want file deletion?
  if options.purge is not None:
    exporter.setPurge(True)

  exporter.export(source, destination)

Generated on Thu Apr 27 20:52:41 2006 for LICOS L02-01-00 by doxygen 1.4.6-NO