Main Page | Packages | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | Related Pages

DataExport.py

00001 #!/usr/local/bin/python
00002 #
00003 #                               Copyright 2005
00004 #                                     by
00005 #                        The Board of Trustees of the
00006 #                     Leland Stanford Junior University.
00007 #                            All rights reserved.
00008 #
00009 
00010 __facility__ = "Online"
00011 __abstract__ = "GLAST LAT data export routines"
00012 __author__   = "J. Panetta <panetta@slac.stanford.edu> SLAC - GLAST LAT I&T/Online"
00013 __date__     = ("$Date: 2006/01/31 17:24:49 $").split(' ')[1]
00014 __version__  = "$Revision: 2.8 $"
00015 __release__  = "$Name: R04-12-00 $"
00016 __credits__  = "SLAC"
00017 
00018 import LATTE.copyright_SLAC
00019 
00020 import exceptions
00021 import md5
00022 import os
00023 import shutil
00024 import stat
00025 import sys
00026 import logging as log
00027 from   LATTE.tools.FreeSpace        import FreeSpace
00028 from   LATTE.client.gOptions        import Options
00029 
class GExportException(Exception):
  """!@brief Define an exception from the Export system.

  The value passed to the constructor (an error message string or a status
  code) is kept in the `status` attribute and is also forwarded to the base
  class, so that `args` is populated and the exception can be re-created
  from its arguments (e.g. when pickled).

  @param status Error message or status code describing the failure.
  """
  def __init__(self, status):
    # Explicit base-class call (rather than super()) keeps this working on
    # interpreters where Exception is an old-style class.
    Exception.__init__(self, status)
    self.status = status

  def __str__(self):
    """!@brief Return the repr() of the stored status."""
    return repr(self.status)
00038 
00039 
class DataExport:
  """!@brief Class to synch a file or set of files between one location and another.

  A transfer is copy / checksum / (optionally) delete.  Every copied file is
  verified with an md5 sum before the source is purged.  Failures are
  reported by raising GExportException.
  """
  # Error codes.  These are bit flags OR-ed together into the status value
  # built by __checkStat().
  NO_ERROR                      = 0x0
  DIR_NOT_WRITABLE              = 0x1
  NOT_EXTANT                    = 0x2

  DISK_FULL                     = 0x10

  DISK_FULL_LEVEL               = 100*1024*1024  # 100 Megabytes

  # Values that can appear in place of a real md5 hex digest.
  MD5_DIRECTORY                 = "Directory"            # md5sum() on a directory
  MD5_OPEN_FAILED               = 'Failed to open file'  # md5sum() could not open
  MD5_WALK_FAILED               = -1                     # callback raised in __treeWalk()
  # Sentinels meaning "no checksum could be computed".  Two of these comparing
  # equal must never be taken as proof of a good copy.
  MD5_FAILURES                  = (MD5_OPEN_FAILED, MD5_WALK_FAILED)

  def __init__(self):
    """!@brief Create an exporter that copies without purging the source."""
    self.__purgeFiles   = False

    self.transfer = self.__copy   # Other valid value is self.__rename

  def setPurge(self, purge):
    """!@brief Select whether source files are deleted after a verified copy.

    @param purge Purging is enabled only when this compares equal to True;
                 any other value disables it.
    """
    # Deliberately strict: this flag controls deletion of source data, so
    # only True (or 1) switches it on.
    self.__purgeFiles = (purge == True)

  def synch(self, source, dest):
    """!@brief Synch up two directory structures.

       Synch only goes in one direction:  source --> destination.

       Every directory directly under source is exported to a directory of
       the same name under dest.  A failure on one directory is logged and
       does not stop the remaining directories from being processed.

    @param source Directory whose sub-directories are exported.
    @param dest   Directory that receives the exported sub-directories.
    """
    # get a list of directories under source.  Do this only once, as it can take time
    dirList = []
    for entry in os.listdir(source):
      path = os.path.join(source, entry)
      if os.path.isdir(path):
        dirList.append(path)

    for sourceDir in dirList:
      destDir = os.path.join(dest, os.path.basename(sourceDir))
      try:
        log.info( "Exporting from %s to %s" % ( sourceDir, destDir ) )
        # export() validates both paths itself before transferring.
        self.export(sourceDir, destDir)
      except Exception:
        # sys.exc_info() instead of an "except ... as" binding keeps this
        # syntax valid on every Python version.
        e = sys.exc_info()[1]
        log.error( "Failed exporting from %s to %s" % ( sourceDir, destDir ) )
        log.error( "  Exception: %s" % ( e, ) )

  def export(self, source, dest):
    """!@brief Copy from source to dest.

       This routine will determine whether source is a file or a directory

    @param source Existing file or directory to export.
    @param dest   Destination path; it must not exist yet.
    @exception GExportException if the paths fail validation or the copy
               cannot be verified.
    """

    self.__checkDirs(source, dest)

    # Check for common disk on Windows XP/NT (*not* CE).
    if os.name == 'nt':
      if self.__purgeFiles and self.__sameDrive(source, dest):
        self.transfer = self.__rename
      else:
        self.transfer = self.__copy
    else:
      pass # self.transfer is set in constructor to self.__copy

    self.transfer(source, dest)

  def md5sum(self, fname):
    """!@brief Returns an md5 hash for file fname.

    @param fname Path of the file to hash.
    @return The hex digest, "Directory" when fname is a directory, or
            'Failed to open file' when fname cannot be opened.
    """
    # md5hash from ActiveState cookbook
    # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/266486
    #
    # 2004-01-30
    #
    # Nick Vargish
    #
    # Simple md5 hash utility for generating md5 checksums of files.
    # Modified slightly by Jim Panetta
    try:
      if os.path.isdir(fname):
        return DataExport.MD5_DIRECTORY
      f = open(fname, 'rb')
    except Exception:
      return DataExport.MD5_OPEN_FAILED
    try:
      return self.__sumfile(f)
    finally:
      # Close the handle even when reading fails part way through.
      f.close()


  def __copy(self, source, dest):
    """!@brief Copy source to dest, verify the copy, then optionally purge.

    @param source Existing file or directory.
    @param dest   Destination path.
    @exception GExportException if the copy cannot be verified.
    """
    # copy/checksum/delete
    if os.path.isdir(source):
      shutil.copytree( source, dest )      # copy
      self.__compareTree( source, dest )   # checksum

      if self.__purgeFiles:                # delete
        self.__removeTree(source)
    else:  # regular file.  much simpler
      shutil.copy( source, dest )         # copy
      sourceMD5 = self.md5sum(source)     # checksum
      destMD5   = self.md5sum(dest)
      self.__verifySums(source, sourceMD5, destMD5)

      if self.__purgeFiles:               # delete
        os.remove(source)

  def __removeTree(self, top):
    """!@brief Delete the directory top and everything beneath it."""
    # Bottom-up walk so each directory is already empty when it is removed.
    for root, dirs, files in os.walk(top, topdown=False):
      for name in files:
        f = os.path.join(root, name)
        os.chmod(f, stat.S_IWRITE) # Just in case it is read-only
        os.remove(f)
      for name in dirs:
        os.rmdir(os.path.join(root, name))
    os.rmdir(top)

  def __rename(self, source, dest):
    """!@brief Move source to dest in one step."""
    # just move it.  No purging needed
    shutil.move(source, dest)

  def __sameDrive(self, source, dest):
    """!@brief Return True when both paths resolve to the same non-empty drive."""
    sourceDrive = os.path.splitdrive(os.path.abspath(source))[0].lower()
    destDrive   = os.path.splitdrive(os.path.abspath(dest))[0].lower()
    # An empty drive means it could not be determined; report "different"
    # so the caller falls back to the checksummed copy.
    return sourceDrive != "" and sourceDrive == destDrive

  def __verifySums(self, name, sourceSum, destSum):
    """!@brief Raise GExportException unless the two checksums prove a good copy.

    @param name      File name used in the error message.
    @param sourceSum Checksum value obtained for the source.
    @param destSum   Checksum value obtained for the destination.
    """
    if sourceSum != destSum:
      msg = "md5 sums don't match for %s :" % ( name, )
      msg += "\n   source: %s, dest: %s" % ( sourceSum, destSum )
      raise GExportException(msg)
    if sourceSum in DataExport.MD5_FAILURES:
      # Both sides failed in the same way: equal, but nothing was verified.
      msg = "Unable to compute md5 sums for %s :" % ( name, )
      msg += "\n   source: %s, dest: %s" % ( sourceSum, destSum )
      raise GExportException(msg)

  def __checkDirs(self, source, dest):
    """!@brief Validate a source/destination pair before a transfer.

    @param source Path that must exist.
    @param dest   Path that must not exist; its parent must be usable.
    @exception GExportException describing the first problem found.
    """
    # source and dest can't be the same.  This is tested first: a shared
    # existing path would otherwise be reported as "already exists", and a
    # shared missing path as a source error.
    sourcePath = os.path.normcase(os.path.abspath(source))
    destPath   = os.path.normcase(os.path.abspath(dest))
    if sourcePath == destPath:
      msg = "Source and destination directories are the same.  Export aborted"
      raise GExportException(msg)

    # check statuses
    sourceStatus = self.__checkStat(source)
    destStatus   = self.__checkStat(dest)

    # don't care if source dir is on a full disk or if it isn't writable
    if ( sourceStatus & ~(DataExport.DISK_FULL|DataExport.DIR_NOT_WRITABLE) ):
      msg = "Export Error: Attempt to read from source %s encountered errors: " % (source)
      msg += self.__statusParse(sourceStatus)
      raise GExportException(msg)
    # Destination must not be extant.
    if not ( destStatus & DataExport.NOT_EXTANT ):
      msg = "Export Error: Destination %s already exists" % (dest)
      raise GExportException(msg)
    if ( destStatus & ~DataExport.NOT_EXTANT ) :
      msg = "Export Error: Attempt to write to destination %s encountered errors: " % (dest)
      msg += self.__statusParse(destStatus)
      raise GExportException(msg)


  def __compareTree(self, sourceDir, destDir):
    """!@brief Compare all files under sourceDir and destDir with md5.

    @param sourceDir Source directory
    @param destDir   Destination directory
    @exception GExportException if a checksum differs or could not be
               computed, or if either tree has an entry the other lacks.
    """
    # Resolve both paths before any chdir: a relative destDir would
    # otherwise be looked up from inside sourceDir.
    sourcePath = os.path.abspath(sourceDir)
    destPath   = os.path.abspath(destDir)

    # scan the source/dest directories
    # chdir so that the source/dest keys are the same
    saveDir = os.getcwd()
    try:
      os.chdir(sourcePath)
      sourceMD5 = self.__treeWalk( os.walk("."), self.md5sum )

      os.chdir(destPath)
      destMD5   = self.__treeWalk( os.walk("."), self.md5sum )
    finally:
      # Always put the working directory back, whatever happened above.
      os.chdir(saveDir)

    # compare the checksums
    for name in sourceMD5.keys():
      if name not in destMD5:
        msg = "Destination directory %s does not contain file %s from source directory %s" % \
              ( destDir, name, sourceDir )
        raise GExportException(msg)
      self.__verifySums(name, sourceMD5[name], destMD5[name])
      del destMD5[name]

    # Whatever is left exists only on the destination side.
    if destMD5:
      msg = "Destination directory %s contains files not in source directory:" % ( destDir )
      for name in sorted(destMD5.keys()):
        msg += "\n    %s" % ( name, )
      raise GExportException(msg)

  def __treeWalk(self, tree, function):
    """!@brief Walk a tree, executing function() on every file.

    @param tree     Iterable of (root, dirs, files) tuples, as from os.walk().
    @param function Callable taking one path argument.
    @return Dictionary mapping each joined path (directories included) to
            the function's result, or to -1 if the function raised.
    """
    returnDict = {}
    for root, dirs, files in tree:
      for name in dirs + files:
        path = os.path.join(root, name)
        try:
          retVal = function(path)
        except Exception:
          retVal = DataExport.MD5_WALK_FAILED
        returnDict[path] = retVal
    return returnDict

  def __checkStat(self, obj):
    """!@brief Check if source and directories exist, are writable,
    and the disks aren't full.

    @param obj File path
    @return Status: a combination of the NOT_EXTANT, DISK_FULL and
            DIR_NOT_WRITABLE flags (0 when none apply).
    """
    status = 0

    # Resolve first so that a bare relative name ("outdir") is checked
    # against the current directory rather than against an empty string.
    parent = os.path.dirname(os.path.abspath(obj))

    if not os.path.exists(obj):
      status |= DataExport.NOT_EXTANT
    # The free-space test applies to the containing directory, so it is made
    # whether or not obj itself exists yet (a destination never does).
    # NOTE(review): assumes FreeSpace() accepts an absolute directory path
    # and returns a byte count -- confirm against LATTE.tools.FreeSpace.
    if os.path.isdir(parent) and FreeSpace(parent) < DataExport.DISK_FULL_LEVEL:
      status |= DataExport.DISK_FULL
    if not os.access(parent, os.W_OK):
      status |= DataExport.DIR_NOT_WRITABLE

    return status

  def __makeReadOnly(self, file):
    """!@brief Set a file read-only for user, group and others.

    @param file Path of the file to change.
    @return 0 on success, -1 if the mode could not be changed.
    """
    status = 0
    try:
      os.chmod(file, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
    except Exception:
      status = -1
    return status

  def __statusParse(self, status):
    """!@brief Translate a status bit mask into message text.

    @param status Combination of the class error-code flags.
    @return One "\\n  ..." line per flag that is set; "" when none is set.
    """
    msg = ""
    if status & DataExport.DIR_NOT_WRITABLE:
      msg += "\n  Directory not writable"
    if status & DataExport.NOT_EXTANT:
      msg += "\n  Directory does not exist"
    if status & DataExport.DISK_FULL:
      msg += "\n  Disk is full"
    return msg

  def __sumfile(self, fobj):
    '''Returns an md5 hash for an object with read() method.'''
    # Prefer hashlib; fall back to the module-level md5 import on
    # interpreters that do not provide hashlib.
    try:
      import hashlib
      m = hashlib.md5()
    except ImportError:
      m = md5.new()
    # Read in chunks so large files are never held in memory whole.
    while True:
      d = fobj.read(8096)
      if not d:
        break
      m.update(d)
    return m.hexdigest()
00292 
00293 
00294 
def usage():
  """!@brief Return the command line help text for the DataExport tool.

  @return Multi-line string that starts and ends with a newline.
  """
  helpLines = [
    "",
    "DataExport:  Securely copy files from one directory to another.",
    "",
    "Mandatory options:",
    "--source        Source directory",
    "--destination   Destination directory",
    "",
    "Flags:",
    "--purge         Set if source files are to be deleted after copy.",
    "",
  ]
  return "\n".join(helpLines)
00306 
if __name__ == "__main__":
  # Command line interface for the Data Export system.
  # Can be called by the user or by an automated process.

  # Options: 'source' and 'destination' are mandatory, 'purge' is a flag.
  # NOTE(review): the meaning of the three lists is inferred from usage()
  # -- confirm against LATTE.client.gOptions.Options.
  options = Options(['source', 'destination'], [], ['purge'])
  try:
    options.parse()
  except Exception:
    # Show the help text, then let the original error propagate with its
    # own type and traceback intact.
    options.usage(usage())
    raise

  source         = options.source
  destination    = options.destination
  # Parenthesised single argument: prints the same text whether print is a
  # statement or a function.
  print("source=%s, dest=%s" % ( source, destination))

  exporter = DataExport()

  # Do we want file deletion?
  if options.purge is not None:
    exporter.setPurge(True)

  exporter.export(source, destination)

Generated on Fri Jul 21 13:26:27 2006 for LATTE R04-12-00 by doxygen 1.4.3