# autoUpdateDimasDB.py import os import sys sys.path.append(r"../plugin-MySQL") import time import pywintypes, win32api, win32con import glob import pickle import re import SQLFactory import zlib from openbabel import * #from createOrUpdateHTMLFileInFolder import CreateHTMLFileFromInfoInDB #from updateDBfromfile import UpdateDBfromFile def getPath(fname,dbBaseDir): """Returns the folder path, file path and structure ID number of a given file based on the filename.""" if int(fname[0:2]) < 90: year = "20" + fname[0:2] elif int(fname[0:2]) >= 90: year = "19" + fname[0:2] ID = fname.split(".")[0] folderPath = "%s\\%s\\%s" %(dbBaseDir,year, ID) filePath = "%s\\%s" %(folderPath,fname) return folderPath, filePath, ID class AutoUpdateDimasDB(object): def __init__(self, dir, database, types=['cif', 'hkl', 'p4p', 'prp', 'res', 'doc']): """The first argument is the basedirectory of the database. Second argument is the database. The third argument is a list of file types""" self.ds = SQLFactory.SQLFactory(db=database) self.dir = dir self.logPath = r"%s\log.txt" %self.dir self.timestampsFilePath = "%s\\timestamps.pickle" %self.dir self.types = types def run(self): # Turn off read-only attribute of log file and then open in append mode, write start time to log file self.change_file_attribute(self.logPath, readOnly = False) self.log = open(self.logPath, 'a') self.log.write("Start time: %s\n\n" %time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time()))) self.updateFromFile() self.get_list_of_files() self.cifDataItems() self.do_smiles() self.uploadFiles(update=True) self.createHTMLFiles() # Write finish time to log file, trim the file size, close the file, then make it to read-only self.log.write("\nFinish time: %s\n\n----------------------------------------------\n\n" %time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time()))) self.trimLogFileSize(numberLines=2000) self.log.close() self.change_file_attribute(self.logPath, readOnly = True) def change_file_attribute(self, filePath, hidden=False, readOnly=False): """Changes the file attributes 'Hidden' and 'Read-only'. First argument is the path of the file whose attributes are to be changed. Second and third arguments are the values of hidden and readOnly, which should be either True or False. """ x = win32api.GetFileAttributes(filePath) if hidden == True: x |= win32con.FILE_ATTRIBUTE_HIDDEN elif hidden == False: x &= ~win32con.FILE_ATTRIBUTE_HIDDEN win32api.SetFileAttributes(filePath, x) if readOnly == True: x |= win32con.FILE_ATTRIBUTE_READONLY elif readOnly == False: x &= ~win32con.FILE_ATTRIBUTE_READONLY win32api.SetFileAttributes(filePath, x) def cifDataItems(self): """Defines a list of cif items to be extracted from a cif file and uploaded to the dimas database. The value of each dictioanry item is another dictionary which contains the database table name and the alias or field name for that cif item. The method 'uploadDataItems' is then called for each modified cif file to upload extract the cif items from the cif file and upload them to the dimas database. """ data_items = { '_refine_ls_R_factor_gt':{'table':'refinement', 'alias':'refine_ls_R_factor_gt'}, '_refine_ls_R_factor_obs':{'table':'refinement', 'alias':'refine_ls_R_factor_gt'}, '_diffrn_ambient_temperature':{'table':'diffraction', 'alias':'diffrn_ambient_temperature'}, '_symmetry_cell_setting':{'table':'refinement', 'alias':'Setting'}, '_symmetry_space_group_name_H-M':{'table':'refinement', 'alias':'SpaceGroup'}, '_exptl_crystal_colour':{'table':'crystal', 'alias':'exptl_crystal_colour'}, '_exptl_crystal_size_min':{'table':'crystal', 'alias':'exptl_crystal_size_min'}, '_exptl_crystal_size_mid':{'table':'crystal', 'alias':'exptl_crystal_size_mid'}, '_exptl_crystal_size_max':{'table':'crystal', 'alias':'exptl_crystal_size_max'}, '_exptl_crystal_description':{'table':'crystal', 'alias':'CifShape'}, } type = "cif" table = "diffraction" for file in self.modifiedFiles["*.%s" %type]: folderPath,cifPath,ID = getPath(file,self.dir) self.uploadDataItems(cifPath,ID,data_items,update=True) def do_smiles(self): """Creates smiles and inchi files for all modified cif files, and then uploads the files to the dimas database.""" self.log.write("\n") bable_crash = ['96srv031', '96srv059', '96srv071', '96srv077', '96srv078', '96srv079', '96srv080', '96srv083', '96srv086', '96srv087', '96srv088', '96srv089', '96srv093', '96srv096', '96srv097', '96srv098', '96srv099', '96srv109', '96srv112', '96srv116', '96srv117', '00CKB005', '01srv096', '01srv146', '04srv388', '05srv152', '06srv070', ] bable_crash = [] type = "cif" for folder in self.modifiedFiles["*.%s" %type]: folderPath,cifPath,ID = getPath(folder,self.dir) if os.path.exists(cifPath): if ID in bable_crash: continue if 'meta' in ID: continue try: smi, inchi = self.create_smiles(cifPath) except: self.log.write("@@@@@ %s has caused OpenBabel to fail @@@@\n" %str(cifPath)) print "@@@@@ %s has caused OpenBabel to fail @@@@" %str(cifPath) continue sql = r'INSERT refinement (ID,Smile,Inchi) VALUES("%s","%s","%s") ON DUPLICATE KEY UPDATE Smile="%s", Inchi="%s";' %( ID, smi, inchi, smi, inchi) try: res = self.ds.run_sql(sql) except: self.log.write("!!!! OpenBabel sql failed for %s!!!!\n" %ID) print "PROBLEM!!!!" else: print "No cif file in %s" %ID self.log.flush() def create_smiles(self, cifPath): """Creates SMILES and InChi files from the given cif path, returns the smiles and inchi strings.""" obConversion = OBConversion() obConversion.SetInFormat("cif") obConversion.SetOutFormat("smi") obmol = OBMol() print "Going to read %s for SMILE conversion" %cifPath #time.sleep(1) try: notatend = obConversion.ReadFile(obmol,cifPath) except: return outSmile = obConversion.WriteString(obmol) smileFile = "%ssmi" %cifPath[:-3] wFile = open(smileFile, 'w') wFile.write(outSmile) wFile.close() self.log.write("Written %s\n" %smileFile) obConversion.SetInFormat("cif") #obConversion.SetOutFormat("inchi a") obConversion.SetOutFormat("inchi") #obConversion.AddOption( "a", OBConversion.OUTOPTIONS ) obmol = OBMol() print "Going to read %s for INCHI conversion" %cifPath notatend = obConversion.ReadFile(obmol,cifPath) outInchi = obConversion.WriteString(obmol) inchiFile = "%sinchi" %cifPath[:-3] wFile = open(inchiFile, 'w') wFile.write(outInchi) wFile.close() self.log.write("Written %s\n" %inchiFile) print "Written %s" %inchiFile return outSmile, outInchi def uploadDataItems(self, filePath, ID, data_items={}, update=False): """Reads a cif file and uploads each cif item in a dictionary of cif items to the dimas database. First argument is the path of the cif file. Second argument is the dimas structure ID, or service number. Third argument a dictionary of cif data items to be extracted from the cif file. The value of each dictionary item is another dictionary which contains the database table name and the alias or field name for that cif item. If update is set to True (default value is False), and the specified structure ID is already in the table, the old row is will be updated. """ if os.path.exists(filePath): fileData = open(filePath,'rb').readlines() dataadded = "" for data_item in data_items: value = False for item in fileData: item = item.strip() try: if item.split()[0] == data_item: try: value = "%s" %item.split(data_item)[1].strip().strip("'") except: pass break except: continue if not value: print "Not updated %s: No value for %s" %(ID, data_item) continue table = data_items[data_item].get('table') subject = table.split("_")[0] db_field = data_items[data_item].get('alias', data_item) if update: sql = """INSERT INTO %s (ID, %s) VALUES ("%s", "%s") ON DUPLICATE KEY UPDATE %s = "%s" """ %(table,db_field,ID,value,db_field,value) else: sql = self.ds.create_insert_sql({'ID':"'%s'" %ID, '%s' %db_field:"'%s'" %value}, table) try: res = self.ds.run_sql(sql) dataadded += "%s = %s\n" %(data_item, str(value)) except: self.log.write("!!!! %s There was an error with this table %s!!!!\n" %(ID, table)) self.log.write("Inserted into %s from cif file:\n%s\n" %(ID, dataadded[:])) print "Inserted into %s: %s" %(ID, dataadded[:]) else: print "No cif file in %s" self.log.flush() return def uploadFiles(self, update=False, compress=True): """Uploads each file in self.modifiedFiles to the dimas database. If update is set to True (default value is False), and the specified structure ID is already in the table, the file in the database will be replaced by the modified file. If compress is set to True (default value is True), the file will be compressed using the zlib module before being uploaded to the database. """ self.log.write("\n") # sort out which files are to go in which table in the database diffraction_files = ('hkl','p4p','prp') refinement_files = ('cif', 'res', 'doc') for type in self.types: for file in self.modifiedFiles["*.%s" %type]: folderPath,filePath,ID = getPath(file,self.dir) if os.path.exists(filePath): fileData = open(filePath,'rb').read() if compress: fileData = zlib.compress(fileData,9) fileData = self.ds.escape_strings(fileData) if type in diffraction_files: table = "diffraction_data" elif type in refinement_files: table = "refinement_data" else: pass if update: sql = r'INSERT %s (ID,%s) VALUES("%s","%s") ON DUPLICATE KEY UPDATE %s="%s";' %(table, type, ID, fileData, type, fileData,) else: sql = r'INSERT %s (ID,%s) VALUES("%s","%s");' %(table, type, ID, fileData) res = self.ds.run_sql(sql) if res == "Success": self.log.write("Successfully uploaded %s file for %s\n" %(type, ID)) elif res == "Failure": self.log.write("!!!! The upload of %s file for %s failed!!!!\n" %(type, ID)) print "%s with %s file of %s" %(res, type, ID) self.log.flush() def updateFromFile(self): """Updates the dimas database for structures where an update file has been created or modified.""" StructuresUpdatedFromFile = UpdateDBfromFile().updateDBfromFile() if StructuresUpdatedFromFile: self.log.write("The dimas database was updated from a dimas_update.txt file for the following structures: %s\n\n" %StructuresUpdatedFromFile) self.log.flush() def createHTMLFiles(self): """Creates or updates html files containing information about the each structure in its database folder.""" #from createOrUpdateHTMLFileInFolder2 import CreateHTMLFileFromInfoInDB createdHtmlFiles,updatedHtmlFiles,failedHtmlFiles = CreateHTMLFileFromInfoInDB(self.dir).create_and_update_all_HTML_files() if createdHtmlFiles or updatedHtmlFiles or failedHtmlFiles: self.log.write("\n") if createdHtmlFiles: self.log.write("New HTML files created: %s\n" %createdHtmlFiles) if updatedHtmlFiles: self.log.write("HTML files updated: %s\n" %updatedHtmlFiles) if failedHtmlFiles: self.log.write("The following files failed whilst trying to update the HTML file: %s\n" %failedHtmlFiles) else: self.log.write("No HTML files were created or updated") self.log.flush() def get_list_of_files(self): """Gets a list of all files of type given in the list 'type' that have been modified. The modification time for each file is compared with the timestamp stored in the timestamps.pickle file. """ print "Compiling a list of all modified %s files in %s. This might take some time" %(self.types, self.dir) type_full = [] self.modifiedFiles = {} for i in range(len(self.types)): type_full.append("*.%s" %self.types[i]) if os.path.exists(self.timestampsFilePath): inFile = open(self.timestampsFilePath, "r") try: self.pickle_modification_times = pickle.load(inFile) except ValueError: # error loading pickle file self.pickle_modification_times = {} else: self.pickle_modification_times = {} self.change_file_attribute(self.timestampsFilePath, readOnly = False, hidden = False) for type in type_full: print "Compiling a list of modified %s files" %type modifiedFiles = [] for year in range(1993,2009): path = "%s\\%s" %(self.dir,year) list_dir = os.listdir(path) for dir in list_dir: fileTypePath = "%s\\%s\\%s" %(path, dir, type) fileList = glob.glob(fileTypePath) if not fileList: pass else: for file in self.checkIfFileModified(fileList): modifiedFiles.append(file) self.modifiedFiles[type] = modifiedFiles self.updateTimestampsFile() self.change_file_attribute(self.timestampsFilePath, readOnly = True, hidden = True) def checkIfFileModified(self,fileList): """Checks a if any files in a list of files have been modified, and returns a list of modified files.""" for file in fileList: modifiedFiles = [] fileName = file.split("\\")[-1] ID = fileName.split(".")[0] if re.match("\d\d", ID) and len(ID) in (6,7,8): fileModificationTime = os.path.getmtime(file) if not self.pickle_modification_times.has_key(fileName): self.pickle_modification_times[fileName]=fileModificationTime modifiedFiles.append(fileName) elif fileModificationTime > self.pickle_modification_times[fileName]: self.pickle_modification_times[fileName]=fileModificationTime modifiedFiles.append(fileName) else: pass return modifiedFiles def _getModifiedFolders(self, list_dir, dir): new_modification_times = {} modifiedFolders = [] for item in list_dir: path = "%s\\%s" %(dir,item) if os.path.isdir(path) and re.match("\d\d", item): new_modification_times[item] = os.path.getmtime(path) for key in new_modification_times.keys(): if not self.pickle_modification_times.has_key(key): modifiedFolders.append(key) self.pickle_modification_times[key]=os.path.getmtime(path) elif new_modification_times[key] > self.pickle_modification_times[key]: modifiedFolders.append(key) self.pickle_modification_times[key] = new_modification_times[key] else: pass return modifiedFolders def updateTimestampsFile(self): """Dumps the updated timestamps dictionary to the pickle file.""" outFile = open(self.timestampsFilePath, 'w') pickle.dump(self.pickle_modification_times,outFile) outFile.close() return #def getPath(self,fname): #"""Returns the folder path, file path and structure ID number of a given file based on the filename.""" #if int(fname[0:2]) < 90: #year = "20" + fname[0:2] #elif int(fname[0:2]) >= 90: #year = "19" + fname[0:2] #ID = fname.split(".")[0] #folderPath = "%s\\%s\\%s" %(self.dir,year, ID) #filePath = "%s\\%s" %(folderPath,fname) #return folderPath, filePath, ID def trimLogFileSize(self,numberLines=500): """Trims the size of the log file so that the number of lines it contains is less than the given number.""" self.change_file_attribute(self.logPath, hidden = False) f = open(self.logPath, 'r') lines = f.readlines() while len(lines) > numberLines: i = 0 while lines[i][:10] != "----------": i += 1 f.close() f = open(self.logPath, 'w') f.writelines(lines[i+2:]) f.close() f = open(self.logPath, 'r') lines = f.readlines() f.close() self.change_file_attribute(self.logPath, hidden = True) return class CreateHTMLFileFromInfoInDB: def __init__(self, dir): self.ds = SQLFactory.SQLFactory(db='DimasDB') self.dir = dir self.createdFiles = [] self.updatedFiles = [] self.failed_files = [] def main(self): #self.createHTMLFile() self.create_and_update_all_HTML_files() def create_and_update_all_HTML_files(self): sql = """SELECT submission.ID FROM submission""" list_ID = self.ds.run_select_sql(sql) #list_ID = ({'ID': '00srv150'}, {'ID': '00srv151'}, {'ID': '00srv152'}, {'ID': '00srv153'}, {'ID': '00srv154'}, {'ID': '00srv155'}, {'ID': '00srv156'}, {'ID': '00srv157'}, {'ID': '00srv158'}, {'ID': '00srv159'}, {'ID': '00srv160'}, {'ID': '00srv161'}, {'ID': '00srv162'}, {'ID': '00srv163'}, {'ID': '00srv164'}, {'ID': '00srv165'}, {'ID': '00srv166'}, {'ID': '00srv168'}, {'ID': '00srv169'}, {'ID': '00srv170'}, {'ID': '00srv171'}, {'ID': '00srv172'}, {'ID': '00srv173'}, {'ID': '00srv174'}, {'ID': '00srv175'}, {'ID': '00srv176'}, {'ID': '00srv177'}, {'ID': '00srv178'}, {'ID': '00srv179'}) for item in list_ID: fname = item['ID'] if len(fname) >= 6 and len(fname) <=8: try: int(fname[0:2]) folderPath, filePath, ID = getPath(fname,self.dir) ## breakpoint to stop at particular file #if fname == "00asb001": #print fname #else: #pass ##overwrite all files #self.createHTMLFile() #print "Created HTML file for %s" %fname # update current exisiting files, create new files if os.path.exists(folderPath) and not os.path.exists(filePath): CreateHtmlFile(fname,folderPath).createHTMLFile() #self.createHTMLFile() self.createdFiles.append(fname) print "Created HTML file for %s" %fname elif os.path.exists(self.filePath) and self.compareFileModificationWithDatabaseTimestamps(): CreateHtmlFile(fname,folderPath).createHTMLFile() #self.createHTMLFile() self.updatedFiles.append(fname) print "Updated HTML file for %s" %fname elif not os.path.exists(folderPath): #print "The path %s does not exist" %self.path pass else: #print "No change made to HTML file for %s" %fname pass except: print "!!!! %s either does not exist or is not a legitamate file name!!!!" %fname pass else: pass return self.createdFiles, self.updatedFiles, self.failed_files def compareFileModificationWithDatabaseTimestamps(self): self.getTimeHTMLLastModified() self.getTimestampsFromDB() a = 0 for i in self.timestamp: if i > self.modified: a = a + 1 else: pass if a == 0: #print "The database has not been modified for %s" %self.fname return False else: print "The database has been modified for %s" %self.fname return True def getTimeHTMLLastModified(self): #self.getPath() if os.path.exists(self.htmlPath): self.modified = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(os.path.getmtime(self.htmlPath))) else: print "The path %s does not exist" %self.htmlPath def getTimestampsFromDB(self): tuple = ("submission", "crystal", "diffraction", "refinement", "progress", "reference", "diffraction_cell") self.timestamp = [] for item in tuple: try: self.query("""SELECT Timestamp FROM %s WHERE ID = "%s";""" %(item,self.fname)) self.timestamp.append(self.dictionary['Timestamp']) except: pass #def getPath(self,fname,dbBaseDir): #"""Returns the folder path, file path and structure ID number of a given file based on the filename.""" #if int(fname[0:2]) < 90: #year = "20" + fname[0:2] #elif int(fname[0:2]) >= 90: #year = "19" + fname[0:2] #ID = fname.split(".")[0] #folderPath = "%s\\%s\\%s" %(dbBaseDir,year, ID) #filePath = "%s\\%s" %(folderPath,fname) #return folderPath, filePath, ID class CreateHtmlFile: def __init__(self,fname,folderPath): self.ds = SQLFactory.SQLFactory(db='DimasDB') self.fname = fname self.folderPath = folderPath self.htmlPath = "%s\\%s.html" %(self.folderPath,self.fname) def createHTMLFile(self): #self.getPath() if os.path.exists(self.folderPath): self.sqls() self.list_all_labels() self.outfile = open(self.htmlPath, 'w') self.outfile.write("\n
\n%s | %s(%s) | |
%s | %s, %s, %s, %s, (%s) | |
%s | %s x %s x %s mm3 | |
%s | %s %s %s | |
%s | %s |