"""Read, parse and rewrite the metadata strings stored in Fronimo (.ft3) files.

A Fronimo file is handled here as a gzip-compressed byte stream that is
decoded as latin-1 into one long string (``Fronimo.stFron``).  Text fields
are stored as length-prefixed strings: one length byte, or the byte 255
followed by a little-endian 16-bit length.
"""
from unicode import latin1_to_ascii
import pdb
import re
import os
import sys
import time
import glob
import gzip
import zlib
import platform
import os.path
try:
    import readline
except ImportError:
    # readline is not available on a stock Windows interpreter, which this
    # module otherwise supports (see LOCALBASE below).
    readline = None

# Globals
difficulties = ["Beginner", "Easy", "Medium", "Challenge", "Difficult", "Virtuoso"]

currOS = platform.platform()
if currOS.find('Windows') == 0:
    LOCALBASE = 'D:/website/'
else:
    LOCALBASE = '/mnt/d/website/'
HTTPBASE = "file://"
CURRDIR = re.sub(r'\\+', '/', os.getcwd()) + '/'
FACHEAD = LOCALBASE + "facsimiles/"

# count of new/incorrect proper names: composers,
# publishers, anthologists, etc.
nameCount = 0
# Counters incremented by Fronimo.parse_info (unknown instruments / parts
# missing from the ensemble).  They were declared ``global`` there but never
# initialised, which made the first increment raise NameError.
instCount = 0
partCount = 0
facCount = 0
# carries form data from one form to the next.
diData = {}
startTime = int(time.time())

# Data/input files contained in local website base
NAMESFILE = LOCALBASE + "names.tsv"
INSTSFILE = LOCALBASE + "insts.tsv"
TYPEFILE = LOCALBASE + "types.tsv"
CONTRIBDIR = LOCALBASE + 'contributors/'
TEMPLATE = CONTRIBDIR + 'template.ft3'
# This will change when everything is debugged
# PERSONALDIR = CONTRIBDIR + lastName + firstName[0]
PERSONDIR = CONTRIBDIR

# Error/output files located in current directory
NEWNAMES = CURRDIR + "newnames.txt"
ERRFILE = CURRDIR + "errors"

RTFPREFIX = "{\\rtf1\\ansi\\ansicpg1252\\deff0\\deflang1033{\\fonttbl{\\f0\\fnil\\fcharset0 MS Shell\r\nDlg;}}\r\n\\viewkind4\\uc1\\pard\\f0\\fs-22 "
RTFSUFFIX = "\\par\r\n}\r\n"
IMAGETYPES = ('.png', '.tif', '.pdf', '.jpg')

# Separator between the (source,) document and credits parts of a footnote.
# NOTE(review): the footnote parser requires exactly 2 or 3 parts, while every
# document and credit string contains single spaces, so the separator has to
# be a run of two or more spaces - confirm against real .ft3 footnotes.
FOOTNOTE_SEP = "  "

# Compiled regular expressions
# use re.DOTALL to match \n as well
# # = number in a series; % = arbitrary number to make source unique
reUnicode = re.compile(r'(\\u.... )')
reSplitAndOr = re.compile(r'(..*) (?:and|or) (..*)')
reGetDate = re.compile(r'\D\((>?c?a?\.? ?[1-2][0-9]{3})\)', re.DOTALL)
reApproxDate = re.compile(r'(ca?\.? ?)[1-2][0-9]{3}', re.DOTALL)
reParenContents = re.compile(r'^([^(]*)\(([^)]+)\)(.*)', re.DOTALL)
reEntabulated = re.compile(r'([IiEe]ntabulated)', re.DOTALL)
# The former flag argument ``1`` was re.TEMPLATE (deprecated, later removed);
# it has no effect on these repeat-free patterns, so it is dropped.
reEnc = re.compile(r'[Ee](nc\.)')
reEd = re.compile(r'[Ee](d\.)')
reInfo = re.compile(r'^(...).*?: *(..*)$', re.DOTALL)
reKey = re.compile(r'^[A-G][b#]*[Mm]$', re.DOTALL)
# Was r'(\\lang[0-9]{4)}' - closing brace and parenthesis were swapped.
reDeLang = re.compile(r'(\\lang[0-9]{4})')
# An RTF hex escape: backslash, apostrophe, two hex digits.
reRtfHex = re.compile(r"\\'([0-9A-Fa-f]{2})")
reFootnoteSep = re.compile(r' {2,}')


# These are for error messages.  ``n`` counts frames above the function that
# calls the helper: n = 0 is that function itself, n = 1 its caller, n = 2
# the caller's caller, etc.  The defaults suit calls made from
# Fronimo.print_error.  "?" / 0 is returned when the stack is too shallow.
def funcName(n=1):
    """Return the name of the function ``n`` frames above the caller."""
    try:
        return sys._getframe(n + 1).f_code.co_name
    except ValueError:
        return "?"


def callerName(n=2):
    """Return the name of the function ``n`` frames above the caller."""
    try:
        return sys._getframe(n + 1).f_code.co_name
    except ValueError:
        return "?"


def lineNo(n=1):
    """Return the current line number of the frame ``n`` above the caller."""
    try:
        return sys._getframe(n + 1).f_lineno
    except ValueError:
        return 0


# (column label, attribute name) pairs; every attribute is initialised to ""
# on a new Fronimo instance.
field_map = [
    ("Input", "stInfile"),
    ("Title", "title"),
    ("Subtitle", "subtitle"),
    ("Composer", "composer"),
    ("Orig. composer", "composer0"),
    ("Ensemble", "ensemble"),
    ("Part", "part"),
    ("Key", "key"),
    ("Difficulty", "difficulty"),
    ("Type", "type"),
    ("Source", "source"),
    ("Document", "document"),
    ("Volume", "volume"),
    ("Date", "date"),
    ("Page", "page"),
    ("Section", "section"),
    ("Editor", "editor"),
    ("Encoder", "encoder"),
    ("Intabulator", "intabulator"),
    ("Arranger", "arranger"),
    ("Contributor", "contributor"),
    ("Remarks", "remarks"),
    ("Output", "outfile"),
    ("Footnote", "footnote"),
    ("Concordances", "concordances"),
    ("Info", "info"),
    ("Piece", "piece"),
    ("Recording", "recurl"),
    ("Facsimile", "facurl"),
    ("PDF", "stPdf"),
    ("Midi", "stMidi"),
    ("Modified", "mtime"),
    ("Created", "ctime"),
]


class Fronimo:
    """Metadata view of one Fronimo file.

    Instantiating the class reads and unzips ``stIn`` and parses its
    footnote, title, subtitle, composer and info strings into attributes.
    ``start_offset`` is -1 when the file could not be read or parsed.
    """

    # Shared output handles and lookup tables.  The defaults keep the class
    # usable (errors go to stderr, lookups find nothing) before
    # open_error() / open_files() have been called.
    flErr = None
    flNewNames = None
    flNewInsts = None
    nameList = []
    typeList = []
    instList = []
    noFacList = []

    # First three letters of an info-field label -> attribute it fills.
    _INFO_ATTRS = {
        "tra": "arranger", "rea": "arranger", "arr": "arranger",
        "lib": "source", "sou": "source", "pub": "source",
        "ins": "ensemble", "ens": "ensemble",
        "doc": "document",
        "ori": "composer0", "co0": "composer0",
        "tit": "title",
        "sub": "subtitle",
        "com": "composer",
        "pag": "page",
        "edi": "editor",
        "enc": "encoder",
        "int": "intabulator",
        "con": "concordances",
        "ctr": "contributor",
        "pie": "piece",
        "fac": "facurl",
        "rec": "recurl",
        "sec": "section",
        "typ": "type",
        "key": "key",
        "par": "part",
    }

    def __init__(self, stIn=TEMPLATE):
        """Create a Fronimo object from the file named ``stIn``."""
        # set all mapped attributes (title, footnote, volume, info, ...) to ""
        for _col, att in field_map:
            setattr(self, att, "")
        self.base = LOCALBASE
        self.currProg = ''
        self.credits = ''
        self.begText = ''
        self.endText = ''
        self.performance = False
        self.simple = False
        self.ornamented = False
        # input file name set from argument stIn
        self.stFile = stIn
        self.stFron = ''
        self.flFron = None
        self.flOut = None
        # flErr is deliberately not shadowed per instance: the error log is
        # the class-level handle installed by open_error().
        self.offset = None
        self.oldOffset = None

        # Creates self.stFron from fronimo file: self.stFile
        if not self.read_and_unzip_file(self.stFile):
            print("OSError: Cannot read and unzip input file: ",
                  self.stFile, file=sys.stderr)
            # marker for failure of class instantiation
            self.start_offset = -1
        else:
            self.start_offset = self.get_start_offset()
            if self.start_offset > 0:
                # Fills in other values of fronimo object.
                if not self.populate():
                    self.start_offset = -1
    # end of __init___

    @classmethod
    def open_error(kls, errfile):
        """Open ``errfile`` for writing as the shared error log.

        Returns True on success, False (after reporting on stderr) otherwise.
        """
        try:
            kls.flErr = open(errfile, "w", encoding='latin1')
        except OSError:
            print("OSError: Cannot open error File ", errfile,
                  " for writing.", file=sys.stderr)
            return False
        except Exception:
            print("Other error: Cannot open error File ", errfile,
                  " for writing.", file=sys.stderr)
            return False
        return True
    # End open_error

    @classmethod
    def open_file(kls, fl, mode):
        """Open ``fl`` in ``mode`` as latin-1 text; return None on failure."""
        try:
            flName = open(fl, mode, encoding='latin1')
        except OSError:
            err = "OSError: Cannot open file %s in mode %s." % (fl, mode)
            kls.print_error(fl, err)
            return None
        except Exception:
            err = "Other error: Cannot open file %s in mode %s." % (fl, mode)
            kls.print_error(fl, err)
            return None
        return flName
    # end of openFile

    @classmethod
    def open_files(kls):
        """Load the name, instrument and type tables; open the new-names log.

        Returns False as soon as one of the files cannot be opened.
        """
        kls.nameList = []
        kls.typeList = []
        kls.instList = []
        kls.noFacList = []

        # Read in and process list of recognized names
        kls.flNames = kls.open_file(NAMESFILE, "r")
        if not kls.flNames:
            print("Cannot open NAMESFILE file.")
            return False
        with kls.flNames:
            lsIn = kls.flNames.read().split('\n')
        for line in lsIn:
            lsRec = line.split('\t')
            # Must have at least last name and first name.
            if len(lsRec) < 2:
                continue
            last, first = lsRec[0], lsRec[1]
            # Default directory name for this proper name:
            # last name + capitalized first letter of the first name.
            slug = last + first.upper()[0] if first != '' else last
            # Remove accents
            slug = latin1_to_ascii(slug)
            # nameList records are [slug, lastName, firstName];
            # other fields in names.tsv are ignored for now.
            kls.nameList.append([slug, last, first])

        # Read in instrument list
        kls.flInsts = kls.open_file(INSTSFILE, "r")
        if not kls.flInsts:
            print("Cannot open INSTSFILE file.")
            return False
        with kls.flInsts:
            lsIn = kls.flInsts.read().split('\n')
        # Blank lines would give one-field records that break item[1] lookups.
        kls.instList = [line.split('\t') for line in lsIn if line.strip()]

        # Read in types list
        kls.flTypes = kls.open_file(TYPEFILE, "r")
        if not kls.flTypes:
            print("Cannot open TYPEFILE file.")
            return False
        with kls.flTypes:
            lsIn = kls.flTypes.read().split('\n')
        for line in lsIn:
            # get rid of leading tabs
            line = line.strip()
            if line:
                kls.typeList.append(line.split('\t'))

        # Open error lists
        kls.flNewNames = kls.open_file(NEWNAMES, "w")
        if not kls.flNewNames:
            print("Cannot open NEWNAMES file.")
            return False
        return True
    # end open_files

    @classmethod
    def find_canonical_type(kls, typ):
        """Return the canonical name (field 1) of the type whose field 0
        equals ``typ`` stripped and lower-cased, or None if there is none."""
        typ = typ.strip().lower()
        for item in kls.typeList:
            if len(item) > 1 and item[0] == typ:
                return item[1]
        return None
    # end find_canonical_type

    @classmethod
    def get_all_types(kls, typ):
        """Return ``typ`` followed by every type below it in the hierarchy.

        A type record with three fields names its higher type(s) in the
        third field, '&'-separated.  Each type is listed once, in depth-first
        order of first occurrence.
        """
        typOut = []
        kls._collect_types(typ, typOut)
        return typOut

    @classmethod
    def _collect_types(kls, typ, typOut):
        """Append ``typ`` and its not-yet-listed descendants to ``typOut``."""
        if typ in typOut:
            # No duplications allowed; this also stops cyclic hierarchies.
            return
        typOut.append(typ)
        for t in kls.typeList:
            # only look at items that have higher types
            if len(t) == 3 and typ in t[2].split('&'):
                kls._collect_types(t[1], typOut)

    @classmethod
    def get_type_list(kls, typesIn):
        """Validate a comma-separated list of types and expand each one.

        Unknown types are reported on stdout and skipped; each known type
        contributes its canonical name plus all of its sub-types.
        """
        typesOut = []
        for typ in typesIn.split(','):
            t = kls.find_canonical_type(typ)
            if t is None:
                print("Type %s not found." % typ)
                continue
            typesOut += kls.get_all_types(t)
        return typesOut

    @classmethod
    def print_error(kls, currFile, errMsg):
        """Write a located error message to the error log.

        Falls back to stderr when no error file has been opened.
        """
        out = kls.flErr if kls.flErr is not None else sys.stderr
        # The frame helpers must be called directly from this method: their
        # default depths are relative to print_error's own frame.
        print("function:%s;caller:%s; line:%d\nfile:'%s'; %s."
              % (funcName(), callerName(), lineNo(), currFile, errMsg),
              file=out)
        out.flush()

    def get_start_offset(self):
        """Return the offset of the first string, chosen from byte 4 of the
        data, or -1 when that byte is not a recognised value."""
        marker = self.stFron[4]
        if marker in ('\x14', '\x15'):
            return 364
        if marker == '\x16':
            return 368
        val = hex(ord(marker))
        msg = ("stFron[4] = %s (not \\x14, \\x15 or \\x16), "
               "so starting offset is unknown" % (val))
        Fronimo.print_error(self.stFile, msg)
        return -1
    # end get_start_offset

    def load_form(self, diForm):
        """Copy every key/value of ``diForm`` onto this instance."""
        for key, value in diForm.items():
            setattr(self, key, value)
    # end of load_changes

    def read_and_unzip_file(self, stIn):
        """Read gzip file ``stIn`` and decode it (latin-1) into self.stFron.

        Returns False, after logging, when the file cannot be opened or
        decompressed or holds fewer than 100 bytes.
        """
        try:
            # gzip.open raises rather than returning None, and a corrupt or
            # truncated stream only fails during read().
            with gzip.open(stIn) as flFron:
                self.flFron = flFron
                btFron = flFron.read()
        except (OSError, EOFError, zlib.error) as err:
            Fronimo.print_error(
                stIn, 'Cannot open and unzip fronimo file (%s)' % err)
            return False
        if len(btFron) < 100:
            Fronimo.print_error(self.stFile, 'Cannot read fronimo file.')
            return False
        # latin-1 maps every byte to one character, so this cannot fail or
        # change the length.
        self.stFron = btFron.decode("latin1")
        return True
    # end of read_and_unzip_file

    def _get(self, numChars):
        """Return the next ``numChars`` characters and advance self.offset.

        Returns "" (after logging) when the request runs past the end of the
        data; the offset stays advanced in that case.
        """
        if numChars == 0:
            return ""
        oldOffset = self.offset
        self.offset += numChars
        if self.offset > len(self.stFron):
            errMsg = "Cannot get %d chars starting at offset %d" % (numChars, oldOffset)
            Fronimo.print_error(self.stFile, errMsg)
            return ""
        return self.stFron[oldOffset:self.offset]

    def _getWord(self, fSigned):
        """Read a little-endian 16-bit value; return -1 if data runs out."""
        inCh = self._get(1)
        if inCh == "":
            errMsg = "No first byte in stFron at offset %d" % (self.offset)
            Fronimo.print_error(self.stFile, errMsg)
            return -1
        word1 = ord(inCh)
        inCh = self._get(1)
        if inCh == "":
            errMsg = "No 2nd byte in stFron at offset %d" % (self.offset)
            Fronimo.print_error(self.stFile, errMsg)
            return -1
        word = word1 + 256 * ord(inCh)
        # 0x8000 itself is negative in two's complement (was "> 32768").
        if fSigned and word >= 32768:
            word -= 65536
        return word

    def _getBstr(self):
        """Read one length-prefixed string at self.offset; "" on failure."""
        # first byte is string length if < 255
        firstByte = self._get(1)
        if firstByte == "":
            errMsg = "No first byte in stFron at offset %d" % (self.offset)
            Fronimo.print_error(self.stFile, errMsg)
            return ""
        length = ord(firstByte)
        if length == 0:
            return ""
        # First char 255 means a long string.
        # Next 2 chars determine string length as an unsigned integer
        if length == 255:
            length = self._getWord(False)
            if length == -1:
                errMsg = "Zero string length from _getWord, offset %d" % (self.offset)
                Fronimo.print_error(self.stFile, errMsg)
                return ""
            if length == 0:
                return ""
        stOut = self._get(length)
        if stOut == "":
            errMsg = "Result of get(length) is \"\", at offset %d" % (self.offset)
            Fronimo.print_error(self.stFile, errMsg)
            return ""
        return stOut

    def _put(self, stNew):
        """Replace the length-prefixed string at self.offset with ``stNew``.

        ``stNew`` is the bare payload (already RTF'd if necessary); the
        length prefix is added here.  The offset ends just past the new
        string.  Returns False, leaving the data and offset untouched, when
        the old string cannot be measured or ``stNew`` is too long for a
        16-bit length.
        """
        start = self.offset
        # Find length of old fronimo string, prefix included
        ch = self._get(1)
        if ch == "":
            errMsg = "No first byte in stFron at offset %d" % (start)
            Fronimo.print_error(self.stFile, errMsg)
            self.offset = start
            return False
        if ch == '\xFF':
            # means next 2 chars determine length
            word = self._getWord(False)
            if word == -1:
                self.offset = start
                return False
            # Length of actual string + 3 bytes to specify the length
            length = word + 3
        else:
            # Length of actual string + 1 byte to specify the length
            length = ord(ch) + 1
        # reset offset to compensate for the reads above
        self.offset = start

        newLen = len(stNew)
        if newLen > 0xFFFF:
            errMsg = "New string too long (%d chars) at offset %d" % (newLen, start)
            Fronimo.print_error(self.stFile, errMsg)
            return False
        if newLen < 255:
            stInsert = chr(newLen)
        else:
            stInsert = chr(255) + chr(newLen % 256) + chr(newLen // 256)
        stNew = stInsert + stNew
        # splice in the new string
        self.stFron = self.stFron[:start] + stNew + self.stFron[start + length:]
        # set new offset
        self.offset = start + len(stNew)
        return True
    # end _put

    def latin2rtf(self, stIn):
        """Wrap ``stIn`` in RTFPREFIX/RTFSUFFIX, writing characters with code
        >= 127 as backslash-apostrophe hex escapes and '|' as a \\par break."""
        parts = []
        for ch in stIn:
            if ord(ch) >= 127:
                # hex value of latin1 char --> last 2 chars of rtf code
                parts.append("\\'%02x" % ord(ch))
            elif ch == '|':
                parts.append('\\par\r\n')
            else:
                parts.append(ch)
        return RTFPREFIX + "".join(parts) + RTFSUFFIX

    def replace_rtf_codes(self, stIn):
        """Decode RTF hex escapes and strip a few RTF control words."""
        # Decode each escape where it stands.  Malformed escapes (not
        # followed by two hex digits) are left untouched.
        stIn = reRtfHex.sub(lambda m: chr(int(m.group(1), 16)), stIn)
        stIn = stIn.replace('\\par', '|')
        stIn = stIn.replace('\\cf1', '')
        stIn = stIn.replace('\\cf0', '')
        stIn = re.sub(r'\\lang[0-9]*', '', stIn)
        # NOTE(review): as written this replaces a space with a space (no-op);
        # the first literal probably lost whitespace at some point - confirm.
        stIn = stIn.replace(' ', ' ')
        # KLUDGE to handle unicode weirdness
        val = reUnicode.search(stIn)
        if val:
            # reUnicode has a single group (was group(2): IndexError)
            uni = val.group(1)
            # lop off final character from unicode sequence
            stIn = stIn.replace(uni, uni[:-1])
        return stIn

    def rtf2latin(self, stRtf):
        """Extract the text of an RTF-wrapped string.

        Strings that do not start with '{\\rtf', are shorter than 100
        characters, or lack the expected markers are returned unchanged.
        """
        if not stRtf.startswith('{\\rtf'):
            return stRtf
        if len(stRtf) < 100:
            return stRtf
        start = stRtf.find('\\f0\\fs', 0)
        if start == -1:
            return stRtf
        start += 9
        end = stRtf.find('\\par\r\n}\r\n', start + 1)
        if end == -1:
            return stRtf
        stOut = stRtf[start:end]
        stOut = stOut.replace('\\par\r\n', '\n')
        stOut = stOut.replace('\\{', '{')
        stOut = stOut.replace('\\}', '}')
        # KLUDGE to get rid of \langxxxx peculiarity that sometimes shows up
        # Might want to reinstate this if we can figure out what it means
        val = reDeLang.search(stOut)
        if val:
            stOut = stOut.replace(val.group(1), "")
        stOut = stOut.strip(' \t')
        return self.replace_rtf_codes(stOut)

    def get_composer0(self):
        """Return the original composer mined from the subtitle.

        This is the first parenthesised part of the subtitle, or what
        follows ' - ' inside it; '' when there are no parentheses.
        """
        if self.subtitle == '':
            return ''
        # Leave subtitle intact but mine for composer0
        pcont = reParenContents.search(self.subtitle)
        if not pcont:
            return ''
        inParen = pcont.group(2)
        hyphenIndex = inParen.find(' - ')
        if hyphenIndex != -1:
            inParen = inParen[hyphenIndex + 3:]
        return inParen

    def parse_document(self, doc):
        """Split the document part of a footnote into self.document,
        self.volume, self.date and self.page.  Returns False only when
        ``doc`` is empty."""
        if doc == "":
            Fronimo.print_error(self.stFile, "No document")
            return False
        val = reGetDate.search(doc)
        if val:
            # We have a date
            self.date = val.group(1).strip()
            val = reApproxDate.search(self.date)
            if val:
                caMark = val.group(1)
                if caMark != "":
                    # normalise "ca. " / "c " etc. to "c."
                    self.date = "c." + self.date.replace(caMark, "")
            val = re.search(
                r'\(\>?c?a?\.? ?[0-9]{4}\), *([fp#%]{1,2}\.* *[^.]*)\.?$',
                doc, re.DOTALL)
            if val:
                self.page = val.group(1).replace(" ", "")
            else:
                # this is not really an error, per se.
                self.page = ""
        else:
            self.date = ""
            Fronimo.print_error(self.stFile, "Cannot get date from %s" % doc)
            # Look for a page anyway (unlikely)
            val = re.search(r', *([fp#%]{1,2}\.* *.*)\.$', doc, re.DOTALL)
            if val:
                self.page = val.group(1)
            else:
                self.page = ""
                Fronimo.print_error(self.stFile, "No page # in %s" % doc)

        # Get document without date and page
        val = re.search(r' ?\(\>?c?a?\.? ?[12][0-9]{3}\)', doc)
        if val:
            self.document = doc[:val.start()]
        else:
            self.document = doc
        self.document = self.document.strip()

        val = re.search(r"([^,][^,]*), *v[. ] *([^,]+)", self.document)
        if val is None:
            self.volume = ''
        else:
            self.document = val.group(1)
            self.volume = val.group(2)
        return True
    # end of parse_document

    def slugify(self, stIn):
        """Return ``stIn`` without accents, with colons (plus following
        spaces), apostrophes and runs of spaces replaced by underscores."""
        stIn = latin1_to_ascii(stIn)
        stIn = re.sub(r': *', '_', stIn)
        stIn = stIn.replace("'", '_')
        stIn = re.sub(r' +', '_', stIn)
        return stIn

    def make_pagedir(self):
        """Build the facsimile page directory from source, document, volume
        and date.  Returns '' (after logging the name as new) when the
        source is a proper name that is not in the names list."""
        # Handle the source field first
        src = self.source
        if re.match(r'[A-Z]+-[A-Z]+[a-z]*', src):
            # it's a library, so slugify it
            src = self.slugify(src)
        else:
            # It's a proper name: a composer, intabulator, publisher,
            # anthologist, etc.
            # First check if it is in names list
            src = src.replace('?', '')
            src = src.replace('\n', '')
            found = False
            for item in self.nameList:
                if len(item) < 3:
                    continue
                firstName = item[2]
                lastName = item[1]
                if src == firstName + ' ' + lastName:
                    # item[0] contains existing slug;
                    # underscores for last names with spaces
                    src = re.sub(r' +', '_', item[0])
                    found = True
                    break
            if not found:
                # add to list of unknown names
                self.write_new_name(src)
                # No knowable facsimile directory without a knowable
                # source field, so no point in continuing
                return ''

        # Source found. Now handle document field
        document = self.slugify(self.document)
        # The slug may start with lower case, but we want the directory
        # to start with uppercase
        src = src[:1].upper() + src[1:]
        facurl = FACHEAD + src + '/' + document

        # 2 different handlings depending on whether there is a volume
        mydate = self.date.replace('>', '-')
        if self.volume:
            # volume directory carries the date; the document directory
            # is assumed not to
            stVol = "v." + self.volume + '_' + mydate
            pageDir = facurl + '/' + stVol
        else:
            # assumes document directory has appended date if no volumes
            pageDir = facurl + '_' + mydate
        return pageDir
    # end of make_pagedir

    def parse_credits(self, cred):
        """Set self.editor and self.encoder from a credits string.

        Returns True when one of the two supported layouts matched
        ("... and ... by X" or "... by X; ... by Y"); otherwise both
        attributes are set to "" and False is returned.
        """
        # expand abbreviations [Ee]d. [Ee]nc, and &
        cred = cred.replace(' & ', ' and ')
        cred = re.sub(r'\.$', '', cred)
        # Expand the abbreviation itself (first occurrence), not the first
        # "d." / "nc." found anywhere in the string.
        cred = re.sub(r'\b([Ee])d\.', r'\1dited', cred, count=1)
        cred = re.sub(r'\b([Ee])nc\.', r'\1ncoded', cred, count=1)
        # Change [IiEe]ntabulated to Encoded
        val = reEntabulated.search(cred)
        if val:
            cred = cred.replace(val.group(1), "Encoded")

        # Handle "by", "and", and ";" in credits string
        val = re.search(
            r'[Ee](?:dited|ncoded) and [Ee](?:dited|ncoded) by (..*)$',
            cred, re.DOTALL)
        if val:
            self.encoder = val.group(1).replace('S.Gerbode', 'Sarge Gerbode')
            self.editor = self.encoder
            return True

        val = re.search(
            r'([Ee](?:ncoded|dited)) by (..+) ?(?:[;.]|and) '
            r'([Ee](?:dited|ncoded)) by (..*)$',
            cred, re.DOTALL)
        if val:
            type1 = val.group(1)
            cred1 = val.group(2).strip()
            cred2 = val.group(4).strip()
            if type1.lower() == "edited":
                self.editor, self.encoder = cred1, cred2
            else:
                self.encoder, self.editor = cred1, cred2
            self.editor = self.editor.replace('S.Gerbode', 'Sarge Gerbode')
            self.encoder = self.encoder.replace('S.Gerbode', 'Sarge Gerbode')
            return True

        self.editor = self.encoder = ""
        return False
    # End parse_credits

    def parse_footnote(self):
        """Set source, document, volume, date, page, encoder and editor from
        self.footnote.

        The footnote has two parts (document, credits) or three (source,
        document, credits).  Returns False for an empty or malformed
        footnote or an unparsable document; unparsable credits are only
        logged.
        """
        self.source = self.document = self.volume = self.date = ""
        self.page = self.encoder = self.editor = ""
        if self.footnote == "":
            return False
        lsParts = reFootnoteSep.split(self.footnote)
        numParts = len(lsParts)
        if numParts < 2 or numParts > 3:
            stErr = "Footnote \"%s\" has wrong # of parts (%d)" % (self.footnote, numParts)
            Fronimo.print_error(self.stFile, stErr)
            return False
        if numParts == 2:
            # source == ''; later, source will = composer
            doc, cred = lsParts
        else:
            self.source, doc, cred = lsParts
        if not self.parse_document(doc):
            Fronimo.print_error(self.stFile, "Cannot parse document:%s" % doc)
            return False
        if not self.parse_credits(cred):
            Fronimo.print_error(self.stFile, "Cannot parse credits: %s." % cred)
        return True
    # End parse_footnote

    def get_diff_val(self, difficulty):
        """Convert a difficulty label or digit to an int.

        Labels are matched on their first three letters (beg=1 ... vir=6);
        a blank value means Medium (3); 0 is returned, after logging, for
        anything unrecognised.
        """
        diff = str(difficulty).strip()
        if diff:
            # slice the stripped value (was the raw argument)
            diff = diff[:3].lower()
        else:
            diff = '3'  # Medium is default difficulty value
        if diff in ('0', '1', '2', '3', '4', '5', '6'):
            return int(diff)
        if diff == "beg":
            return 1
        if diff in ('eas', 'sim'):
            return 2
        if diff == 'med':
            return 3
        if diff == 'cha':
            return 4
        if diff in ('dif', 'har'):
            return 5
        if diff in ('vir', 'kil'):
            return 6
        stErr = "Difficulty value \"%s\" not meaningful" % (difficulty)
        Fronimo.print_error(self.stFile, stErr)
        return 0

    def parse_info(self):
        """Parse self.info ("label: value" lines, then remarks after a line
        starting with '--') into attributes, then check the parts against
        the ensemble and the instruments against Fronimo.instList."""
        global instCount
        global partCount
        isRemark = False
        lsRemarks = []
        for datum in self.info.split('\n'):
            datum = datum.strip()
            # Ignore blank lines
            if datum == "":
                continue
            if isRemark:
                lsRemarks.append(datum)
                continue
            if datum.startswith('--'):
                # everything after a line starting with '--' is a remark
                isRemark = True
                continue
            if ':' not in datum:
                continue
            # so it is a field; decode it
            reItem = reInfo.search(datum)
            if not reItem:
                continue
            field = reItem.group(1).lower()
            value = reItem.group(2).strip()
            if field == "dif":
                self.difficulty = self.get_diff_val(value)
            elif field in self._INFO_ATTRS:
                setattr(self, self._INFO_ATTRS[field], value)
            else:
                stErr = "Info field \"%s\" not found" % (field)
                Fronimo.print_error(self.stFile, stErr)
        self.remarks = '|'.join(lsRemarks)

        # checking to see if all parts list items are in the ensemble list.
        # first collect all items in the ensemble, including tags
        insList = []
        ensItems = []
        for ens in self.ensemble.split(","):
            ens = ens.strip().lower()
            if ens == "":
                continue
            if ens.find(":") > 0:
                instag = ens.split(":")
                # Include tags for part check but not for instrument check
                ensItems.append(instag[0].strip())
                ensItems.append(instag[1].strip())
                insList.append(instag[1].strip())
            else:
                insList.append(ens)
                ensItems.append(ens)

        # Then check part list item against them
        if self.part != "":
            for ins in self.part.split(","):
                ins = ins.strip().lower()
                if ins == "score":
                    continue
                if ins not in ensItems:
                    stErr = "Part \"%s\" not in ensemble list" % (ins)
                    partCount += 1
                    Fronimo.print_error(self.stFile, stErr)

        known = {item[1] for item in Fronimo.instList if len(item) > 1}
        for ins in insList:
            if ins in known:
                continue
            instCount += 1
            stErr = "Inst. \"%s\" not found in \"%s\"." % (ins, self.stFile)
            if Fronimo.flNewInsts is not None:
                print(stErr, file=Fronimo.flNewInsts)
                Fronimo.flNewInsts.flush()
            else:
                # no new-instruments log has been opened
                Fronimo.print_error(self.stFile, stErr)
        return True
    # end of parse_info

    def write_new_name(self, newName):
        """Count ``newName`` as unknown and record it in the new-names log
        (or the error log when that file has not been opened)."""
        global nameCount
        nameCount += 1
        stOut = "%s --> %s" % (newName, self.stFile)
        if Fronimo.flNewNames is None:
            Fronimo.print_error(self.stFile, stOut)
            return
        print(stOut, file=Fronimo.flNewNames)
        Fronimo.flNewNames.flush()

    def check_name(self, name):
        """Check ``name`` ("A", "A and B" or "A or B") against the names list.

        Every name that is not a known "first last" is written to the
        new-names log.  Returns True only when there was at least one name
        and all of them are known.
        """
        name = name.replace('?', '')
        name = name.replace('\n', '')
        val = reSplitAndOr.search(name)
        if val:
            lsName = [val.group(1), val.group(2)]
        elif name:
            lsName = [name]
        else:
            lsName = []
        known = {(item[2] + ' ' + item[1]).strip()
                 for item in Fronimo.nameList if len(item) >= 3}
        allFound = bool(lsName)
        for nm in lsName:
            if nm not in known:
                # write out list of names not found in names list
                self.write_new_name(nm)
                allFound = False
        return allFound
    # end of check_name

    def populate(self):
        """Read footnote, title, subtitle, composer and info from the
        uncompressed data (self.stFron) into this instance.

        Returns False when the 'CPiece' marker cannot be found.
        """
        self.offset = self.start_offset
        self._getBstr()  # Skip page number string
        self.footnote = self._getBstr().strip()
        if not self.parse_footnote():
            Fronimo.print_error(self.stFile,
                                "Cannot parse footnote:\n%s" % self.footnote)

        CPiecePos = self.stFron.find('CPiece')
        # This is very unlikely to happen
        if CPiecePos == -1:
            Fronimo.print_error(self.stFile, "Cannot find pattern 'CPiece'")
            return False
        self.offset = CPiecePos + 14

        # str.strip takes a set of characters, not a regex class: the former
        # '[\t ]' argument also stripped literal brackets.
        self.title = self.rtf2latin(self._getBstr().strip(' \t'))
        self.subtitle = self.rtf2latin(self._getBstr().strip(' \t'))
        self.subtitle = self.subtitle.replace('\n', '|')
        self.composer0 = self.get_composer0()
        self.composer = self.rtf2latin(self._getBstr().strip(' \t'))
        if self.source == "":
            self.source = self.composer.replace('?', '')
            if self.source == 'Anonymous':
                self.source = 'Unknown'

        # Discard text at beginning and end of section
        self._getBstr()
        self._getBstr()
        self.info = self._getBstr().strip().replace('\r', '')
        # Skip key field; will get key from info field
        self.composer = self.composer.replace('Anon.', 'Anonymous')
        self.composer0 = self.composer0.replace('Anon.', 'Anonymous')
        if self.info:
            if not self.parse_info():
                Fronimo.print_error(self.stFile, "Cannot parse info string.")
        else:
            Fronimo.print_error(self.stFile, "No info data.")
        self.mtime = int(os.path.getmtime(self.stFile))
        self.ctime = int(os.path.getctime(self.stFile))
        return True
    # End Populate

    def make_info(self):
        """Build the info string (key, type, ens, diff, optional remarks)."""
        # get_diff_val yields 1..6 (0 = unknown); ``difficulties`` is
        # 0-based, and an unknown/blank level falls back to Medium.
        level = self.get_diff_val(self.difficulty)
        if not 1 <= level <= len(difficulties):
            level = 3
        myInfo = 'key: ' + self.key + '\r\n'
        myInfo += 'type: ' + self.type + '\r\n'
        myInfo += 'ens: ' + self.ensemble + '\r\n'
        myInfo += 'diff: ' + difficulties[level - 1]
        if self.remarks:
            myInfo += '\r\n--\r\n' + self.remarks
        return myInfo

    def make_footnote(self):
        """Build the footnote: [source,] document part and credits, joined
        by FOOTNOTE_SEP so that parse_footnote can split them again."""
        stDoc = self.document
        if self.volume != '':
            stDoc += ', v.' + str(self.volume)
        stDoc += ' (' + self.date + ')'
        page = self.page
        if page:
            if re.match(r'[fp#%]{1,2}\.|[#%]', page):
                # already carries its own mark (as parse_document stores it)
                stMark = ''
            elif re.match(r'[0-9]', page):
                stMark = 'p.'
            elif re.match(r'[a-zA-Z]', page):
                stMark = 'f.'
            else:
                stMark = ''
            stDoc += ', ' + stMark + page
        stDoc += '.'

        if self.editor == self.encoder:
            stCred = 'Encoded and edited by ' + self.encoder + '.'
        else:
            stCred = ('Encoded by ' + self.encoder
                      + '; edited by ' + self.editor + '.')

        parts = [stDoc, stCred]
        # the source is only spelled out when it differs from the composer
        if self.source and self.source != self.composer:
            parts.insert(0, self.source)
        return FOOTNOTE_SEP.join(parts)

    def depopulate(self):
        """Write footnote, title, subtitle, composer and info back into
        self.stFron.  Returns False when the 'CPiece' marker is missing or
        a string cannot be replaced."""
        self.offset = self.start_offset
        # throw away page number string and update offset
        self._getBstr()
        self.footnote = self.make_footnote()
        if not self._put(self.footnote):
            return False

        newPos = self.stFron.find('CPiece')
        # This is very unlikely to happen
        if newPos == -1:
            Fronimo.print_error(self.stFile, "Cannot find pattern 'CPiece'")
            return False
        self.offset = newPos + 14

        # latin2rtf turns '|' into \par itself; the attributes keep their
        # plain-text form so that depopulate can be called again.
        for text in (self.title, self.subtitle, self.composer):
            if not self._put(self.latin2rtf(text)):
                return False

        # Discard text at beginning and end of section
        self._getBstr()
        self._getBstr()
        return self._put(self.make_info())
    # end depopulate

    # If from a template, prompts where to write it to.
    # If from an existing file, writes to that file
    # Maybe with a backup file, until all is known to be kosher
    def write_file(self):
        """Gzip self.stFron to self.outfile (or back to the input file when
        no output name is set).  Returns True on success."""
        stOutfile = add_prefix_and_suffix(self.outfile or self.stFile)
        try:
            btFron = bytes(self.stFron, 'latin1')
        except UnicodeEncodeError:
            self.print_error(stOutfile, 'Data contains non-latin1 characters')
            return False
        try:
            # the context manager closes the stream, which is what writes
            # the gzip trailer
            with gzip.open(stOutfile, "wb") as flOut:
                written = flOut.write(btFron)
        except OSError:
            self.print_error(stOutfile, 'Cannot open file for writing')
            return False
        if written != len(btFron):
            self.print_error(stOutfile, 'File not completely written')
            return False
        return True
    # end of write_file
# end of class Fronimo


def add_prefix_and_suffix(stIn):
    """Return ``stIn`` with PERSONDIR prepended and '.ft3' appended, each
    only if not already present."""
    if not stIn.startswith(PERSONDIR):
        stIn = PERSONDIR + stIn
    if not stIn.endswith('.ft3'):
        stIn = stIn + '.ft3'
    return stIn


def del_prefix_and_suffix(stIn):
    """Return ``stIn`` without a leading PERSONDIR and a trailing '.ft3'."""
    # Slice instead of str.replace, which would also remove occurrences in
    # the middle of the name.
    if stIn.startswith(PERSONDIR):
        stIn = stIn[len(PERSONDIR):]
    if stIn.endswith('.ft3'):
        stIn = stIn[:-len('.ft3')]
    return stIn


def process_form(diForm):
    """Apply the values of ``diForm`` to the file named by diForm['infile']
    and write the result out.  Returns True on success."""
    stIn = add_prefix_and_suffix(diForm['infile'])
    # Populate Fronimo object from input file
    fron = Fronimo(stIn)
    if fron.start_offset < 0:
        Fronimo.print_error(stIn, "Cannot populate Fronimo.")
        print('Cannot populate class Fronimo from file "%s".' % stIn)
        return False
    # load items from form into Fronimo object
    fron.load_form(diForm)
    # create fronimo file string from fronimo object
    if not fron.depopulate():
        Fronimo.print_error(stIn, "Cannot update fronimo file string.")
        return False
    if not fron.write_file():
        Fronimo.print_error(stIn, "Cannot write out fronimo file.")
        return False
    return True
# end process_form