from unicode import latin1_to_ascii
import pdb
import re
import os
import sys
import time
import glob
# from pprint import pprint, pformat
import gzip
# import csv
import platform
# import inspect
# from functools import partial
import os.path
import readline

#Globals

# Difficulty labels.
difficulties = ["Beginner", "Easy", "Medium", "Challenge", "Difficult", "Virtuoso"]

# Choose the local website root according to the host platform string.
currOS = platform.platform()
if currOS.startswith('Windows'):
    LOCALBASE = 'D:/website/'
    #LOCALBASE = 'A:/test/'
else:
    LOCALBASE = '/mnt/d/website/'
#    LOCALBASE = '/ssd/home/sarge/prog/python/fron'

# HTTPBASE = "https://lutemusic.org/"
HTTPBASE = "file://"
# Current working directory with every run of backslashes turned into a
# single '/', and a trailing '/' appended.
CURRDIR = re.sub('\\\\+', '/', os.getcwd()) + '/'
# Root directory of the facsimile tree.
FACHEAD = LOCALBASE + "facsimiles/"
# instCount = 0   #count of new/incorrect instruments
# partCount = 0   #count of items in partCount that are int in ensemble
# typeCount = 0   #count of new/incorrect types
# facCount = 0    #count of unmatched facsimile strings
# newFacs = 0 # Count of unmatched missing facsimiles
nameCount = 0   #count of new/incorrect proper names: composers, publishers, anthologists, etc.
diData = {}

def join_with_fslash(s1, s2):
    """Return s1 and s2 joined by a single forward slash."""
    return '/'.join((s1, s2))
# end join_with_fslash

# Wall-clock time (whole seconds) at which this module was loaded.
startTime = int(time.time())

# --- Data/input files contained in the local website base ---
NAMESFILE = LOCALBASE + "names.tsv"
INSTSFILE = LOCALBASE + "insts.tsv"
TYPEFILE = LOCALBASE + "types.tsv"
CONTRIBDIR = LOCALBASE + 'contributors/'
TEMPLATE = CONTRIBDIR + 'template.ft3'
# This will change when everything is debugged
#PERSONALDIR = CONTRIBDIR + lastName + firstName[0]
PERSONDIR = CONTRIBDIR
MISSINGPAGES = LOCALBASE + "missing_pages.txt"
NOFACDIRS = LOCALBASE + "noFacDirs.txt"

# --- Error/output files located in the current directory ---
# NOTE(review): NEWFACDIRS is built from LOCALBASE, not CURRDIR - confirm intent.
NEWFACDIRS = LOCALBASE + "newFacDirs.txt"
NEWNAMES = CURRDIR + "newnames.txt"
# Nonexistent facsimiles in valid directories
FACERRS = CURRDIR + "facerrs.txt"
# NEWTYPES = CURRDIR + "newtypes.txt"
# NEWINSTS = CURRDIR + "newinsts.txt"
# DERRFILE = CURRDIR + "dfterrs.txt"
# CERRFILE = CURRDIR + "cfterrs.txt"
ERRFILE = CURRDIR + "errors"
# TERRFILE = CURRDIR + "tfterrs.txt"
TSVFILE = CURRDIR + "dft.tsv"
JSONFILE = CURRDIR + "dft.json"
# directories where we don't look for fronimo files
# BADDIRS = ['midi', 'tabs', 'pdf', 'other', 'videos', 'TEMP', 'old_dft.pls',
# 'fronimo', 'images', 'icons', 'index_files', 'ftp', 'facsimiles',
# 'making_lute_music_accessible_files', 'contributors']

# --- Wrapper text for converting between RTF and latin1 strings ---
RTFPREFIX = (
    "{\\rtf1\\ansi\\ansicpg1252\\deff0\\deflang1033"
    "{\\fonttbl{\\f0\\fnil\\fcharset0 MS Shell\r\nDlg;}}\r\n"
    "\\viewkind4\\uc1\\pard\\f0\\fs-22 "
)
RTFSUFFIX = "\\par\r\n}\r\n"
IMAGETYPES = ('.png', '.tif', '.pdf', '.jpg')

# Compiled regular expressions.
# re.DOTALL is used so that '.' also matches '\n'.
# All patterns are raw strings so that regex escapes such as \D and \( are
# not (invalid) Python string escapes.
# = number in a series; % = arbitrary number to make source unique
# A backslash-u followed by any four characters and a space.
reUnicode = re.compile(r'(\\u.... )')
reSplitAndOr = re.compile(r'(..*) (?:and|or) (..*)')
# Parenthesised four-digit year (1000-2999), optionally prefixed by '>',
# 'c', 'ca', 'c.' etc.; the '(' must be preceded by a non-digit.
reGetDate = re.compile(r'\D\((>?c?a?\.? ?[1-2][0-9]{3})\)', re.DOTALL)
# Group 1 is the "c"/"ca"/"c."/"ca." marker in front of a year.
reApproxDate = re.compile(r'(ca?\.? ?)[1-2][0-9]{3}', re.DOTALL)
# Groups: text before '(', text inside the first parentheses, text after.
reParenContents = re.compile(r'^([^(]*)\(([^)]+)\)(.*)', re.DOTALL)
reEntabulated = re.compile(r'([IiEe]ntabulated)', re.DOTALL)
# reEncoded = re.compile('[Ee](ncoded)', re.DOTALL)
# reEdited = re.compile('[Ee](dited)', re.DOTALL)
# The stray second argument 1 that used to be passed here was interpreted as
# a *flags* value (the deprecated re.TEMPLATE bit), not a count; it had no
# effect on these patterns, so it is dropped.
reEnc = re.compile(r'[Ee](nc\.)')
reEd = re.compile(r'[Ee](d\.)')
# Group 1: first three characters of the line; group 2: text after the colon.
reInfo = re.compile(r'^(...).*?: *(..*)$', re.DOTALL)
reKey = re.compile(r'^[A-G][b#]*[Mm]$', re.DOTALL)
# reGetDictItem = re.compile('"([^"]*)"[^"]*"([^"]*)"')
# An RTF \langNNNN control word (four digits).  The closing brace of the
# {4} quantifier was previously misplaced, so the pattern could never match.
reDeLang = re.compile(r'(\\lang[0-9]{4})')

# These are for error messages.  Each helper walks up the call stack with
# sys._getframe(n + 1); the "+ 1" skips the lambda's own frame, so n = 0
# refers to the function that called the helper, n = 1 to that function's
# caller, n = 2 to the caller's caller, etc.
# Note the defaults: funcName() and lineNo() default to n = 1 and
# callerName() to n = 2, i.e. when called with no argument from inside an
# error-reporting function they describe the function that *called* the
# reporter (and, for callerName, that function's own caller).
# sys._getframe raises ValueError if the stack is not deep enough.
funcName = lambda n=1: sys._getframe(n + 1).f_code.co_name
callerName = lambda n=2: sys._getframe(n + 1).f_code.co_name
lineNo = lambda n=1: sys._getframe(n + 1).f_lineno

# Ordered list of (label, attribute name) pairs.
# Fronimo.__init__ iterates over this list and initialises every named
# attribute on the new instance to an empty string.
# NOTE(review): the labels are presumably column/field headings used by code
# outside this section - confirm before renaming or reordering entries.
field_map = [
    ("Input", "stInfile"),
    ("Title", "title"),
    ("Subtitle", "subtitle"),
    ("Composer", "composer"),
    ("Orig. composer", "composer0"),
    ("Ensemble", "ensemble"),
    ("Part", "part"),
    ("Key", "key"),
    ("Difficulty", "difficulty"),
    ("Type", "type"),
    ("Source", "source"),
    ("Document", "document"),
    ("Volume", "volume"),
    ("Date", "date"),
    ("Page", "page"),
    ("Section", "section"),
    ("Editor", "editor"),
    ("Encoder", "encoder"),
    ("Intabulator", "intabulator"),
    ("Arranger", "arranger"),
    ("Contributor", "contributor"),
    ("Remarks", "remarks"),
    ("Output", "outfile"),
    ("Footnote", "footnote"),
    ("Concordances", "concordances"),
    ("Info", "info"),
    ("Piece", "piece"),
    ("Recording", "recurl"),
    ("Facsimile", "facurl"),
    ("PDF", "stPdf"),
    ("Midi", "stMidi"),
    ("Modified", "mtime"),
    ("Created", "ctime"),
    ]

'''
key_order = [   # for dumping
    "title", "subtitle",
    "composer", "composer0",
    "footnote", "source", "document", "volume", "date", "page",
    "editor", "encoder", "arranger", "intabulator",
    "contributor", "concordances",
    "info", "piece", "section",
    "type", "key", "difficulty",
    "ensemble",
    "part",
    "remarks","recurl", "facurl", 
    "stFile", "stPdf", "stMidi",
    "ctime", "mtime",
    ]
'''

#Creates fronimo object from file with name stIn
class Fronimo:
    def __init__(self, stIn = TEMPLATE):
        """Build a Fronimo object from the gzipped fronimo file named stIn.

        All attributes named in field_map start as empty strings.  The file
        is then read into self.stFron, the starting offset is determined and,
        if that offset is positive, populate() fills in the remaining values.
        self.start_offset is left at -1 when the file could not be read or
        populate() reported failure.
        """
        # Every mapped field attribute starts out as an empty string.
        for _label, attrName in field_map:
            setattr(self, attrName, "")
        self.base = LOCALBASE
        self.currProg = self.credits = self.begText = self.endText = ''
        self.performance = self.simple = self.ornamented = False
        # Input file name comes from the stIn argument.
        self.stFile = stIn
        self.stFron = ''
        self.flFron = self.flOut = self.flErr = None
        self.offset = self.oldOffset = None
        # These are also covered by field_map; reset explicitly as before.
        self.footnote = self.volume = self.info = self.recurl = self.facurl = ""

        # Load self.stFron from the fronimo file self.stFile.
        if not self.read_and_unzip_file(self.stFile):
            print("OSError: Cannot read and unzip input file: ", \
            self.stFile, file=sys.stderr)
            # marker for failure of class instantiation
            self.start_offset = -1
            return

        self.start_offset = self.get_start_offset()
        if self.start_offset > 0:
            # Fill in the other values of the fronimo object.  Only an
            # explicit False-equal result counts as failure here.
            if self.populate() == False:
                self.start_offset = -1
# end of __init__

    # Open error file
    @classmethod
    def open_error(kls, errfile):
        try:
            kls.flErr = open(errfile, "w", encoding = 'latin1')
        except OSError:
            print("OSError: Cannot open error File ", errfile,
            " for writing.", file=sys.stderr)
            return False
        except:
            print("Other error: Cannot open error File ", errfile,
            " for writing.", file=sys.stderr)
            return False
        return True
# End open_error

    @classmethod
    def open_file(kls, fl, mode):
        try:
            flName = open(fl, mode, encoding='latin1')
        except OSError:
            err= "OSError: Cannot open file %s in mode %s." % (fl,  mode)
            kls.print_error(fl, err)
            return None
        except:
            err= "Other error: Cannot open file %s in mode %s." % (fl,  mode)
            kls.print_error(fl, err)
            return None
        return flName
# end of open_file

# open and read all needed fronimo-related files
    @classmethod
    def open_files(kls):
        """Load the name, instrument and type tables and open the new-names log.

        Fills kls.nameList, kls.instList and kls.typeList from NAMESFILE,
        INSTSFILE and TYPEFILE, then opens NEWNAMES for writing.  The file
        handles are kept on the class (flNames, flInsts, flTypes,
        flNewNames).  Returns False as soon as one file cannot be opened
        (after printing which one), True otherwise.
        """
        kls.nameList = []
        kls.typeList = []
        kls.instList = []
        kls.noFacList = []

        # --- Recognized proper names ---
        kls.flNames = kls.open_file(NAMESFILE, "r")
        if not kls.flNames:
            print("Cannot open NAMESFILE file.")
            return False
        for rawLine in kls.flNames.read().split('\n'):
            fields = rawLine.split('\t')
            # Must have at least last name and first name.
            if len(fields) < 2:
                continue
            lastName, firstName = fields[0], fields[1]
            # Default directory name (slug) for this proper name:
            # last name + upper-cased first letter of the first name.
            if firstName != '':
                slug = lastName + firstName.upper()[0]
            else:
                slug = lastName
            # Accents are removed from the slug.  Only slug, last name and
            # first name are kept; other fields of names.tsv are dropped
            # for now.
            kls.nameList.append([latin1_to_ascii(slug), lastName, firstName])

        # --- Instruments: one tab-split record per line ---
        kls.flInsts = kls.open_file(INSTSFILE, "r")
        if not kls.flInsts:
            print("Cannot open INSTSFILE file.")
            return False
        kls.instList.extend(
            rawLine.split('\t') for rawLine in kls.flInsts.read().split('\n'))

        # --- Types: lines are stripped first (gets rid of leading tabs) ---
        kls.flTypes = kls.open_file(TYPEFILE, "r")
        if not kls.flTypes:
            print("Cannot open TYPEFILE file.")
            return False
        kls.typeList.extend(
            rawLine.strip().split('\t')
            for rawLine in kls.flTypes.read().split('\n'))

        # Currently disabled (was commented out here): reading NOFACDIRS
        # into kls.noFacList and MISSINGPAGES into kls.missingPageList
        # (time savers that need updating as facsimiles are added), and
        # opening NEWFACDIRS, NEWTYPES, NEWINSTS and FACERRS for writing.

        # --- Error lists ---
        kls.flNewNames = kls.open_file(NEWNAMES, "w")
        if not kls.flNewNames:
            print("Cannot open NEWNAMES file.")
            return False
        return True
# end open_files

    # Get canonical name for type
    @classmethod
    def find_canonical_type(kls,typ):
        typ = typ.strip()
        typ = typ.lower()
        for item in kls.typeList:
            if item[0] == typ:
                return item[1]
        return None
    #end find_canonical_type
        
    # Finds all types in a hierarchy below a given canonical type
    @classmethod
    def get_all_types(kls, typ):
        typOut = [typ]
        for t in kls.typeList:
            if len(t) == 3: #only look at items that have higher types
                # if there is an "and"
                tpH = t[2].split('&')
                # See if the search pattern matches one of the alternatives
                if typ in tpH:
                    newType = kls.get_all_types(t[1])
                    if newType:
                        # No duplications allowed
                        if not newType in typOut:
                            typOut = typOut + newType
        return typOut

    # Starts with a comma-separated list of types, validates them,
    #gets canonical name, # and returns the validated list with all sub-types
    @classmethod
    def get_type_list(kls, typesIn):
        typelist = typesIn.split(',')
        typesOut = []
        for typ in typelist:
            t = kls.find_canonical_type(typ)
            if t == None:
                print("Type %s not found." % typ)
#               Fronimo.print_error("Type %s not found." % typ)
                continue
            else:
                typesOut += kls.get_all_types(t)
        return(typesOut) 

# print helpful error message
    @classmethod
    def print_error(kls, currFile, errMsg):
        """Write a diagnostic for currFile to the error file and flush it.

        The message names the function that called print_error, that
        function's caller and the line number of the call.  If no error
        file has been opened on the class (kls.flErr missing or None) the
        message goes to sys.stderr instead of raising.
        """
        errOut = getattr(kls, 'flErr', None)
        if errOut is None:
            errOut = sys.stderr
        # funcName/callerName/lineNo count stack frames relative to their
        # call site, so they must be called directly from this method.
        print("function:%s;caller:%s; line:%d\nfile:'%s'; %s." % (funcName(), callerName(), lineNo(), currFile, errMsg), file=errOut)
        errOut.flush()

# Get starting offset for reading/writing a fronimo file
    def get_start_offset(self):
        if (self.stFron[4] == '\x15') or (self.stFron[4] == '\x14'):
            return 364
        elif self.stFron[4] == '\x16':
            return 368
        else:
            val = hex(ord(self.stFron[4]))
            msg = "stFron[4] = %s (not \\x15 or \\x16), so starting offset is unknown" % (val)
            Fronimo.print_error(self.stFile, msg)
            return -1
# end get_start_offset
    
# load values from a form into a Fronimo class instance
    def load_form(self, diForm):
        for key, value in diForm.items():
            setattr(self, key, value)

# end of load_form

# read and unzip a fronimo file: self.flFile into self.stFron
# Fronimo files are zipped
    def read_and_unzip_file(self, stIn):
        self.flFron = gzip.open(stIn)
        if self.flFron == None:
            Fronimo.print_error(stIn, 'Cannot open and unzip fronimo file.')
            return False
        # Read entire file into a global byte array
        btFron = self.flFron.read()
        if len(btFron) < 100:
            Fronimo.print_error(self.stFile, 'Cannot read fronimo file.')
            return False
        self.stFron = btFron.decode("latin1")
        if len(self.stFron) < 100:
            Fronimo.print_error(self.stFile, 'Cannot decode fronimo file.')
            return False
        return True
# end of read_and_unzip_file

    # get x number of chars from string. Updates offset value
    def _get(self, numChars):
        if numChars == 0:
            return ""
        oldOffset = self.offset
        self.offset += numChars
        if (self.offset) > len(self.stFron):
            errMsg = "Cannot get %d chars starting at offset %d" % (numChars, oldOffset)
            Fronimo.print_error(self.stFile, errMsg)
            return ""
        return self.stFron[oldOffset:self.offset]

    # Get two bytes of info
    def _getWord(self, fSigned):
        inCh = self._get(1)
        if inCh == "":
            errMsg = "No first byte in stFron at offset %d" % (self.offset)
            Fronimo.print_error(self.stFile, errMsg)
            return -1
        word1 = ord(inCh)
        inCh = self._get(1)
        if inCh == "":
            errMsg = "No 2nd byte in stFron at offset %d" % (self.offset)
            Fronimo.print_error(self.stFile, errMsg)
            return -1
        word2 = 256 * ord(inCh)
        word = word1 + word2
        if (fSigned and word > 32768):
            word -= 65536
        return word

    # Get a fronimo-formatted string
    def _getBstr(self):
        # first byte is string length if < 255
        firstByte = self._get(1)
        if firstByte == False:
            errMsg = "No first byte in stFron at offset %d" % (self.offset)
            Fronimo.print_error(self.stFile, errMsg)
            return ""
        length = ord(firstByte)
        if length == 0:
            return ""
        # First char 255 means a long string.
        # Next 2 chars determine string length as an unsigned integer
        if length == 255:
            length = self._getWord(False)
            if length == -1:
                errMsg = "Zero string length from _getWord, offset %d" % (self.offset)
                Fronimo.print_error(self.stFile, errMsg)
                return ""
        stOut = self._get(length)
        if stOut == "":
            errMsg = "Result of get(length) is "", at offset %d" % (self.offset)
            Fronimo.print_error(self.stFile, errMsg)
            return ""
        return stOut
        
    def _put(self, stNew):
        # Assumes new string is RTF'd, if necessary, but not in fronimo string format,
        # with leading length indicator
        # Find length of old fronimo string
        ch = self._get(1)
        if ch == '\xFF': # means next 2 chars determine length
            byte1 = self._get(1)
            if byte1 == "":
                errMsg = "No first byte in stFron at offset %d" % (self.offset)
                Fronimo.print_error(self.stFile, errMsg)
                return False
            word1 = ord(byte1)
            byte2 = self._get(1)
            if byte2 == "":
                errMsg = "No second byte in stFron at offset %d" % (self.offset)
                Fronimo.print_error(self.stFile, errMsg)
                return False
            # second byte is higher order
            word2 = 256 * ord(byte2)
            # Length of actual string + 3 bytes to specify the length
            length = word1 + word2 + 3
            # reset offset to compensate for 3 _get(1)'s
            self.offset -= 3
        else:
            # Length of actual string + 1 byte to specify the length
            length = ord(ch) + 1
            # reset offset to compensate for 1 _get(1)
            self.offset -= 1
        # find length of new string
        newLen = len(stNew)
        if newLen < 255:
            stInsert = chr(newLen)
            # We will add one length indicator to the head of the string
            newLen += 1
        else:
            stInsert = chr(255) + chr(newLen % 256) + chr(int(newLen / 256))
            # We will add three length indicators to the head of the string
            newLen += 3
        # Add leading length indicator
        stNew = stInsert + stNew
        # splice in the new string
        stOut = self.stFron[:self.offset] + stNew + self.stFron[self.offset + length:]
        # set new offset
        self.offset += newLen
        self.stFron = stOut
        return True
# end _put
        

    def latin2rtf(self, stIn):
        """Wrap latin1 text stIn in the RTF prefix/suffix used by this module.

        Characters with code 127 or above become \\'xx (xx = hex code) and
        each '|' becomes an RTF paragraph break; everything else is copied
        unchanged.
        """
        pieces = []
        for ch in stIn:
            code = ord(ch)
            if code >= 127:
                # hex value of latin1 char --> last 2 chars of rtf code
                pieces.append("\\'" + format(code, 'x'))
            elif ch == '|':
                pieces.append('\\par\r\n')
            else:
                pieces.append(ch)
        return RTFPREFIX + "".join(pieces) + RTFSUFFIX


    def replace_rtf_codes(self, stIn):
        """Turn the RTF control sequences in stIn into plain latin1 text.

        - \\'xx (two hex digits) becomes the character with that code; the
          replacement happens exactly where the escape occurs.  A \\' that
          is not followed by two hex digits is left untouched.
        - \\par becomes '|'; \\cf0, \\cf1 and \\langNNNN are removed.
        - Double spaces are collapsed once.
        - The trailing space of the first \\u.... sequence is removed.
        """
        # Decode each escape in place (the hex digits were previously
        # replaced at their first occurrence anywhere in the string, which
        # could corrupt earlier text).
        stIn = re.sub(r"\\'([0-9A-Fa-f]{2})",
                      lambda m: chr(int(m.group(1), 16)), stIn)
        stIn = stIn.replace('\\par', '|')
        stIn = stIn.replace('\\cf1', '')
        stIn = stIn.replace('\\cf0', '')
        stIn = re.sub(r'\\lang[0-9]*', '', stIn)
        stIn = stIn.replace('  ', ' ')
        # KLUDGE to handle unicode weirdness
        val = reUnicode.search(stIn)
        if val:
            # reUnicode has a single capturing group.
            uni = val.group(1)
            # lop off final character from unicode sequence
            stIn = stIn.replace(uni, uni[:-1])
        return stIn

    def rtf2latin(self, stRtf):
        if stRtf.find('{\\rtf', 0) != 0:
            return stRtf
        if len(stRtf) < 100:
            return stRtf
        start = stRtf.find('\\f0\\fs', 0)
        if start == -1:
            return stRtf
        start += 9
        end = stRtf.find('\\par\r\n}\r\n', start + 1)
        if end == -1:
            return stRtf
        stOut = stRtf[start:end]
        stOut = stOut.replace('\\par\r\n', '\n')
        stOut = stOut.replace('\\{', '{')
        stOut = stOut.replace('\\}', '}')
        # KLUDGE to get rid of \langxxxx peculiarity that sometimes shows up
        # Might want to reinstate this if we can figure out what it means
        val = reDeLang.search(stOut)
        if val:
            lang1234 = val.group(1)
            stOut = stOut.replace(lang1234, "")
        stOut = stOut.strip(' \t')
        stOut = self.replace_rtf_codes(stOut)
        return stOut

    def get_composer0(self):
        if self.subtitle == '':
            return ''
        #Leave subtitle intact but mine for composer0
        pcont = reParenContents.search(self.subtitle)
        if pcont:
            inParen = pcont.group(2)
            hyphenIndex = inParen.find(' - ')
            if  hyphenIndex != -1:
                inParen = inParen[hyphenIndex + 3:]
        else:
            return ''
        return inParen

    def parse_document(self, doc):
        """Split a document string into self.document, volume, date and page.

        doc is expected to look like "<document>[, v. <volume>] (<date>),
        <page>."  Sets self.date (with "c." in front for approximate dates),
        self.page, self.document and self.volume; parts that cannot be found
        are set to "".  Returns False (after reporting) only when doc is
        empty, True otherwise.  A missing date, and a missing page when
        there is no date, are reported through print_error.
        """
        if doc == "":
            Fronimo.print_error(self.stFile, "No document")
            return False
        val = reGetDate.search(doc)
        # We have a date
        if val:
            self.date = val.group(1)
            self.date = self.date.strip()
            val = reApproxDate.search(self.date)
            if val:
                # Normalise any "c"/"ca"/"ca." marker to a leading "c."
                caMark = val.group(1)
                if caMark != "":
                    self.date = self.date.replace(caMark, "")
                self.date = "c." + self.date
            # Page reference following the parenthesised date.  Raw strings
            # keep the regex escapes from being read as string escapes.
            val = re.search(r'\(\>?c?a?\.? ?[0-9]{4}\), *([fp#%]{1,2}\.* *[^.]*)\.?$',
                   doc, re.DOTALL)
            if val:
                self.page = val.group(1)
                self.page = self.page.replace(" ", "")
            else:
                self.page = ""
                # this is not really an error, per se.
                # Fronimo.print_error(self.stFile, "No page # in %s" % doc)
        else:
            self.date = ""
            Fronimo.print_error(self.stFile, "Cannot get date from %s" % doc)
            # Look for a page anyway (unlikely)
            val = re.search(r', *([fp#%]{1,2}\.* *.*)\.$', doc, re.DOTALL)
            if val:
                self.page = val.group(1)
            else:
                self.page = ""
                Fronimo.print_error(self.stFile, "No page # in %s" % doc)
        # Get document without date and page
        val = re.search(r' ?\(\>?c?a?\.? ?[12][0-9]{3}\)', doc)
        if val:
            end = val.span()[0]
            self.document = doc[:end]
        else:
            self.document = doc
        self.document = self.document.strip()
        # Split off a ", v. <volume>" suffix when there is one
        val = re.search("([^,][^,]*), *v[. ] *([^,]+)", self.document)
        if val is None:
            self.volume = ''
        else:
            self.document = val.group(1)
            self.volume = val.group(2)
        return True
    # end of parse_document

    def slugify(self,stIn):
        """Return stIn as a directory-name slug.

        Accents are removed with latin1_to_ascii; a colon with any spaces
        after it, each apostrophe and each run of spaces become '_'.  Case
        is left unchanged.
        """
#       stIn = stIn.lower()
        slug = latin1_to_ascii(stIn)
        slug = re.sub(': *', '_', slug)
        slug = slug.replace("'", '_')
        return re.sub(' +', '_', slug)
        
    def make_pagedir(self):
        """Return the facsimile page directory for this piece, or ''.

        The directory is FACHEAD + <source slug> + '/' + <document slug>,
        followed either by '/v.<volume>_<date>' when there is a volume or
        by '_<date>' when there is not.  self.source is either a library
        siglum (slugified directly) or a proper name that must appear in
        self.nameList as "<first name> <last name>".  Unknown names are
        passed to self.write_new_name and '' is returned, because no
        facsimile directory can be derived without a known source.
        """
        # document and volume obtained in parse_footnote
        # and self.source specified in populate if not in parse_footnote
        # Handle the source field first
        src = self.source
        if re.match('[A-Z]+-[A-Z]+[a-z]*', src):
            #it's a library, so slugify it
            src = self.slugify(src)
        else:
            # It's a proper name: a composer, intabulator, publisher,
            #anthologist, etc.
            # First check if it is in names list
            src = src.replace('?', '')
            src = src.replace('\n', '')
            found = False
            for item in self.nameList:
                # nameList records are [slug, last name, first name]
                lastName = item[1]
                firstName = item[2]
                if src == firstName + ' ' + lastName:
                    # item[0] contains the existing slug; replace spaces
                    # for last names that contain them
                    src = re.sub(' +', '_', item[0])
                    found = True
                    break
            if not found:
                # add to list of unknown names
                # NOTE(review): write_new_name is defined outside this
                # section of the file.
                self.write_new_name(src)
                # So there won't be a knowable facsimile directory
                # if no knowable source field
                # So no point in continuing
                return ''
        # Source found. Now handle document field
        document = self.slugify(self.document)
        # The slug may start with lower case, but we want the directory to
        # start with uppercase.  Strings are immutable, so build a new one.
        if src:
            src = src[0].upper() + src[1:]
        facurl = FACHEAD + src + '/' + document
        # 2 different handlings depending on whether there is a volume
        mydate = self.date.replace('>','-')
        if self.volume:
            # make entry for volume directory, appending date
            # assumes a document directory doesn't have appended date when there are volumes.
            # and the volume files carry the date
            stVol = "v." + self.volume + '_' + mydate
            pageDir = facurl + '/' + stVol
        else:
            # assumes document directory has appended date if no volumes
            pageDir = facurl + '_' + mydate
        return pageDir
    # end of make_pagedir
        
    def parse_credits(self, cred):
        """Extract self.editor and self.encoder from a credits string.

        Abbreviations are expanded first (' & ' to ' and ', 'ed.' to
        'edited', 'enc.' to 'encoded', '[IiEe]ntabulated' to 'Encoded') and
        a trailing period is removed.  Two layouts are recognised:
        "<Edited|Encoded> and <edited|encoded> by X" (X is both editor and
        encoder) and "<Edited|Encoded> by X <;|.|and> <edited|encoded> by
        Y".  'S.Gerbode' is expanded to 'Sarge Gerbode'.  Returns True when
        one of the layouts matched; otherwise editor and encoder are set to
        "" and False is returned.
        """
        # expand abbreviations [Ee]d. [Ee]nc, and &
        cred = cred.replace(' & ', ' and ')
        # raw string: '\.' is a regex escape, not a Python string escape
        cred = re.sub(r'\.$', '', cred)
        if reEd.search(cred):
            cred = cred.replace('d.', 'dited', 1)
        if reEnc.search(cred):
            cred = cred.replace('nc.', 'ncoded', 1)
        # Change [IiEe]ntabulated to Encoded
        val = reEntabulated.search(cred)
        if val:
            cred = cred.replace(val.group(1), "Encoded")
        #Handle "by", "and", and  ";" in credits string
        # One person credited with both roles
        val = re.search('[Ee](?:dited|ncoded) and [Ee](?:dited|ncoded) by (..*)$', cred, re.DOTALL)
        if val:
            person = val.group(1).replace('S.Gerbode', 'Sarge Gerbode')
            self.editor = self.encoder = person
            return True
        # Two separately credited roles
        val = re.search('(E(?:ncoded|dited)) by (..+) ?(?:[;.]|and) ([Ee](?:dited|ncoded)) by (..*)$', cred, re.DOTALL)
        if val:
            type1 = val.group(1)
            cred1 = val.group(2)
            cred2 = val.group(4)
            if type1 == "Edited":
                self.editor = cred1.strip()
                self.encoder= cred2.strip()
            else:
                self.encoder = cred1.strip()
                self.editor = cred2.strip()
            self.editor = self.editor.replace('S.Gerbode', 'Sarge Gerbode')
            self.encoder = self.encoder.replace('S.Gerbode', 'Sarge Gerbode')
            return True
        self.editor = self.encoder = ""
        return False
    # End parse_credits

    # sets source, document, volume, date, page, encoder, editor
    def parse_footnote(self):
        self.source = self.document =  self.volume = self.date = self.page = self.encoder = self.editor = ""
        if self.footnote == "":
            return
        lsParts = re.split('  +', self.footnote)
        numParts = len(lsParts)
        if numParts < 2 or numParts > 3:
            stErr = "Footnote \"%s\" has wrong # of parts (%d)" % (self.footnote, numParts)
            Fronimo.print_error(self.stFile, stErr)
            return False
        if numParts == 2:
           # source == ''; later, source will = composer
           doc, cred = lsParts
        else:
            self.source, doc, cred = lsParts
#            return False
        if not self.parse_document(doc):
            Fronimo.print_error(self.stFile, "Cannot parse document:%s" % doc)
            return False
        if not self.parse_credits(cred):
            Fronimo.print_error(self.stFile, "Cannot parse credits: %s." % cred)
        return True
    # End parse_footnote

    def get_diff_val(self, difficulty):
        diff = difficulty.strip()
        if diff:
            diff = difficulty[:3]
            # convert to lower case
            diff = diff.lower()
        else:
            diff = '3' # Medium is default difficulty value
        if diff in ['0', '1', '2', '3', '4', '5', '6']:
            return ord(diff) - 48
        elif diff == "beg":
            return 1
        elif diff in ('eas', 'sim'):
            return 2
        elif diff == 'med':
            return 3
        elif diff == 'cha':
            return 4
        elif diff in ('dif', 'har'):
            return 5
        elif diff in ('vir', 'kil'):
            return 6
        else:
            stErr = "Difficulty value \"%s\" not meaningful" % (difficulty)
            Fronimo.print_error(self.stFile, stErr)
            return 0

    def parse_info(self):
        # initialize with existing values
        global instCount
        global partCount
        lsInfo = re.split('\n', self.info)
        isRemark = False
        remarks = ""
        for datum in lsInfo:
            datum = datum.strip()
            # Ignore blank lnes
            if datum == "":
                continue
            if isRemark:
                if remarks:
                    remarks = remarks + '|' + datum 
                else:
                    remarks = datum
            else:
                isRemark = (datum.find('--') == 0)
                if isRemark:
                # everything after line stating with '--' is a remark
                    continue
                if datum.find(':') == -1:
                    continue
                # so it is a field
                # decode it
                reItem = reInfo.search(datum)
                if reItem:
                    field = reItem.group(1)
                    value = reItem.group(2)
                    value = value.strip()
                    field = field.lower()
                    if field in ["tra", "rea", "arr"]:
                        self.arranger = value
                    elif field in ["lib", "sou", "pub" ]:
                        self.source = value
                    elif field in["ins", "ens" ]:
                        self.ensemble = value
                    elif field in ["doc"]:
                        self.document = value
                    elif field in ["ori", "co0"]:
                        self.composer0 = value
                    elif field == "tit":
                        self.title = value
                    elif field == "sub":
                        self.subtitle = value
                    elif field == "com":
                        self.composer = value
                    elif field == "doc":
                        self.document = value
                    elif field == "pag":
                        self.page = value
                    elif field == "edi":
                        self.editor = value
                    elif field == "enc":
                        self.encoder = value
                    elif field == "int":
                        self.intabulator = value
                    elif field == "con":
                        self.concordances = value
                    elif field == "ctr":
                        self.contributor = value
                    elif field == "pie":
                        self.piece = value
                    elif field == "fac":
                        self.facurl = value
                    elif field == "rec":
                        self.recurl = value
                    elif field == "sec":
                        self.section = value
                    elif field == "typ":
                        self.type = value
                    elif field == "key":
                        self.key = value
                    elif field == "dif":
                        self.difficulty = self.get_diff_val(value)
                    elif field == "par":
                        self.part = value
                    else:
                        stErr = "Info field \"%s\ ""not found" %(field)
                        Fronimo.print_error(self.stFile, stErr)
                else:
                    continue
        self.remarks = remarks
#       checking to see of all parts list items are in the ensemble list.
#       first collect all items in the ensemble, including tags
        ensList = self.ensemble.split(",")
        insList = []
        ensItems = []
        for ens in ensList:
            ens = ens.strip()
            ens = ens.lower()
            if ens.find(":") > 0:
                instag = ens.split(":")
#           Include tags for part check but not for instrument check
                ensItems.append(instag[0].strip())
                ensItems.append(instag[1].strip())
                insList.append(instag[1].strip())
            else:
                insList.append(ens)
                ensItems.append(ens)
#       Then check part list item against them
        if self.part != "":
            partList = self.part.split(",")
            for ins in partList:
                ins = ins.strip()
                ins = ins.lower()
                if ins == "score":
                    continue
                if not ins in ensItems:
                    stErr = "Part \"%s\" not in ensemble list" % (ins)
                    partCount += 1
                    Fronimo.print_error(self.stFile, stErr)
        for ins in insList:
            found = False
            ins = ins.strip()
            ins = ins.lower()
            for item in Fronimo.instList:
#               if item[0] == ins:
                if item[1] == ins:
                    found = True
                    break
            if not found:
                instCount += 1
                stErr = "Inst. \"%s\" not found in \"%s\"." % (ins, self.stFile)
                print(stErr, file=Fronimo.flNewInsts)
                Fronimo.flNewInsts.flush()
        # end of for datum in lsInfo
        return True
# end of parse_info

    def write_new_name(self, newName):
        """Record a proper name that was not found in the names list.

        Increments the module-level ``nameCount`` tally, then appends a
        ``<name> --> <file>`` line to ``Fronimo.flNewNames`` and flushes
        it immediately.
        """
        global nameCount
        nameCount = nameCount + 1
        entry = "%s --> %s" % (newName, self.stFile)
        # flush=True makes print() flush the report file after writing.
        print(entry, file=Fronimo.flNewNames, flush=True)

    def check_name(self, name):
        """Check a proper name, or a compound of two names, against the names list.

        ``?`` characters and newlines are removed from *name* first.  When
        the module-level pattern ``reSplitAndOr`` matches, its groups 1 and
        2 are checked as two separate names; otherwise the whole string is
        checked as one name.  A name is known when it equals the stripped
        string ``item[2] + ' ' + item[1]`` for some entry of
        ``Fronimo.nameList``.

        Every name that is not known is reported through
        ``write_new_name``.  Earlier versions returned from inside the
        loop, so the second half of a compound name was never examined.

        Returns:
            True if at least one name was supplied and all of them are
            known; False otherwise (including for an empty *name*).
        """
        name = name.replace('?', '').replace('\n', '')
        val = reSplitAndOr.search(name)
        if val:
            lsName = [val.group(1), val.group(2)]
        elif name:
            lsName = [name]
        else:
            # Nothing left to look up.
            return False
        # Index the known "first last" strings once rather than rescanning
        # the whole list for every candidate.
        known = {(item[2] + ' ' + item[1]).strip() for item in Fronimo.nameList}
        allFound = True
        for nm in lsName:
            if nm not in known:
                self.write_new_name(nm)
                allFound = False
        return allFound
# end of check_name
'''
    def write_new_types(self):
        """Report each piece type that is absent from ``Fronimo.typeList``.

        ``self.type`` is treated as a comma-separated list.  Each entry is
        stripped and lower-cased, then compared with the first column of
        every ``Fronimo.typeList`` row.  For every entry without a match
        the module-level ``typeCount`` is incremented and a
        ``<type> --> <file>`` line is written to ``Fronimo.flNewTypes``
        and flushed.
        """
        global typeCount
        for raw in self.type.split(","):
            typ = raw.strip().lower()
            # Removing "?" from questionable types is currently disabled.
            if any(item[0] == typ for item in Fronimo.typeList):
                continue
            typeCount += 1
            stOut = "%s --> %s" % (typ, self.stFile)
            print(stOut, file=Fronimo.flNewTypes, flush=True)
# end of write_new_types

# Get 0 padding for pages in the directory.
# returns [pad, suffix]
    def get_pad(self, pageDir):
        """Work out the zero-padding width and file suffix used in *pageDir*.

        Only entries whose last four characters (lower-cased) are
        ``.png``, ``.tif``, ``.pdf`` or ``.jpg`` are considered.  From each
        stem one trailing position letter (``a``-``m``) and then one
        trailing mispagination letter (``x``, ``y`` or ``z``) are removed;
        if what remains is all digits it is a candidate page number.

        Stems that are empty, or become empty once a marker letter is
        removed (for example ``a.png``), are ignored instead of raising
        ``IndexError``.

        Args:
            pageDir: directory to scan; passed straight to ``os.listdir``.

        Returns:
            ``[pad, suffix]``.  ``pad`` is 3, 2 or 1 when the highest page
            number found is at least 100, 10 or 1 respectively, and 0 when
            no numbered page was found.  ``suffix`` is the lower-cased
            extension of the last recognised file in listing order, or
            ``''`` if there was none.
        """
        suffix = ''
        lastPage = 0
        for entry in os.listdir(pageDir):
            suf = entry[-4:].lower()
            if suf not in ('.png', '.tif', '.pdf', '.jpg'):
                continue
            suffix = suf
            stem = entry[:-4]
            # Strip the position-on-page designation.
            if stem and stem[-1] in 'abcdefghijklm':
                stem = stem[:-1]
            # Strip the letter used to disambiguate duplicated page numbers.
            if stem and stem[-1] in 'xyz':
                stem = stem[:-1]
            if stem.isdigit():
                lastPage = max(lastPage, int(stem))
        # File numbers are zero padded to the width of the highest page.
        if lastPage >= 100:
            pad = 3
        elif lastPage >= 10:
            pad = 2
        elif lastPage >= 1:
            pad = 1
        else:
            pad = 0
        return [pad, suffix]
#end get_pad

# Convert self.page into a valid directory entry
    def get_page_entry(self, pad):
        """Turn ``self.page`` into the stem of a facsimile file name.

        Returns ``''`` when ``self.page`` is blank, contains ``#`` or
        ``%``, or does not look like ``p.``/``f.`` followed by a page
        designation (the last case is also reported through
        ``Fronimo.print_error``).

        Otherwise the designation is taken up to the first comma or full
        stop, and up to three trailing markers are peeled off from the
        right: a position letter ``a``-``m``, a ``v``, and one of
        ``x``/``y``/``z``.  If the remainder is not purely numeric it is
        returned with the ``x``/``y``/``z`` letter and ``v`` re-attached.
        A numeric remainder is zero-filled to *pad* digits when *pad* is
        greater than 1.  The position letter is re-attached only when the
        number is zero (pages such as ``000a``).

        NOTE(review): a designation consisting solely of a position letter
        (e.g. ``f. a``) empties the string and the next ``[-1]`` lookup
        raises IndexError - confirm whether such input can occur.
        """
        raw = self.page
        # Blank page, or page containing % or #, guarantees no facsimile file.
        if raw == '' or re.search('[#%]', raw) is not None:
            return ''
        match = re.search(r'^[fp][fp]*\. *([^,.][^,.]*)', raw)
        if match is None:
            errmsg = "cannot parse page # %s" % (self.page)
            Fronimo.print_error(self.stFile, errmsg)
            return ''
        stem = match.group(1)

        # Position-on-page designation.
        position = ''
        if stem[-1] in 'abcdefghijklm':
            position, stem = stem[-1], stem[:-1]
        # Trailing 'v' marker.
        verso = ''
        if stem[-1] == 'v':
            verso, stem = 'v', stem[:-1]
        # x, y or z may have been added to a page number to handle misnumberings.
        extra = ''
        if stem[-1] in 'xyz':
            extra, stem = stem[-1], stem[:-1]

        # Non-numeric entries such as a2 or l4v are passed through unpadded.
        if not stem.isdigit():
            return stem + extra + verso
        if pad > 1:
            stem = stem.zfill(pad)
        # Leading pages such as 000a, 000b keep their position letter.
        if int(stem) == 0:
            stem = stem + position
        return stem + extra + verso
# end get_page_entry

    def make_pagedir(self):
        """Build the path of the directory expected to hold the facsimile pages.

        The first path component comes from ``self.source``.  If the
        source matches the pattern ``[A-Z]+-[A-Z]+[a-z]*`` it is passed
        through ``self.slugify``.  Otherwise it is looked up in
        ``self.nameList`` as ``item[2] + ' ' + item[1]`` and replaced by
        that row's ``item[0]`` with spaces turned into underscores.  A
        source found by neither route is handed to ``check_name`` and
        ``''`` is returned.

        The second component is ``self.slugify(self.document)``.  With a
        volume the result is ``<doc>/v.<volume>_<date>``; without one it
        is ``<doc>_<date>`` with ``>`` in the date replaced by ``-``.
        Everything is rooted at the module-level ``FACHEAD``.

        NOTE(review): unlike ``check_name``, the lookup string here is not
        stripped, so a row with an empty ``item[2]`` cannot match - confirm
        that this is intended.
        """
        source = self.source
        if re.match('[A-Z]+-[A-Z]+[a-z]*', source):
            owner = self.slugify(source)
        else:
            # A proper name: composer, intabulator, publisher, anthologist, etc.
            for entry in self.nameList:
                if source == entry[2] + ' ' + entry[1]:
                    # Use the row's first column, with spaces replaced.
                    owner = entry[0].replace(' ', '_')
                    break
            else:
                # No facsimile for an unmatched (e.g. compound) name, but
                # still pick up any stray names.
                self.check_name(source)
                return('')
        document = self.slugify(self.document)
        base = FACHEAD + owner + '/' + document
        if self.volume:
            # Volume directories carry the date; the document directory does not.
            return base + '/' + "v." + self.volume + '_' + self.date
        # Without volumes the document directory itself carries the date.
        return base + '_' + self.date.replace('>', '-')
    # end of make_pagedir 

    def get_facurl(self, svPageDir = [''], svPad = [0], svSuffix = ['']):
        """Return the path of this piece's facsimile file, or ``''`` if none.

        The three list parameters are deliberately mutable defaults: they
        act as static variables that remember the page directory, padding
        and suffix from the previous call, so a directory is only scanned
        with ``get_pad`` when it differs from the last one.  Callers
        normally omit them.

        Steps:
          1. ``make_pagedir()`` gives the page directory.  If it is ``''``
             or listed in ``self.noFacList`` the remembered values are
             reset and ``''`` is returned.  This check now comes first, so
             an empty directory is never passed on to ``get_pad`` (it used
             to be whenever the remembered directory was also ``''``).
          2. Padding and suffix are reused when the directory is unchanged
             and recomputed with ``get_pad`` otherwise.  Previously
             ``get_pad`` ran on every call and overwrote the reused values.
          3. ``get_page_entry(pad)`` gives the file stem; ``''`` means no
             facsimile.
          4. If ``<dir>/<stem><suffix>`` is an existing file it is
             returned; otherwise it is reported through ``no_fac_page``
             and ``''`` is returned.
        """
        pageDir = self.make_pagedir()
        # No directory containing pages = no facsimile to find.
        if pageDir == '' or pageDir in self.noFacList:
            # Reset everything for next time.
            svPageDir[0] = ''
            svSuffix[0] = ''
            svPad[0] = 0
            return ''
        if pageDir == svPageDir[0]:
            # Same directory as last call: reuse what was worked out then.
            pad = svPad[0]
            suffix = svSuffix[0]
        else:
            # New page directory: get its padding and suffix and remember them.
            pad, suffix = self.get_pad(pageDir)
            svPageDir[0] = pageDir
            svPad[0] = pad
            svSuffix[0] = suffix
        # Page file name without suffix, zero padded if appropriate.
        page = self.get_page_entry(pad)
        # No page, no facurl.
        if page == '':
            return ''
        facurl = pageDir + '/' + page + suffix
        if os.path.isfile(facurl):
            return facurl
        # Facsimile directory is in use, but this facsimile file doesn't exist.
        self.no_fac_page(facurl)
        return ''
    # end of get_facurl
'''
    #Starts with an uncompressed fronimo string (stFron) and reads values into
    # an instance of a fronimo class object.
    def populate(self):
        """Fill this instance's fields from the fronimo string ``self.stFron``.

        Strings are read one after another with ``self._getBstr()``,
        starting at ``self.start_offset`` for the page number and footnote
        and again from 14 characters past the ``'CPiece'`` marker for the
        title, subtitle, composer, two discarded strings and the info
        block.  The order of the reads therefore matters.

        Returns False only when the ``'CPiece'`` marker is missing; True
        otherwise.  Footnote or info parse failures are reported through
        ``Fronimo.print_error`` but do not change the return value.
        """
        global facCount
        #Assume we can parse footnote unless proven otherwise.
        fFootnote = True
        self.offset = self.start_offset
        pg = self._getBstr() # Skip page number string
        self.footnote = self._getBstr()
        self.footnote = self.footnote.strip()
        if not self.parse_footnote():
            Fronimo.print_error(self.stFile,
            "Cannot parse footnote:\n%s" % self.footnote) 
            fFootnote = False
        CPiecePos = self.stFron.find('CPiece')
        # This is very unlikely to happen
        if CPiecePos == -1:
            Fronimo.print_error(self.stFile, "Cannot find pattern 'CPiece'")
            return False
        # Continue reading 14 characters past the start of the marker.
        self.offset = CPiecePos + 14
        self.title = self._getBstr()
        self.title = self.title.strip(' \t')
        self.title = self.rtf2latin(self.title)
        self.subtitle = self._getBstr()
        # str.strip() takes a set of characters, so this removes square
        # brackets as well as tabs and spaces.
        # NOTE(review): confirm that stripping brackets is intended.
        self.subtitle = self.subtitle.strip('[\t ]')
        self.subtitle = self.rtf2latin(self.subtitle)
        # Line breaks inside the subtitle are stored as '|'.
        self.subtitle = self.subtitle.replace('\n', '|')
        self.composer0 = self.get_composer0()
        self.composer = self._getBstr()
        self.composer = self.composer.strip('[\t ]')
        self.composer = self.rtf2latin(self.composer)
        # Fall back to the composer (without '?') when no source is set.
        if self.source == "":
            self.source = self.composer.replace('?', '')
        if self.source == 'Anonymous':
            self.source = 'Unknown'
        self._getBstr() # Discard text at beginning and end of section
        self._getBstr()
        self.info = self._getBstr()
        self.info = self.info.strip()
        self.info = self.info.replace ('\r', '')
        # Skip key field; will get key from info field
        # offset = stFron.find('CBAR',offset) + 10
        # keyNum = self._getWord(True)
        # NOTE(review): 'Anon.' is expanded only after self.source was
        # derived above, so a composer of 'Anon.' does not become a source
        # of 'Unknown' - confirm intended.
        self.composer = self.composer.replace('Anon.', 'Anonymous')
        self.composer0 = self.composer0.replace('Anon.', 'Anonymous')
        # Test to see if the facsimile file exists,
        # but only if the footnote parse was OK.
        if fFootnote:
            self.facurl = self.get_facurl()
            if not self.facurl:
                facCount += 1
        if self.info:
            if not self.parse_info():
                Fronimo.print_error(self.stFile, "Cannot parse info string.")
        else:
            Fronimo.print_error(self.stFile, "No info data.")
        # Record the file's modification and change/creation times as whole seconds.
        self.mtime = int(os.path.getmtime(self.stFile))
        self.ctime = int(os.path.getctime(self.stFile))
        return True
# End Populate

    def make_info(self):
        """Assemble the info text block from this instance's fields.

        Produces four ``name: value`` lines (``key``, ``type``, ``ens``,
        ``diff``) separated by CR LF.  The difficulty line uses the entry
        of the module-level ``difficulties`` list at index
        ``int(self.difficulty)``.  If ``self.remarks`` is non-empty it is
        appended after a line holding only ``--``.
        """
        fields = [
            'key: ' + self.key,
            'type: ' + self.type,
            'ens: ' + self.ensemble,
            'diff: ' + difficulties[int(self.difficulty)],
        ]
        infoText = '\r\n'.join(fields)
        if self.remarks:
            infoText = infoText + '\r\n--\r\n' + self.remarks
        return infoText

    def make_footnote(self):
        """Compose the footnote string from this instance's fields.

        Layout: ``<source>  <document>[, v.<volume>] (<date>), <mark><page>.  ``
        followed by the encoder/editor credit.  The source is left out when
        it equals the composer.  ``<mark>`` is ``p.`` when the page starts
        with a digit, ``f.`` when it starts with a letter, and otherwise
        the page's own first character.

        An empty ``self.page`` yields an empty mark; it used to raise
        ``IndexError``.

        Returns:
            The footnote as a single string.
        """
        parts = [self.source if self.source != self.composer else '']
        parts.append('  ' + self.document)
        if self.volume != '':
            parts.append(', v.' + str(self.volume))
        parts.append(' (' + self.date + '), ')
        page = self.page
        if re.match("^[0-9]", page):
            stMark = 'p.'
        elif re.match("^[a-zA-Z]", page):
            stMark = 'f.'
        else:
            # Slice instead of index so that an empty page is tolerated.
            stMark = page[:1]
        parts.append(stMark + page + '.  ')
        if self.editor == self.encoder:
            parts.append('Encoded and edited by ' + self.encoder + '.')
        else:
            parts.append('Encoded by ' + self.encoder + '; edited by ' + self.editor + '.')
        return ''.join(parts)

# Takes data from a fronimo class instance; inserts it into existing stFron
    def depopulate(self):
        """Write this instance's fields back into the fronimo string.

        Mirrors ``populate``: starting at ``self.start_offset`` it reads
        past the page number string and puts the regenerated footnote,
        then repositions 14 characters past the ``'CPiece'`` marker and
        puts title, subtitle and composer (each converted with
        ``latin2rtf``), reads past two strings, and puts the regenerated
        info block.

        ``populate`` stores subtitle line breaks as ``'|'``; they are
        converted back to CR LF here.  The previous pattern ``'\\|'`` was a
        literal backslash followed by a pipe, which never matched.

        Side effects: ``self.footnote`` and ``self.subtitle`` are
        overwritten with the values that were written.

        Returns:
            False if the ``'CPiece'`` marker cannot be found, True otherwise.
        """
        self.offset = self.start_offset
        # Read past the page number string; the value itself is not needed.
        self._getBstr()
        self.footnote = self.make_footnote()
        self._put(self.footnote)
        newPos = self.stFron.find('CPiece')
        # This is very unlikely to happen
        if newPos == -1:
            Fronimo.print_error(self.stFile, "Cannot find pattern 'CPiece'")
            return False
        self.offset = newPos + 14
        title = self.latin2rtf(self.title)
        self._put(title)
        # str.replace() takes a plain substring, not a regex: no escaping.
        self.subtitle = self.subtitle.replace('|', '\r\n')
        self.subtitle = self.latin2rtf(self.subtitle)
        self._put(self.subtitle)
        composer = self.latin2rtf(self.composer)
        self._put(composer)
        # Discard text at beginning and end of section
        self._getBstr()
        self._getBstr()
        info = self.make_info()
        self._put(info)
        return True
# end depopulate

    # If from a template, prompts where to write it to.
    # If from an existing file, writes to that file
    # Maybe with a backup file, until all is known to be kosher
    def write_file(self):
        """Write ``self.stFron`` to disk as a gzip-compressed file.

        The string is encoded as latin1.  The target path is
        ``self.outfile`` after ``add_prefix_and_suffix`` has been applied.

        The gzip handle is now always closed.  It was previously left
        open, and closing is what flushes the remaining compressed data
        and finalises the file.

        Returns:
            True on success.  False, after reporting through
            ``Fronimo.print_error``, if the file cannot be opened or fewer
            bytes than expected were accepted by ``write``.
        """
        btFron = bytes(self.stFron, 'latin1')
        stOutfile = add_prefix_and_suffix(self.outfile)
        try:
            flOut = gzip.open(stOutfile, "wb")
        except OSError:
            # Use the class-level call form, like every other error report here.
            Fronimo.print_error(stOutfile, 'Cannot open file for writing')
            return False
        with flOut:
            written = flOut.write(btFron)
        if written != len(btFron):
            Fronimo.print_error(stOutfile, 'File not completely written')
            return False
        return True
# end of write_file
# end of class Fronimo

def add_prefix_and_suffix(stIn):
    """Return *stIn* with ``PERSONDIR`` in front and ``.ft3`` at the end.

    Each part is added only if it is not already present, so applying
    the function to its own result changes nothing.
    """
    prefixed = stIn if stIn.startswith(PERSONDIR) else PERSONDIR + stIn
    if prefixed.endswith('.ft3'):
        return prefixed
    return prefixed + '.ft3'

def del_prefix_and_suffix(stIn):
    """Return *stIn* without a leading ``PERSONDIR`` and a trailing ``.ft3``.

    Only the leading prefix and the trailing suffix are removed.  The
    earlier ``str.replace`` calls deleted every occurrence, so a name such
    as ``piece.ft3.copy.ft3`` lost its inner ``.ft3`` as well.
    """
    if stIn.startswith(PERSONDIR):
        stIn = stIn[len(PERSONDIR):]
    if stIn.endswith('.ft3'):
        stIn = stIn[:-len('.ft3')]
    return stIn

def process_form(diForm):
    """Apply the values in *diForm* to a fronimo file and write it out.

    ``diForm['infile']`` names the input file; it is completed with
    ``add_prefix_and_suffix`` and loaded into a ``Fronimo`` object.  The
    form is then loaded into that object with ``load_form``, the object
    is serialised back with ``depopulate`` and saved with ``write_file``.

    Returns:
        True if every step succeeded.  False, after reporting through
        ``Fronimo.print_error``, as soon as one step fails.
    """
    stIn = add_prefix_and_suffix(diForm['infile'])
    # Populate Fronimo object from input file
    fron = Fronimo(stIn)
    if fron.start_offset < 0:
        Fronimo.print_error(stIn,"Cannot populate Fronimo.")
        print('Cannot populate class Fronimo from file "', stIn, '.')
        return False
    # Load items from form into Fronimo object
    fron.load_form(diForm)
    # Create fronimo file string from fronimo object
    if not fron.depopulate():
        Fronimo.print_error(stIn,"Cannot update fronimo file string.")
        return False
    if not fron.write_file():
        Fronimo.print_error(stIn,"Cannot write out fronimo file.")
        return False
    return True
# end process_form