import os
import json
import difflib
from wtforms.validators import InputRequired, Length, NumberRange, Optional, Regexp, ValidationError, InputRequired, DataRequired
import platform, re
try:
    import readline
except ImportError:
    readline = None
from proc import add_prefix_and_suffix, del_prefix_and_suffix
# Choose the website root from the platform string: the D: drive path when
# the string starts with 'Windows', otherwise the /mnt/d mount point.
currOS = platform.platform()
LOCALBASE = 'D:/website/' if currOS.startswith('Windows') else '/mnt/d/website/'

# In production mode LOCALBASE would normally be '' (a contributors
# directory). This would then be a separate app from the main website,
# launched in its own directory, with perhaps a few links to certain files
# in the main website base directory. The user would have read access to
# template.ft3 but read/write access only to files in their own
# subdirectory.

# GLOBALS
# Reference tables (tab-separated) and data directories under LOCALBASE.
NAMESFILE = LOCALBASE + "names.tsv"
INSTSFILE = LOCALBASE + "insts.tsv"
TYPESFILE = LOCALBASE + "types.tsv"
FACHEDIR = LOCALBASE + "facsimiles"
SOURCEDIR = LOCALBASE + "sources"
DFTJSON = LOCALBASE + "dft.json"
SETTINGSJSON = LOCALBASE + "settings.json"
CONTRIBDIR = LOCALBASE + 'contributors/'
# Temporary while debugging: every contributor shares CONTRIBDIR. The plan
# is a per-person directory (the old note called it PERSONALDIR):
#PERSONDIR = CONTRIBDIR + lastName + firstName[0]
PERSONDIR = CONTRIBDIR
TEMPLATE = PERSONDIR +'template.ft3'

def make_list(flList):
    """Read a tab-separated table from an open file and return display values.

    The layout is chosen by comparing flList with the module-level file
    objects flNames and flInsts:

    * names file: 'first last' built from columns 1 and 0, or column 0 alone
      when the row has a single column;
    * instruments file: column 1 of every row that has at least two columns;
    * anything else (music types): column 0.

    Blank lines (including the empty string produced by a trailing newline)
    are skipped, matching make_instrument_list and make_type_list; previously
    each one added an empty-string entry to the result.
    """
    # Decide the layout once; the comparison order is the same as before, so
    # flInsts is only looked up when the file is not the names file.
    if flList == flNames:
        stKind = 'names'
    elif flList == flInsts:
        stKind = 'insts'
    else:
        stKind = 'types'

    liOut = []
    for line in flList.read().split('\n'):
        line = line.strip()
        if line == '':
            continue
        lsRec = line.split('\t')
        if stKind == 'names':
            if len(lsRec) < 2:
                liOut.append(lsRec[0])
            else:
                liOut.append(lsRec[1] + ' ' + lsRec[0])
        elif stKind == 'insts':
            if len(lsRec) >= 2:
                liOut.append(lsRec[1])
        else:
            liOut.append(lsRec[0])
    return liOut

def make_instrument_list(flList):
    """Return the instrument names found in the third tab-separated column.

    flList is an open text file object and is read to the end. Blank lines,
    rows with fewer than three columns, and rows whose third column is the
    heading 'inst' (compared case-insensitively) are skipped.
    """
    instruments = []
    for raw_row in flList.read().split('\n'):
        columns = raw_row.strip().split('\t')
        # A blank row splits into one empty column, so this single test
        # rejects both blank rows and short rows.
        if len(columns) < 3:
            continue
        name = columns[2].strip()
        if name.lower() != 'inst':
            instruments.append(name)
    return instruments

def make_type_list(flList):
    """Return the first tab-separated column of every non-blank line.

    flList is an open text file object and is read to the end. Each line is
    stripped before splitting and each value is stripped again; empty values
    are left out.
    """
    stripped_rows = (raw.strip() for raw in flList.read().split('\n'))
    first_columns = (row.split('\t')[0].strip() for row in stripped_rows if row != '')
    return [name for name in first_columns if name != '']

def make_library_list(dirPath):
    """List 'SIGLUM:Library name' strings for the sub-directories of dirPath.

    Only directories named '<siglum>_<name>' are used, and only when the part
    before the first underscore matches the siglum pattern below. Remaining
    underscores in the name become spaces. Returns [] when dirPath is not a
    directory. Entries come back in os.listdir() order.
    """
    libraries = []
    if os.path.isdir(dirPath):
        for folder in os.listdir(dirPath):
            if '_' not in folder or not os.path.isdir(os.path.join(dirPath, folder)):
                continue
            siglum, _, library_name = folder.partition('_')
            if re.fullmatch(r'[A-Z]{1,3}-[A-Z]{1,3}[A-Za-z]*', siglum):
                libraries.append(f"{siglum}:{library_name.replace('_', ' ')}")
    return libraries

def make_name_facsimile_key_lookup(stNameFile):
    """Map each normalized 'first last' name to a short directory key.

    stNameFile is the path of a latin1, tab-separated file whose first column
    is the last name and second column the first name. The key is the last
    name with every non-alphanumeric character removed, followed by the first
    alphanumeric character of the first name. Rows without a first name, or
    whose cleaned last/first name is empty, are skipped. Returns {} when the
    file cannot be opened.
    """
    keys_by_name = {}
    try:
        handle = open(stNameFile, 'r', encoding='latin1')
    except OSError:
        return keys_by_name

    with handle:
        for raw_row in handle:
            fields = raw_row.strip().split('\t')
            # Pad so that a row with a single column reads as "no first name".
            last, first = (fields + [''])[:2]
            last = last.strip()
            first = first.strip()
            if first == '':
                continue
            last_key = re.sub(r'[^A-Za-z0-9]+', '', last)
            first_key = re.sub(r'[^A-Za-z0-9]+', '', first)
            if last_key == '' or first_key == '':
                continue
            display = (first + ' ' + last).strip()
            keys_by_name[normalize_name_value(display)] = last_key + first_key[0]
    return keys_by_name

def _list_source_subdirs():
    """Return (entry, path) pairs for every directory directly under SOURCEDIR.

    A missing or unreadable SOURCEDIR yields an empty list instead of raising,
    in line with how make_library_list and get_document_names treat their
    directories.
    """
    try:
        liEntries = os.listdir(SOURCEDIR)
    except OSError:
        return []
    liOut = []
    for entry in liEntries:
        source_path = os.path.join(SOURCEDIR, entry)
        if os.path.isdir(source_path):
            liOut.append((entry, source_path))
    return liOut


def get_documents_for_source(source):
    """Return the document names known for a source.

    Lookup order:

    1. the prebuilt diSourceDocuments index (keyed by normalized source);
    2. if the source starts with a library siglum (optionally followed by
       ':' and a name), the sub-directories of SOURCEDIR named
       '<siglum>_...';
    3. otherwise the source is treated as a person name: its key from
       diNameFacsimileKeys is searched among the '+'-separated tokens of the
       non-siglum directory names in SOURCEDIR.

    Returns [] for a blank source, an unknown person name, or when nothing
    matches.
    """
    stSource = source.strip()
    if stSource == '':
        return []

    liDocuments = diSourceDocuments.get(normalize_source_value(stSource), [])
    if len(liDocuments) > 0:
        return liDocuments

    liSourceDirs = []
    siglum_match = re.match(r'^([A-Z]{1,3}-[A-Z]{1,3}[A-Za-z]*)(?::\s*(.*))?$', stSource)
    if siglum_match:
        stPrefix = siglum_match.group(1) + '_'
        for entry, source_path in _list_source_subdirs():
            if entry.startswith(stPrefix):
                liSourceDirs.append(source_path)
        return get_document_names(liSourceDirs)

    stKey = diNameFacsimileKeys.get(normalize_name_value(stSource), '')
    if stKey == '':
        return []

    for entry, source_path in _list_source_subdirs():
        # Library directories ('<siglum>_...') never belong to a person.
        if '_' in entry and re.fullmatch(r'[A-Z]{1,3}-[A-Z]{1,3}[A-Za-z]*', entry.split('_', 1)[0]):
            continue
        if stKey in entry.split('+'):
            liSourceDirs.append(source_path)
    return get_document_names(liSourceDirs)

def build_source_document_index():
    """Build the source -> documents index from DFTJSON, else SETTINGSJSON.

    SETTINGSJSON is consulted only when DFTJSON produces an empty index.
    """
    return (load_source_document_index_from_json(DFTJSON)
            or load_source_document_index_from_json(SETTINGSJSON))

def build_source_list():
    """Build the list of known sources from DFTJSON, else SETTINGSJSON.

    SETTINGSJSON is consulted only when DFTJSON produces an empty list.
    """
    return (load_source_list_from_json(DFTJSON)
            or load_source_list_from_json(SETTINGSJSON))

def load_source_list_from_json(stPath):
    """Return the distinct 'source' values from a JSON list of records.

    stPath must hold a JSON array; non-dict items are ignored. Values are
    stripped, and duplicates (compared after normalize_source_value) keep
    their first spelling. Returns [] when the file is missing, unreadable,
    or not a JSON array.

    A JSON null source is treated as missing; previously str(None) turned it
    into the literal source name 'None'.
    """
    if not os.path.exists(stPath):
        return []

    data = load_json_file(stPath)
    if not isinstance(data, list):
        return []

    liOut = []
    seSeen = set()
    for item in data:
        if not isinstance(item, dict):
            continue
        rawSource = item.get('source')
        if rawSource is None:
            continue
        stSource = str(rawSource).strip()
        stNormalized = normalize_source_value(stSource)
        if stNormalized == '' or stNormalized in seSeen:
            continue
        seSeen.add(stNormalized)
        liOut.append(stSource)
    return liOut

def load_source_document_index_from_json(stPath):
    """Index the documents of each source from a JSON list of records.

    Returns a dict mapping the normalized source to the list of its document
    names (stripped, original spelling, first occurrence wins when two names
    normalize to the same value). Records that are not dicts, lack a source
    or document, or whose document fails is_usable_document_value are
    skipped. Returns {} when the file is missing, unreadable, or not a JSON
    array.

    JSON null values are treated as missing; previously str(None) turned
    them into the literal text 'None'.
    """
    if not os.path.exists(stPath):
        return {}

    data = load_json_file(stPath)
    if not isinstance(data, list):
        return {}

    diIndex = {}
    # Normalized documents already stored per source, kept alongside the
    # index so duplicates are detected without re-normalizing the whole
    # list for every record.
    diSeen = {}
    for item in data:
        if not isinstance(item, dict):
            continue
        rawSource = item.get('source')
        rawDocument = item.get('document')
        stSource = '' if rawSource is None else str(rawSource).strip()
        stDocument = '' if rawDocument is None else str(rawDocument).strip()
        if stSource == '' or stDocument == '':
            continue
        if not is_usable_document_value(stDocument):
            continue
        stNormalizedSource = normalize_source_value(stSource)
        stNormalizedDocument = normalize_document_value(stDocument)
        if stNormalizedSource == '' or stNormalizedDocument == '':
            continue
        liDocuments = diIndex.setdefault(stNormalizedSource, [])
        seDocuments = diSeen.setdefault(stNormalizedSource, set())
        if stNormalizedDocument not in seDocuments:
            seDocuments.add(stNormalizedDocument)
            liDocuments.append(stDocument)
    return diIndex

def is_usable_document_value(stDocument):
    """Tell whether a document value is worth indexing.

    Returns False when the normalized value is empty, or when it consists
    solely of lowercase letter groups each followed by a dot (plus optional
    trailing letters/dots) and contains no digit. Returns True otherwise.
    """
    text = normalize_document_value(stDocument)
    if text == '':
        return False
    only_dotted_words = re.fullmatch(r'(?:[a-z]+\.)+[a-z.]*', text) is not None
    has_digit = re.search(r'\d', text) is not None
    return not (only_dotted_words and not has_digit)

def load_json_file(stPath):
    """Parse the JSON file at stPath, trying utf-8 first and then latin1.

    Returns the parsed value from the first encoding that works, or None when
    the file cannot be read or parsed under either encoding.
    """
    for codec in ('utf-8', 'latin1'):
        try:
            with open(stPath, 'r', encoding=codec) as handle:
                text = handle.read()
            return json.loads(text)
        except (OSError, ValueError):
            # ValueError also covers UnicodeDecodeError and JSON syntax
            # errors; fall through to the next encoding.
            pass
    return None

def get_document_names(liSourceDirs):
    """Collect display names of the document folders inside source folders.

    Every sub-directory of each directory in liSourceDirs contributes one
    name via get_document_display_name. Directories that cannot be listed
    are skipped. The result is de-duplicated and sorted case-insensitively.
    """
    names = set()
    for source_dir in liSourceDirs:
        try:
            children = os.listdir(source_dir)
        except OSError:
            continue
        names.update(
            get_document_display_name(os.path.join(source_dir, child), child)
            for child in children
            if os.path.isdir(os.path.join(source_dir, child))
        )
    return sorted(names, key=str.lower)

def get_document_display_name(stDocumentDir, stFallback):
    """Return the display name for a document directory.

    The name recorded in the directory's dft.json is preferred, then the one
    in dft.tsv; when neither supplies a name, stFallback is formatted with
    format_document_slug.
    """
    for reader in (get_document_from_dft_json, get_document_from_dft_tsv):
        title = reader(stDocumentDir)
        if title != '':
            return title
    return format_document_slug(stFallback)

def format_document_slug(stSlug):
    """Turn an underscore-separated directory name into a display string.

    Steps:
    * split on '_' and drop empty pieces;
    * drop a final piece that is 3-4 digits, optionally prefixed by 'c' or
      'c.' (case-insensitive);
    * if the first piece is letters/dots and the second contains a digit, the
      first two pieces form the head; else if the first piece contains a
      digit, it alone is the head; otherwise there is no head;
    * return 'head:tail' (pieces joined with spaces), or whichever of the two
      is non-empty.

    stSlug is returned unchanged when nothing usable remains.
    """
    parts = [piece for piece in stSlug.split('_') if piece != '']
    if parts and re.fullmatch(r'(?:c\.?)?\d{3,4}', parts[-1], re.IGNORECASE):
        parts = parts[:-1]
    if not parts:
        return stSlug

    # Number of leading pieces that belong to the head.
    if len(parts) >= 2 and re.fullmatch(r'[A-Za-z.]+', parts[0]) and re.search(r'\d', parts[1]):
        head_size = 2
    elif re.search(r'\d', parts[0]):
        head_size = 1
    else:
        head_size = 0

    head = ' '.join(parts[:head_size]).strip()
    tail = ' '.join(parts[head_size:]).strip()
    return ':'.join(text for text in (head, tail) if text != '') or stSlug

def get_document_from_dft_json(stDocumentDir):
    """Return the first non-empty 'document' value in <dir>/dft.json.

    The file may hold a single JSON object or an array of objects (non-dict
    array items are ignored). Returns '' when the file is missing, cannot be
    read as utf-8 JSON, or holds no usable value.

    A JSON null is treated as "no value"; previously str(None) made the
    function return the literal text 'None' as the document name.
    """
    stJson = os.path.join(stDocumentDir, 'dft.json')
    if not os.path.exists(stJson):
        return ''
    try:
        with open(stJson, 'r', encoding='utf-8') as flJson:
            data = json.load(flJson)
    except (OSError, ValueError):
        # ValueError covers both JSON syntax errors and UnicodeDecodeError.
        return ''

    if isinstance(data, dict):
        liItems = [data]
    elif isinstance(data, list):
        liItems = data
    else:
        liItems = []

    for item in liItems:
        if not isinstance(item, dict):
            continue
        rawDocument = item.get('document')
        if rawDocument is None:
            continue
        stDocument = str(rawDocument).strip()
        if stDocument != '':
            return stDocument
    return ''

def get_document_from_dft_tsv(stDocumentDir):
    """Return the first non-empty 'Document' cell in <dir>/dft.tsv.

    The first line is the header; its tab-separated names are stripped before
    looking for the exact name 'Document'. Returns '' when the file is
    missing, unreadable as utf-8, has no such column, or every cell in that
    column is empty.
    """
    tsv_path = os.path.join(stDocumentDir, 'dft.tsv')
    if not os.path.exists(tsv_path):
        return ''
    try:
        with open(tsv_path, 'r', encoding='utf-8') as handle:
            columns = [name.strip() for name in handle.readline().split('\t')]
            # An empty file gives [''], which has no 'Document' column.
            if 'Document' not in columns:
                return ''
            position = columns.index('Document')
            for row in handle:
                cells = row.rstrip('\n\r').split('\t')
                if position >= len(cells):
                    continue
                cell = cells[position].strip()
                if cell != '':
                    return cell
    except (OSError, UnicodeDecodeError):
        return ''
    return ''

def normalize_instrument_name(value):
    """Return value stripped, lower-cased, with whitespace runs collapsed.

    None is treated as the empty string.
    """
    if value is not None:
        return re.sub(r'\s+', ' ', value.strip().lower())
    return ''

def normalize_type_name(value):
    """Return the comparison key for a music type name.

    The value is stripped and lower-cased and every run of whitespace becomes
    a single space. None gives ''.
    """
    lowered = '' if value is None else value.strip().lower()
    return re.sub(r'\s+', ' ', lowered)

def normalize_name_value(value):
    """Return the comparison key for a person name.

    Leading/trailing whitespace is removed, the text is lower-cased, and
    inner whitespace runs collapse to one space. None maps to ''.
    """
    if value is None:
        return ''
    trimmed = value.strip()
    return re.sub(r'\s+', ' ', trimmed.lower())

def normalize_source_value(value):
    """Return the comparison key for a source string.

    The value is stripped and whitespace runs collapse to one space. If it
    contains a colon, the text on each side of the FIRST colon is stripped so
    that 'A : b' and 'A:b' compare equal. The result is lower-cased. None
    maps to ''.
    """
    if value is None:
        return ''
    collapsed = re.sub(r'\s+', ' ', value.strip())
    before, colon, after = collapsed.partition(':')
    if colon:
        collapsed = before.strip() + ':' + after.strip()
    return collapsed.lower()

def normalize_document_value(value):
    """Return the comparison key for a document name.

    Strips the value, lower-cases it and collapses whitespace runs to single
    spaces; None maps to ''.
    """
    if value is None:
        return ''
    return re.sub(r'\s+', ' ', value.strip().lower())

def build_instrument_lookup(liInsts):
    """Map each normalized instrument name to its display spelling.

    The display spelling is the stripped name with whitespace runs collapsed.
    Names that normalize to '' are ignored; when several names share a key,
    the first one wins.
    """
    lookup = {}
    for raw_name in liInsts:
        key = normalize_instrument_name(raw_name)
        if key:
            # setdefault keeps the spelling already stored for this key.
            lookup.setdefault(key, re.sub(r'\s+', ' ', raw_name.strip()))
    return lookup

def build_type_lookup(liTypes):
    """Map each normalized music type to its display spelling.

    The display spelling is the stripped name with whitespace runs collapsed.
    Names that normalize to '' are ignored; the first spelling seen for a key
    is kept.
    """
    lookup = {}
    for raw_type in liTypes:
        key = normalize_type_name(raw_type)
        if key:
            lookup.setdefault(key, re.sub(r'\s+', ' ', raw_type.strip()))
    return lookup

def build_name_lookup(liNames):
    """Map each normalized person name to its display spelling.

    The display spelling is the stripped name with whitespace runs collapsed.
    Names that normalize to '' are ignored; the first spelling seen for a key
    is kept.
    """
    lookup = {}
    for raw_name in liNames:
        key = normalize_name_value(raw_name)
        if key:
            lookup.setdefault(key, re.sub(r'\s+', ' ', raw_name.strip()))
    return lookup

def build_source_lookup(liSources):
    """Map each normalized source string to its display spelling.

    The display spelling is the stripped source with whitespace runs
    collapsed. Sources that normalize to '' are ignored; the first spelling
    seen for a key is kept.
    """
    lookup = {}
    for raw_source in liSources:
        key = normalize_source_value(raw_source)
        if key:
            lookup.setdefault(key, re.sub(r'\s+', ' ', raw_source.strip()))
    return lookup

def parse_csv_field(value):
    """Split a comma-separated form value into stripped items.

    A false value (None or '') gives []. Empty items between commas are kept
    as ''.
    """
    return strip_list(value.split(',')) if value else []

def validate_instrument_name(inst):
    """Check one instrument name against the diInsts table.

    Returns None when the normalized name is a key of diInsts; otherwise an
    error message that lists up to five close matches (difflib, cutoff 0.7)
    when any exist.
    """
    key = normalize_instrument_name(inst)
    if key != '' and key in diInsts:
        return None

    near = difflib.get_close_matches(key, sorted(diInsts.keys()), n=5, cutoff=0.7)
    suggestion_text = ', '.join(diInsts[item] for item in near)
    label = inst if inst and inst.strip() else '(blank entry)'
    if not suggestion_text:
        return f'"{label}" is not a valid instrument.'
    return f'"{label}" is not a valid instrument. Closest matches: {suggestion_text}'

def validate_person_name(name):
    """Check one person name against the diNames table.

    Returns None when the normalized name is a key of diNames; otherwise an
    error message that lists up to five close matches (difflib, cutoff 0.7)
    when any exist.
    """
    key = normalize_name_value(name)
    if key != '' and key in diNames:
        return None

    near = difflib.get_close_matches(key, sorted(diNames.keys()), n=5, cutoff=0.7)
    suggestion_text = ', '.join(diNames[item] for item in near)
    label = name if name and name.strip() else '(blank entry)'
    if not suggestion_text:
        return f'"{label}" is not a recognized name.'
    return f'"{label}" is not a recognized name. Closest matches: {suggestion_text}'

def validate_source_name(source):
    """Check a source string against the diSources table.

    Returns None when the normalized source is a key of diSources. A blank
    source gets a fixed message; any other unknown source gets a message
    quoting it as typed, with up to five close matches (difflib, cutoff 0.7)
    when any exist.
    """
    trimmed = source.strip()
    if trimmed == '':
        return '"(blank entry)" is not a valid source.'

    key = normalize_source_value(trimmed)
    if key != '' and key in diSources:
        return None

    near = difflib.get_close_matches(key, sorted(diSources.keys()), n=5, cutoff=0.7)
    suggestion_text = ', '.join(diSources[item] for item in near)
    if not suggestion_text:
        return f'"{source}" is not a recognized source.'
    return f'"{source}" is not a recognized source. Closest matches: {suggestion_text}'

def parse_ensemble_members(value):
    """Parse a comma-separated ensemble into (designation, instrument, item).

    An item of the form 'designation:instrument' is split at its first colon
    and both halves are stripped. An item without a colon has designation
    None. The third element is always the item as returned by
    parse_csv_field.
    """
    parsed = []
    for item in parse_csv_field(value):
        head, colon, tail = item.partition(':')
        if colon:
            parsed.append((head.strip(), tail.strip(), item))
        else:
            parsed.append((None, item.strip(), item))
    return parsed

# Load the reference tables once, at import time.
#
# open() reports failure by raising OSError; it never returns a false value,
# so the former "if not fl..." tests could not fire. Each file is now read
# inside a ``with`` block (so it is closed after parsing), and a file that
# cannot be opened is reported and leaves its table empty.
try:
    with open(NAMESFILE, 'r', encoding='latin1') as flNames:
        # make_list() recognizes the names file by comparing its argument
        # with the module-level flNames, which is bound here during the call.
        liNames = make_list(flNames)
except OSError:
    print("Cannot open ", NAMESFILE)
    flNames = None
    liNames = []
diNames = build_name_lookup(liNames)

liLibraries = make_library_list(FACHEDIR)
diLibraries = build_source_lookup(liLibraries)
diNameFacsimileKeys = make_name_facsimile_key_lookup(NAMESFILE)
liSources = build_source_list()
diSources = build_source_lookup(liSources)
diSourceDocuments = build_source_document_index()

try:
    with open(INSTSFILE, 'r', encoding='latin1') as flInsts:
        liInsts = make_instrument_list(flInsts)
except OSError:
    print("Cannot open ", INSTSFILE)
    flInsts = None
    liInsts = []
diInsts = build_instrument_lookup(liInsts)

try:
    with open(TYPESFILE, 'r', encoding='latin1') as flTypes:
        liTypes = make_type_list(flTypes)
except OSError:
    print("Cannot open ", TYPESFILE)
    flTypes = None
    liTypes = []
diTypes = build_type_lookup(liTypes)

def strip_list(liIn):
    """Strip every string in liIn in place and return the same list object."""
    for position, text in enumerate(liIn):
        liIn[position] = text.strip()
    return liIn

def key_val(form, field):
    """WTForms validator for a comma-separated list of musical keys.

    Each item must be a note letter A-G, an optional '#' or 'b', and a final
    'M' or 'm'. Raises ValidationError naming every item that does not fit.
    """
    keys = strip_list(field.data.strip().split(','))
    rejected = [key for key in keys if not re.fullmatch('[A-G][#b]?[Mm]', key)]
    if rejected:
        raise ValidationError('Bad Keys: ' + ','.join(rejected))

def type_val(form, field):
    """WTForms validator for a comma-separated list of music types.

    Every item must normalize to a key of diTypes. Raises ValidationError
    with one sentence per offending item, including up to five close matches
    (difflib, cutoff 0.7) where any exist.
    """
    problems = []
    known_types = sorted(diTypes.keys())
    for entry in parse_csv_field(field.data.strip()):
        key = normalize_type_name(entry)
        if key != '' and key in diTypes:
            continue
        near = difflib.get_close_matches(key, known_types, n=5, cutoff=0.7)
        suggestion_text = ', '.join(diTypes[item] for item in near)
        label = entry if entry and entry.strip() else '(blank entry)'
        if not suggestion_text:
            problems.append(f'"{label}" is not a valid music type.')
        else:
            problems.append(f'"{label}" is not a valid music type. Closest matches: {suggestion_text}')
    if problems:
        raise ValidationError('Invalid music type[s]: ' + ' '.join(problems))

def ensemble_val(form, field):
    """WTForms validator for the ensemble field.

    The field is a comma-separated list whose items are either an instrument
    name or 'designation:instrument'. An item with a colon must have text on
    both sides; every instrument must pass validate_instrument_name. Raises
    ValidationError listing all problems found.
    """
    problems = []
    for designation, instrument, original in parse_ensemble_members(field.data.strip()):
        has_colon = designation is not None
        if has_colon and designation == '':
            problems.append(f'"{original}" must have a designation before the colon.')
        elif has_colon and instrument == '':
            problems.append(f'"{original}" must have an instrument name after the colon.')
        else:
            error = validate_instrument_name(instrument)
            if error:
                problems.append(error)
    if problems:
        raise ValidationError('Invalid instrument list: ' + ' '.join(problems))

def part_val(form, field):
    """WTForms validator: every listed part must belong to the ensemble.

    A blank part field is accepted. Otherwise form.ensemble must be filled
    in, and each comma-separated part must be 'score' or match (after
    normalization) either a whole ensemble item or the designation before
    its colon. Raises ValidationError naming the parts that do not match.
    """
    stPart = field.data.strip()
    if stPart == '':
        return
    if form.ensemble.data.strip() == '':
        raise ValidationError('Enter ensemble first.')

    allowed = set()
    for designation, _instrument, original in parse_ensemble_members(form.ensemble.data.strip()):
        # The full item always counts; its designation counts too when the
        # item has a colon.
        candidates = [original] if designation is None else [original, designation]
        for candidate in candidates:
            key = normalize_instrument_name(candidate)
            if key:
                allowed.add(key)

    unmatched = []
    for item in parse_csv_field(stPart):
        key = normalize_instrument_name(item)
        if key == 'score':
            continue
        if key == '' or key not in allowed:
            unmatched.append(item if item != '' else '(blank entry)')

    if unmatched:
        raise ValidationError('Part not in ensemble: ' + ', '.join(unmatched))


def source_val(form, field):
    """WTForms validator for the source field.

    Validation is skipped when the form has an allow_new_source field set to
    'yes'. Otherwise raises ValidationError with the message returned by
    validate_source_name, if any.
    """
    stSource = field.data.strip()
    if getattr(form, 'allow_new_source', None) and form.allow_new_source.data == 'yes':
        return
    problem = validate_source_name(stSource)
    if problem:
        raise ValidationError(problem)

def document_val(form, field):
    """WTForms validator for the document field.

    Accepted without further checks when the field is blank, when the form's
    allow_new_document or allow_new_source field is 'yes', or when no
    documents are known for form.source. Otherwise the normalized document
    must equal one of the known documents for that source, else
    ValidationError is raised.
    """
    stDocument = field.data.strip()
    if stDocument == '':
        return
    for flag_name in ('allow_new_document', 'allow_new_source'):
        flag = getattr(form, flag_name, None)
        if flag and getattr(form, flag_name).data == 'yes':
            return

    known = get_documents_for_source(form.source.data.strip())
    if len(known) == 0:
        return

    wanted = normalize_document_value(stDocument)
    if wanted not in {normalize_document_value(item) for item in known}:
        raise ValidationError('Document not found for this source.')

def volume_val(form, field):
    """WTForms validator: the volume must be blank or a whole number 1-99.

    Returns True when the value is acceptable; raises ValidationError
    otherwise.

    Fixes: 99 is now accepted (the old ``range(1,99)`` stopped at 98 although
    the message promises 1-99), and the digit test uses ``isdecimal`` so that
    characters such as a superscript two, which ``isdigit`` accepts but
    ``int`` cannot parse, get the normal error message instead of an
    unhandled ValueError.
    """
    stVol = field.data
    if stVol is None or stVol == '':
        return True
    if not stVol.isdecimal():
        raise ValidationError('Must be blank, or a number 1-99')
    if not 1 <= int(stVol) <= 99:
        raise ValidationError('Invalid volume number. Must be 1-99.')
    return True

def page_val(form, field):
    """WTForms validator for a page or folio reference.

    Accepted forms (prefix is case-insensitive, whitespace after the dot is
    optional):
    * 'p.' or 'f.' followed by ONE page designation;
    * 'pp.' or 'ff.' followed by a comma-separated list of at least two
      designations, none empty.

    A designation is either digits with an optional 'r'/'v' and an optional
    letter a-m, or one letter (optionally doubled, same letter and case), a
    digit 1-8, an optional 'v' and an optional letter a-m. Raises
    ValidationError otherwise.
    """
    def valid_page_designation(stPage):
        if re.fullmatch('[0-9]+[rv]?[a-m]?', stPage):
            return True
        signature = re.fullmatch('([a-zA-Z])([a-zA-Z]?)[1-8]v?[a-m]?', stPage)
        if not signature:
            return False
        # A second letter, when present, must repeat the first exactly.
        return signature.group(2) in ('', signature.group(1))

    text = field.data.strip()

    list_prefix = re.match(r'^(pp|ff)\.\s*', text, re.IGNORECASE)
    if list_prefix:
        pages = strip_list(text[list_prefix.end():].split(','))
        if len(pages) < 2 or '' in pages:
            raise ValidationError('Bad pages: use pp. or ff. only for a comma-separated list like ff.135,285b.')
        rejected = [page for page in pages if not valid_page_designation(page)]
        if rejected:
            raise ValidationError('Bad pages: ' + ','.join(rejected))
        return

    single_prefix = re.match(r'^[pf]\.\s*', text, re.IGNORECASE)
    if single_prefix:
        page = text[single_prefix.end():]
        if ',' in page or not valid_page_designation(page):
            raise ValidationError('Bad pages: use p. or f. for a single page, or pp./ff. for a list.')
        return

    raise ValidationError('Bad pages: must begin with p., f., pp., or ff.')

def section_val(form, field):
    """WTForms validator for a section name such as '3. Allegro'.

    After stripping, the value must be a number without leading zero, a dot,
    one space, then text beginning with a capital letter A-Z. Raises
    ValidationError otherwise.
    """
    stIn = field.data.strip()
    # Raw string: '\.' in an ordinary string literal is an invalid escape
    # sequence, which newer Python versions warn about at compile time.
    if not re.fullmatch(r'[1-9][0-9]*\. [A-Z].*', stIn):
        raise ValidationError("Section name must start with a number, followed by a . and a space")

def name_val(form, field):
    """WTForms validator for the contributor name; currently accepts anything.

    The function has always returned True on its first statement. The draft
    code that used to follow it was therefore unreachable, and it could not
    have run anyway: it called ``os.path_writeable``, which the ``os`` module
    does not provide, and ``latin1_to_ascii``, which is not defined in the
    visible part of this module. That dead code has been removed.

    TODO: once contributor home directories exist, parse '<first name>
    <last name>' here and check that the contributor's directory is
    writable.
    """
    return True

def composer_val(form, field):
    """WTForms validator: the composer must be a name known to diNames.

    Raises ValidationError with the message from validate_person_name when
    the name is not recognized.
    """
    problem = validate_person_name(field.data)
    if not problem:
        return
    raise ValidationError(problem)

def output_val(form, field):
    """WTForms validator for the output file name.

    The name must not contain '/' or '\\', and PERSONDIR must exist and be
    writable and searchable by this process. Raises ValidationError
    otherwise.

    Fix: the old test read ``not exists and writable``, which can never be
    true, so an unusable directory was never reported. The message also
    lacked a space after the directory name, misspelled "cannot", and
    carried the source indentation inside the string.
    """
    stOutput = field.data
    if '/' in stOutput or '\\' in stOutput:
        raise ValidationError('Cannot have a slash in the file name.')
    # os.access() is False for a missing path, so one call covers existence
    # as well as the write and search (execute) permissions.
    if not os.access(PERSONDIR, os.W_OK | os.X_OK):
        stErr = ('Directory ' + PERSONDIR + ' not writable or executable.\n'
                 'Therefore cannot write to ' + field.data + '.')
        raise ValidationError(stErr)

def input_val(form, field):
    """WTForms validator: the input file must exist.

    A blank field stands for TEMPLATE. The name is passed through
    add_prefix_and_suffix before the existence check. Raises ValidationError
    quoting the field value as typed when the file is not found.
    """
    requested = field.data
    candidate = add_prefix_and_suffix(TEMPLATE if requested == '' else requested)
    if os.path.exists(candidate):
        return
    raise ValidationError('Input file: ' + field.data + ' not found.')