import difflib
import json
import os
import platform
import re

try:
    import readline
except ImportError:
    readline = None

from wtforms.validators import (
    DataRequired,
    InputRequired,
    Length,
    NumberRange,
    Optional,
    Regexp,
    ValidationError,
)

from proc import add_prefix_and_suffix, del_prefix_and_suffix

currOS = platform.platform()
if currOS.startswith('Windows'):
    LOCALBASE = 'D:/website/'
else:
    LOCALBASE = '/mnt/d/website/'
# LOCALBASE would normally be '' (a contributors directory) when this is
# in production mode.
# This would be a separate app from the main website, launched in a
# separate directory, with perhaps a few links to certain files in
# the main website base directory. The user would have read access
# to template.ft3 but rw access only to files in their own subdirectory.

# GLOBALS
NAMESFILE = LOCALBASE + "names.tsv"
INSTSFILE = LOCALBASE + "insts.tsv"
TYPESFILE = LOCALBASE + "types.tsv"
FACHEDIR = LOCALBASE + "facsimiles"
SOURCEDIR = LOCALBASE + "sources"
DFTJSON = LOCALBASE + "dft.json"
SETTINGSJSON = LOCALBASE + "settings.json"
CONTRIBDIR = LOCALBASE + 'contributors/'
# This will change when everything is debugged:
# PERSONALDIR = CONTRIBDIR + lastName + firstName[0]
PERSONDIR = CONTRIBDIR
TEMPLATE = PERSONDIR + 'template.ft3'

# Patterns shared by several helpers; compiled once at import time.
_SIGLUM_RE = re.compile(r'[A-Z]{1,3}-[A-Z]{1,3}[A-Za-z]*')
_SOURCE_SIGLUM_RE = re.compile(r'^([A-Z]{1,3}-[A-Z]{1,3}[A-Za-z]*)(?::\s*(.*))?$')
_WHITESPACE_RE = re.compile(r'\s+')
_NON_ALNUM_RE = re.compile(r'[^A-Za-z0-9]+')
_TRAILING_YEAR_RE = re.compile(r'(?:c\.?)?\d{3,4}', re.IGNORECASE)
_DOTTED_WORDS_RE = re.compile(r'(?:[a-z]+\.)+[a-z.]*')


def _is_module_handle(flList, stGlobalName):
    """Return True if flList is the object bound to the module global stGlobalName."""
    diGlobals = globals()
    return stGlobalName in diGlobals and flList is diGlobals[stGlobalName]


def make_list(flList, stKind=None):
    """Read a tab-separated file object and return one string per line.

    flList -- an open text file; it is read to the end.
    stKind -- 'names', 'insts' or 'types'.  When omitted, the kind is
              inferred the way legacy callers expect: the handle bound to
              the module global ``flNames`` means 'names', the one bound to
              ``flInsts`` means 'insts', anything else means 'types'.
              Unlike a direct comparison with those globals, a global
              that has not been bound yet is simply treated as "no match"
              instead of raising NameError.

    Per line (after stripping surrounding whitespace):
      names -- "<column 2> <column 1>", or column 1 alone if there is only one
      insts -- column 2, lines with fewer than two columns are skipped
      types -- column 1
    Blank lines are not filtered out for 'names' and 'types'; they yield ''.

    Raises ValueError for an unknown stKind.
    """
    if stKind is None:
        if _is_module_handle(flList, 'flNames'):
            stKind = 'names'
        elif _is_module_handle(flList, 'flInsts'):
            stKind = 'insts'
        else:
            stKind = 'types'
    elif stKind not in ('names', 'insts', 'types'):
        raise ValueError('Unknown list kind: ' + repr(stKind))

    liOut = []
    for line in flList.read().split('\n'):
        lsRec = line.strip().split('\t')
        if stKind == 'names':
            if len(lsRec) < 2:
                liOut.append(lsRec[0])
            else:
                liOut.append(lsRec[1] + ' ' + lsRec[0])
        elif stKind == 'insts':
            if len(lsRec) >= 2:
                liOut.append(lsRec[1])
        else:
            liOut.append(lsRec[0])
    return liOut


def make_instrument_list(flList):
    """Return the stripped third column of every usable line in flList.

    Blank lines, lines with fewer than three tab-separated columns, and
    lines whose third column is 'inst' (case-insensitive; presumably the
    header row) are skipped.
    """
    liOut = []
    for line in flList.read().split('\n'):
        line = line.strip()
        if line == '':
            continue
        lsRec = line.split('\t')
        if len(lsRec) < 3:
            continue
        stInst = lsRec[2].strip()
        if stInst.lower() == 'inst':
            continue
        liOut.append(stInst)
    return liOut


def make_type_list(flList):
    """Return the stripped, non-empty first column of every line in flList."""
    liOut = []
    for line in flList.read().split('\n'):
        line = line.strip()
        if line == '':
            continue
        stType = line.split('\t')[0].strip()
        if stType != '':
            liOut.append(stType)
    return liOut


def make_library_list(dirPath):
    """Return "SIGLUM:library name" strings for the sub-directories of dirPath.

    Only directories named ``<siglum>_<library_name>`` whose siglum matches
    the siglum pattern are reported; underscores in the library name become
    spaces.  Returns [] when dirPath is not a directory.
    """
    liOut = []
    if not os.path.isdir(dirPath):
        return liOut
    for entry in os.listdir(dirPath):
        if '_' not in entry or not os.path.isdir(os.path.join(dirPath, entry)):
            continue
        siglum, library_name = entry.split('_', 1)
        if _SIGLUM_RE.fullmatch(siglum):
            liOut.append(f"{siglum}:{library_name.replace('_', ' ')}")
    return liOut


def make_name_facsimile_key_lookup(stNameFile):
    """Map normalized "first last" names to their facsimile directory key.

    stNameFile is a latin1, tab-separated file whose first column is read
    as the last name and whose second column is read as the first name.
    The key is the last name with non-alphanumerics removed followed by the
    first alphanumeric character of the first name.  Lines lacking either
    part are skipped.  Returns {} when the file cannot be opened.
    """
    diOut = {}
    try:
        flNameFile = open(stNameFile, 'r', encoding='latin1')
    except OSError:
        return diOut
    with flNameFile:
        for line in flNameFile:
            line = line.strip()
            if line == '':
                continue
            lsRec = line.split('\t')
            stLast = lsRec[0].strip()
            stFirst = lsRec[1].strip() if len(lsRec) > 1 else ''
            if stFirst == '':
                continue
            stLastKey = _NON_ALNUM_RE.sub('', stLast)
            stFirstKey = _NON_ALNUM_RE.sub('', stFirst)
            if stLastKey == '' or stFirstKey == '':
                continue
            stDisplay = (stFirst + ' ' + stLast).strip()
            diOut[normalize_name_value(stDisplay)] = stLastKey + stFirstKey[0]
    return diOut


def _list_source_subdirs():
    """Return (entry, full_path) for each directory directly under SOURCEDIR.

    Returns [] when SOURCEDIR cannot be listed, so that a missing directory
    does not surface as an exception in the middle of form validation.
    """
    try:
        liEntries = os.listdir(SOURCEDIR)
    except OSError:
        return []
    liOut = []
    for entry in liEntries:
        source_path = os.path.join(SOURCEDIR, entry)
        if os.path.isdir(source_path):
            liOut.append((entry, source_path))
    return liOut


def get_documents_for_source(source):
    """Return the document names known for a source.

    Lookup order:
      1. the JSON-derived index ``diSourceDocuments``;
      2. if the source starts with a library siglum, the sub-directories of
         SOURCEDIR named ``<siglum>_...``;
      3. otherwise the source is treated as a person name: its key from
         ``diNameFacsimileKeys`` is matched against the '+'-separated tokens
         of the non-siglum directories under SOURCEDIR.
    Returns [] for a blank/None source or when nothing is found.  The
    returned list is a fresh list the caller may modify.
    """
    stSource = (source or '').strip()
    if stSource == '':
        return []

    liDocuments = diSourceDocuments.get(normalize_source_value(stSource), [])
    if liDocuments:
        return list(liDocuments)

    siglum_match = _SOURCE_SIGLUM_RE.match(stSource)
    if siglum_match:
        stPrefix = siglum_match.group(1) + '_'
        liSourceDirs = [
            source_path
            for entry, source_path in _list_source_subdirs()
            if entry.startswith(stPrefix)
        ]
        return get_document_names(liSourceDirs)

    stKey = diNameFacsimileKeys.get(normalize_name_value(stSource), '')
    if stKey == '':
        return []
    liSourceDirs = []
    for entry, source_path in _list_source_subdirs():
        # Directories that start with a siglum belong to libraries, not people.
        if '_' in entry and _SIGLUM_RE.fullmatch(entry.split('_', 1)[0]):
            continue
        if stKey in entry.split('+'):
            liSourceDirs.append(source_path)
    return get_document_names(liSourceDirs)


def build_source_document_index():
    """Return the source->documents index from DFTJSON, else from SETTINGSJSON."""
    diIndex = load_source_document_index_from_json(DFTJSON)
    if diIndex:
        return diIndex
    return load_source_document_index_from_json(SETTINGSJSON)


def build_source_list():
    """Return the list of sources from DFTJSON, else from SETTINGSJSON."""
    liOut = load_source_list_from_json(DFTJSON)
    if liOut:
        return liOut
    return load_source_list_from_json(SETTINGSJSON)


def _json_text(diItem, stKey):
    """Return diItem[stKey] as a stripped string; missing or null gives ''.

    A plain ``str(value)`` would turn a JSON null into the text 'None',
    which would then be accepted as a real source/document name.
    """
    value = diItem.get(stKey, '')
    if value is None:
        return ''
    return str(value).strip()


def load_source_list_from_json(stPath):
    """Return the distinct 'source' values of the JSON array stored at stPath.

    Order of first appearance is kept; duplicates are detected on the
    normalized value.  Returns [] if the file is missing, unreadable, or
    does not contain a list.
    """
    if not os.path.exists(stPath):
        return []
    data = load_json_file(stPath)
    if not isinstance(data, list):
        return []
    liOut = []
    seSeen = set()
    for item in data:
        if not isinstance(item, dict):
            continue
        stSource = _json_text(item, 'source')
        stNormalized = normalize_source_value(stSource)
        if stNormalized == '' or stNormalized in seSeen:
            continue
        seSeen.add(stNormalized)
        liOut.append(stSource)
    return liOut


def load_source_document_index_from_json(stPath):
    """Return {normalized source: [document, ...]} from the JSON array at stPath.

    Documents keep their original (stripped) spelling and first-seen order;
    duplicates are detected on the normalized document value.  Items with a
    blank or unusable document are ignored.  Returns {} if the file is
    missing, unreadable, or does not contain a list.
    """
    if not os.path.exists(stPath):
        return {}
    data = load_json_file(stPath)
    if not isinstance(data, list):
        return {}
    diIndex = {}
    diSeen = {}  # normalized source -> set of normalized documents already stored
    for item in data:
        if not isinstance(item, dict):
            continue
        stSource = _json_text(item, 'source')
        stDocument = _json_text(item, 'document')
        if stSource == '' or stDocument == '':
            continue
        if not is_usable_document_value(stDocument):
            continue
        stNormalizedSource = normalize_source_value(stSource)
        stNormalizedDocument = normalize_document_value(stDocument)
        if stNormalizedSource == '' or stNormalizedDocument == '':
            continue
        seSeen = diSeen.setdefault(stNormalizedSource, set())
        if stNormalizedDocument in seSeen:
            continue
        seSeen.add(stNormalizedDocument)
        diIndex.setdefault(stNormalizedSource, []).append(stDocument)
    return diIndex


def is_usable_document_value(stDocument):
    """Return False for blank values and for values made only of dotted words.

    A normalized value consisting solely of lowercase letters and dots with
    at least one "word." group (for example 'ms.' or 'n.d.') is rejected.
    The pattern admits no digits, so no separate digit test is needed.
    """
    stNormalized = normalize_document_value(stDocument)
    if stNormalized == '':
        return False
    return _DOTTED_WORDS_RE.fullmatch(stNormalized) is None


def load_json_file(stPath):
    """Return the parsed JSON content of stPath, or None on any failure.

    The file is tried as UTF-8 first and then as latin1.
    """
    for encoding in ('utf-8', 'latin1'):
        try:
            with open(stPath, 'r', encoding=encoding) as flJson:
                return json.load(flJson)
        except (OSError, ValueError):
            # ValueError also covers JSONDecodeError and UnicodeDecodeError.
            continue
    return None


def get_document_names(liSourceDirs):
    """Return the sorted display names of all sub-directories of liSourceDirs.

    Unreadable directories are skipped.  Sorting is case-insensitive, with
    the exact spelling as tie-breaker so the order is reproducible.
    """
    seDocuments = set()
    for stSourceDir in liSourceDirs:
        try:
            liEntries = os.listdir(stSourceDir)
        except OSError:
            continue
        for entry in liEntries:
            full_path = os.path.join(stSourceDir, entry)
            if os.path.isdir(full_path):
                seDocuments.add(get_document_display_name(full_path, entry))
    return sorted(seDocuments, key=lambda stName: (stName.lower(), stName))


def get_document_display_name(stDocumentDir, stFallback):
    """Return a document's name from dft.json, else dft.tsv, else its slug."""
    stDocument = get_document_from_dft_json(stDocumentDir)
    if stDocument == '':
        stDocument = get_document_from_dft_tsv(stDocumentDir)
    if stDocument == '':
        stDocument = format_document_slug(stFallback)
    return stDocument


def format_document_slug(stSlug):
    """Turn an underscore-separated directory slug into a display name.

    A trailing token that looks like a year (3-4 digits, optionally prefixed
    with 'c' or 'c.') is dropped.  Leading tokens that form a shelf-mark --
    either "<letters> <token with a digit>" or a single token containing a
    digit -- are separated from the rest with ':'; remaining tokens are
    joined with spaces.  The slug is returned unchanged when nothing usable
    is left.
    """
    liTokens = [item for item in stSlug.split('_') if item != '']
    if liTokens and _TRAILING_YEAR_RE.fullmatch(liTokens[-1]):
        liTokens = liTokens[:-1]
    if not liTokens:
        return stSlug

    if (len(liTokens) >= 2
            and re.fullmatch(r'[A-Za-z.]+', liTokens[0])
            and re.search(r'\d', liTokens[1])):
        nLeft = 2
    elif re.search(r'\d', liTokens[0]):
        nLeft = 1
    else:
        nLeft = 0

    stLeft = ' '.join(liTokens[:nLeft]).strip()
    stRight = ' '.join(liTokens[nLeft:]).strip()
    if stLeft != '' and stRight != '':
        return stLeft + ':' + stRight
    return stLeft or stRight or stSlug


def get_document_from_dft_json(stDocumentDir):
    """Return the first non-blank 'document' value in <dir>/dft.json, or ''.

    The file may hold a single object or a list of objects.  Any read or
    parse problem yields ''.
    """
    stJson = os.path.join(stDocumentDir, 'dft.json')
    if not os.path.exists(stJson):
        return ''
    try:
        with open(stJson, 'r', encoding='utf-8') as flJson:
            data = json.load(flJson)
    except (OSError, ValueError):
        return ''
    if isinstance(data, dict):
        return _json_text(data, 'document')
    if isinstance(data, list):
        for item in data:
            if isinstance(item, dict):
                stDocument = _json_text(item, 'document')
                if stDocument != '':
                    return stDocument
    return ''


def get_document_from_dft_tsv(stDocumentDir):
    """Return the first non-blank 'Document' cell in <dir>/dft.tsv, or ''.

    The first line is the header; the column is located by the exact header
    text 'Document'.  Any read or decode problem yields ''.
    """
    stTsv = os.path.join(stDocumentDir, 'dft.tsv')
    if not os.path.exists(stTsv):
        return ''
    try:
        with open(stTsv, 'r', encoding='utf-8') as flTsv:
            liHeader = [item.strip() for item in flTsv.readline().split('\t')]
            if 'Document' not in liHeader:
                return ''
            nDocument = liHeader.index('Document')
            for line in flTsv:
                liFields = line.rstrip('\n\r').split('\t')
                if nDocument < len(liFields):
                    stDocument = liFields[nDocument].strip()
                    if stDocument != '':
                        return stDocument
    except (OSError, UnicodeDecodeError):
        return ''
    return ''


def _normalize_text(value):
    """Return value stripped, lower-cased, with whitespace runs collapsed; None gives ''."""
    if value is None:
        return ''
    return _WHITESPACE_RE.sub(' ', value.strip().lower())


def normalize_instrument_name(value):
    """Return the lookup key for an instrument name (see _normalize_text)."""
    return _normalize_text(value)


def normalize_type_name(value):
    """Return the lookup key for a music type (see _normalize_text)."""
    return _normalize_text(value)


def normalize_name_value(value):
    """Return the lookup key for a person name (see _normalize_text)."""
    return _normalize_text(value)


def normalize_source_value(value):
    """Return the lookup key for a source.

    Like the other normalizers, and additionally removes whitespace around
    the first ':' so that 'A-Wn : Foo' and 'A-Wn:Foo' compare equal.
    """
    if value is None:
        return ''
    value = _WHITESPACE_RE.sub(' ', value.strip())
    if ':' in value:
        left, right = value.split(':', 1)
        value = left.strip() + ':' + right.strip()
    return value.lower()


def normalize_document_value(value):
    """Return the lookup key for a document name (see _normalize_text)."""
    return _normalize_text(value)


def _build_lookup(liItems, fnNormalize):
    """Map fnNormalize(item) to the whitespace-tidied item; first spelling wins."""
    diLookup = {}
    for item in liItems:
        normalized = fnNormalize(item)
        if normalized and normalized not in diLookup:
            diLookup[normalized] = _WHITESPACE_RE.sub(' ', item.strip())
    return diLookup


def build_instrument_lookup(liInsts):
    """Return {normalized instrument: display spelling}."""
    return _build_lookup(liInsts, normalize_instrument_name)


def build_type_lookup(liTypes):
    """Return {normalized music type: display spelling}."""
    return _build_lookup(liTypes, normalize_type_name)


def build_name_lookup(liNames):
    """Return {normalized person name: display spelling}."""
    return _build_lookup(liNames, normalize_name_value)


def build_source_lookup(liSources):
    """Return {normalized source: display spelling}."""
    return _build_lookup(liSources, normalize_source_value)


def parse_csv_field(value):
    """Split a comma-separated field into stripped items; blank/None gives []."""
    if not value:
        return []
    return [item.strip() for item in value.split(',')]


def validate_instrument_name(inst):
    """Return None if inst is a known instrument, else an error message.

    The message names the offending entry ('(blank entry)' when empty) and,
    when available, up to five close matches from ``diInsts``.
    """
    normalized = normalize_instrument_name(inst)
    if normalized != '' and normalized in diInsts:
        return None
    # Only the failure path needs the (sorted, hence reproducible) candidates.
    suggestions = difflib.get_close_matches(normalized, sorted(diInsts), n=5, cutoff=0.7)
    suggestion_text = ', '.join(diInsts[item] for item in suggestions)
    label = inst if inst and inst.strip() else '(blank entry)'
    if suggestion_text:
        return f'"{label}" is not a valid instrument. Closest matches: {suggestion_text}'
    return f'"{label}" is not a valid instrument.'
def _closest_display_names(stNormalized, diLookup):
    """Return up to five display names from diLookup close to stNormalized.

    diLookup maps normalized keys to display spellings.  The result is a
    comma-separated string, '' when nothing is close enough.  Keys are
    sorted first so the suggestions come out in a reproducible order.
    """
    suggestions = difflib.get_close_matches(stNormalized, sorted(diLookup), n=5, cutoff=0.7)
    return ', '.join(diLookup[item] for item in suggestions)


def validate_person_name(name):
    """Return None if name is a known person, else an error message.

    The name is looked up in ``diNames`` by its normalized form.  The
    message names the offending entry ('(blank entry)' when empty or None)
    and, when available, lists the closest known names.
    """
    normalized = normalize_name_value(name)
    if normalized != '' and normalized in diNames:
        return None
    suggestion_text = _closest_display_names(normalized, diNames)
    label = name if name and name.strip() else '(blank entry)'
    if suggestion_text:
        return f'"{label}" is not a recognized name. Closest matches: {suggestion_text}'
    return f'"{label}" is not a recognized name.'


def validate_source_name(source):
    """Return None if source is a known source, else an error message.

    The source is looked up in ``diSources`` by its normalized form.  A
    blank or None source is reported as '(blank entry)', in line with
    validate_person_name, which also accepts None.
    """
    stSource = (source or '').strip()
    if stSource == '':
        return '"(blank entry)" is not a valid source.'
    stNormalized = normalize_source_value(stSource)
    if stNormalized != '' and stNormalized in diSources:
        return None
    suggestion_text = _closest_display_names(stNormalized, diSources)
    if suggestion_text:
        return f'"{source}" is not a recognized source. Closest matches: {suggestion_text}'
    return f'"{source}" is not a recognized source.'
def parse_ensemble_members(value):
    """Parse a comma-separated ensemble into (designation, instrument, original).

    Each item is either 'instrument' or 'designation:instrument'.  For the
    first form the designation is None.  ``original`` is the item as
    returned by parse_csv_field.
    """
    members = []
    for item in parse_csv_field(value):
        if ':' in item:
            designation, instrument = item.split(':', 1)
            members.append((designation.strip(), instrument.strip(), item))
        else:
            members.append((None, item.strip(), item))
    return members


# ---------------------------------------------------------------------------
# Lookup tables, loaded once at import time.
#
# open() raises OSError on failure rather than returning a false value, so
# the "Cannot open" message has to be printed from an except clause.  When
# a file is missing the corresponding table is left empty, which keeps the
# module importable; every value is then reported as invalid by the validators.
# ---------------------------------------------------------------------------
try:
    flNames = open(NAMESFILE, 'r', encoding='latin1')
except OSError:
    flNames = None
    liNames = []
    print("Cannot open ", NAMESFILE)
else:
    with flNames:
        liNames = make_list(flNames)
diNames = build_name_lookup(liNames)

liLibraries = make_library_list(FACHEDIR)
diLibraries = build_source_lookup(liLibraries)
diNameFacsimileKeys = make_name_facsimile_key_lookup(NAMESFILE)
liSources = build_source_list()
diSources = build_source_lookup(liSources)
diSourceDocuments = build_source_document_index()

try:
    flInsts = open(INSTSFILE, 'r', encoding='latin1')
except OSError:
    flInsts = None
    liInsts = []
    print("Cannot open ", INSTSFILE)
else:
    with flInsts:
        liInsts = make_instrument_list(flInsts)
diInsts = build_instrument_lookup(liInsts)

try:
    flTypes = open(TYPESFILE, 'r', encoding='latin1')
except OSError:
    flTypes = None
    liTypes = []
    print("Cannot open ", TYPESFILE)
else:
    with flTypes:
        liTypes = make_type_list(flTypes)
diTypes = build_type_lookup(liTypes)


# Page designations: a folio/page number such as '135', '12v', '285b' ...
_PAGE_NUMBER_RE = re.compile('[0-9]+[rv]?[a-m]?')
# ... or a signature such as 'A3', 'aa4v' (one letter, optionally doubled).
_PAGE_SIGNATURE_RE = re.compile('([a-zA-Z])([a-zA-Z]?)[1-8]v?[a-m]?')
_PAGE_LIST_PREFIX_RE = re.compile(r'^(pp|ff)\.\s*', re.IGNORECASE)
_PAGE_SINGLE_PREFIX_RE = re.compile(r'^[pf]\.\s*', re.IGNORECASE)


def strip_list(liIn):
    """Strip every string of liIn in place and return the same list."""
    liIn[:] = [item.strip() for item in liIn]
    return liIn


def _field_text(field):
    """Return field.data stripped, treating a missing value (None) as ''."""
    return (field.data or '').strip()


def _form_flag_is_yes(form, stName):
    """Return True if the form has a field stName whose data is 'yes'."""
    flag = getattr(form, stName, None)
    return bool(flag and flag.data == 'yes')


def key_val(form, field):
    """Validate a comma-separated list of keys such as 'C#M, Bbm'.

    Each key is a note letter A-G, an optional '#' or 'b', and 'M' or 'm'.
    Raises ValidationError listing every entry that does not match.
    """
    liKeyList = strip_list(_field_text(field).split(','))
    badKeys = [mKey for mKey in liKeyList if not re.fullmatch('[A-G][#b]?[Mm]', mKey)]
    if badKeys:
        raise ValidationError('Bad Keys: ' + ','.join(badKeys))


def type_val(form, field):
    """Validate a comma-separated list of music types against ``diTypes``.

    Raises ValidationError describing every unknown type, with close
    matches where there are any.
    """
    invalid_details = []
    valid_names = sorted(diTypes)
    for music_type in parse_csv_field(_field_text(field)):
        normalized = normalize_type_name(music_type)
        if normalized != '' and normalized in diTypes:
            continue
        suggestions = difflib.get_close_matches(normalized, valid_names, n=5, cutoff=0.7)
        suggestion_text = ', '.join(diTypes[item] for item in suggestions)
        label = music_type if music_type and music_type.strip() else '(blank entry)'
        if suggestion_text:
            invalid_details.append(
                f'"{label}" is not a valid music type. Closest matches: {suggestion_text}')
        else:
            invalid_details.append(f'"{label}" is not a valid music type.')
    if invalid_details:
        raise ValidationError('Invalid music type[s]: ' + ' '.join(invalid_details))


def ensemble_val(form, field):
    """Validate a comma-separated ensemble of 'instrument' or 'designation:instrument'.

    Raises ValidationError listing every member with an empty designation,
    an empty instrument, or an unknown instrument.
    """
    invalid_details = []
    for designation, instrument, original in parse_ensemble_members(_field_text(field)):
        if designation is not None:
            if designation == '':
                invalid_details.append(
                    f'"{original}" must have a designation before the colon.')
                continue
            if instrument == '':
                invalid_details.append(
                    f'"{original}" must have an instrument name after the colon.')
                continue
        error = validate_instrument_name(instrument)
        if error:
            invalid_details.append(error)
    if invalid_details:
        raise ValidationError('Invalid instrument list: ' + ' '.join(invalid_details))


def part_val(form, field):
    """Validate that every listed part belongs to the form's ensemble.

    A part may be given as the full ensemble entry or as its designation;
    'score' is always accepted.  A blank field is accepted.  Raises
    ValidationError if the ensemble is blank or a part is not in it.
    """
    stPart = _field_text(field)
    if stPart == '':
        return
    stEnsemble = _field_text(form.ensemble)
    if stEnsemble == '':
        raise ValidationError('Enter ensemble first.')

    allowed_parts = set()
    for designation, instrument, original in parse_ensemble_members(stEnsemble):
        normalized_original = normalize_instrument_name(original)
        if normalized_original:
            allowed_parts.add(normalized_original)
        if designation is not None:
            normalized_designation = normalize_instrument_name(designation)
            if normalized_designation:
                allowed_parts.add(normalized_designation)

    bad_parts = []
    for item in parse_csv_field(stPart):
        normalized = normalize_instrument_name(item)
        if normalized == 'score':
            continue
        if normalized == '' or normalized not in allowed_parts:
            bad_parts.append(item if item != '' else '(blank entry)')
    if bad_parts:
        raise ValidationError('Part not in ensemble: ' + ', '.join(bad_parts))


def source_val(form, field):
    """Validate the source unless the form allows a new source.

    Raises ValidationError with the message from validate_source_name.
    """
    if _form_flag_is_yes(form, 'allow_new_source'):
        return
    error = validate_source_name(_field_text(field))
    if error:
        raise ValidationError(error)


def document_val(form, field):
    """Validate that the document is known for the form's source.

    Accepted without checking: a blank document, a form that allows a new
    document or a new source, and a source for which no documents are known.
    Raises ValidationError otherwise when the document is not found.
    """
    stDocument = _field_text(field)
    if stDocument == '':
        return
    if _form_flag_is_yes(form, 'allow_new_document'):
        return
    if _form_flag_is_yes(form, 'allow_new_source'):
        return
    liDocuments = get_documents_for_source(_field_text(form.source))
    if not liDocuments:
        return
    seDocuments = {normalize_document_value(item) for item in liDocuments}
    if normalize_document_value(stDocument) in seDocuments:
        return
    raise ValidationError('Document not found for this source.')


def volume_val(form, field):
    """Validate a volume number: blank, or an integer from 1 to 99 inclusive.

    Returns True when valid; raises ValidationError otherwise.  Only ASCII
    digits are accepted, so every accepted value is safe to pass to int().
    """
    stVol = field.data or ''
    if stVol == '':
        return True
    if not re.fullmatch(r'[0-9]+', stVol):
        raise ValidationError('Must be blank, or a number 1-99')
    if not 1 <= int(stVol) <= 99:
        raise ValidationError('Invalid volume number. Must be 1-99.')
    return True


def page_val(form, field):
    """Validate a page reference.

    Accepted forms: 'p.'/'f.' followed by one page designation, or
    'pp.'/'ff.' followed by a comma-separated list of at least two
    designations.  Prefixes are case-insensitive.  Raises ValidationError
    for anything else.
    """
    def valid_page_designation(stPage):
        if _PAGE_NUMBER_RE.fullmatch(stPage):
            return True
        val = _PAGE_SIGNATURE_RE.fullmatch(stPage)
        if not val:
            return False
        # A two-letter signature must repeat the same letter ('aa', not 'ab').
        return not (val.group(2) and val.group(1) != val.group(2))

    stIn = _field_text(field)

    if _PAGE_LIST_PREFIX_RE.match(stIn):
        stPages = _PAGE_LIST_PREFIX_RE.sub('', stIn, count=1)
        liPageList = strip_list(stPages.split(','))
        if len(liPageList) < 2 or '' in liPageList:
            raise ValidationError(
                'Bad pages: use pp. or ff. only for a comma-separated list like ff.135,285b.')
        badPages = [stPage for stPage in liPageList if not valid_page_designation(stPage)]
        if badPages:
            raise ValidationError('Bad pages: ' + ','.join(badPages))
        return

    if _PAGE_SINGLE_PREFIX_RE.match(stIn):
        stPage = _PAGE_SINGLE_PREFIX_RE.sub('', stIn, count=1)
        if ',' in stPage or not valid_page_designation(stPage):
            raise ValidationError(
                'Bad pages: use p. or f. for a single page, or pp./ff. for a list.')
        return

    raise ValidationError('Bad pages: must begin with p., f., pp., or ff.')


def section_val(form, field):
    """Validate a section name of the form '<number>. <Capitalized text>'.

    The number must not start with 0.  Raises ValidationError otherwise.
    """
    if not re.fullmatch(r'[1-9][0-9]*\. [A-Z].*', _field_text(field)):
        raise ValidationError(
            "Section name must start with a number, followed by a . and a space")


def name_val(form, field):
    """Accept any contributor name.

    Name checking is currently disabled: the function has always returned
    True before doing any work.  The unreachable code that used to follow
    relied on helpers that do not exist (``latin1_to_ascii``,
    ``os.path_writeable``) and has been removed.
    """
    return True


def composer_val(form, field):
    """Validate the composer against the known names.

    Raises ValidationError with the message from validate_person_name.
    """
    error = validate_person_name(field.data)
    if error:
        raise ValidationError(error)


def output_val(form, field):
    """Validate an output file name.

    Raises ValidationError if the name contains a slash or backslash, or if
    PERSONDIR does not exist or is not writable.
    """
    stOutput = field.data or ''
    if '/' in stOutput or '\\' in stOutput:
        raise ValidationError('Cannot have a slash in the file name.')
    # The directory must both exist and be writable for the file to be saved.
    if not (os.access(PERSONDIR, os.F_OK) and os.access(PERSONDIR, os.W_OK)):
        stErr = ('Directory ' + PERSONDIR + ' not writable or executable.'
                 '\n Therefore cannot write to ' + stOutput + '.')
        raise ValidationError(stErr)


def input_val(form, field):
    """Validate that the input file exists; a blank name means TEMPLATE.

    The name is expanded with add_prefix_and_suffix before the check.
    Raises ValidationError when the resulting path does not exist.
    """
    stGiven = field.data or ''
    stInput = stGiven if stGiven != '' else TEMPLATE
    stInput = add_prefix_and_suffix(stInput)
    if not os.path.exists(stInput):
        raise ValidationError('Input file: ' + stGiven + ' not found.')