sahana-eden/helpers/sahana-lang-compare.py

96 lines
3.6 KiB
Python

#!/usr/bin/env python3
import argparse
import csv
import io
import os
from ast import literal_eval
from pprint import pprint
# GIS CSV files can contain extra large fields, so raise the csv module's
# per-field size limit to 2**31 - 1.
csv.field_size_limit(2**31 - 1)

# Strings which don't exist verbatim in the code, so the naive source search
# in main() can never match them.
extras = (
    'Enter a number between %(min)g and %(max)g',
    'Enter a number greater than or equal to %(min)g',
    'Enter a number less than or equal to %(max)g',
    'Enter an integer between %(min)g and %(max)g',
    'Enter an integer greater than or equal to %(min)g',
    'Enter an integer less than or equal to %(max)g',
)
def get_file_contents(filename):
    """Return the contents of *filename* decoded to text.

    The file is read as raw bytes and decoded as UTF-8; when the bytes are
    not valid UTF-8 they are decoded as Latin-1 instead.
    """
    with open(filename, 'rb') as stream:
        raw = stream.read()
    try:
        text = raw.decode('utf-8')
    except UnicodeDecodeError:
        # Not valid UTF-8: fall back to the Latin-1 interpretation
        text = raw.decode('latin-1')
    return text
def get_csv_contents(filename):
    """Return a flat list of every field of every row in a CSV file.

    The file is decoded by get_file_contents() and parsed with csv.reader;
    fields are returned in reading order, row after row.
    """
    reader = csv.reader(io.StringIO(get_file_contents(filename)))
    return [field for row in reader for field in row]
def main(args):
    """Restore still-used translations that are missing from a new language file.

    The language file ``args.langfile`` is compared with the file of the same
    base name under ``<args.web2pydir>/applications/eden/languages``. Every
    key that exists only in the old file is searched for in the web2py tree:
    as a single- or double-quoted string in .py/.html/.js files, or as a
    complete field in .csv files. Keys that are found are copied, with their
    old value, into the new translations. The strings listed in ``extras``
    are copied from the old file without searching. The merged dictionary is
    written to a file named like the language file in the current working
    directory.

    Args:
        args: Parsed command-line arguments providing ``langfile`` and
            ``web2pydir``.
    """
    basename = os.path.basename(args.langfile)
    old_langfile = os.path.join(args.web2pydir, 'applications/eden/languages', basename)

    # The language files are read as UTF-8 explicitly instead of relying on
    # the locale default; the file written below declares that coding.
    # Load existing translations from the current (old) Sahana Eden instance
    with open(old_langfile, encoding='utf-8') as f:
        old_translations = literal_eval(f.read())
    # Load translations produced by sahana-lang-convert.py
    with open(args.langfile, encoding='utf-8') as f:
        translations = literal_eval(f.read())

    missing_translations = {
        key: value
        for key, value in old_translations.items()
        if key not in translations
    }

    for root, dirs, files in os.walk(args.web2pydir):
        if not missing_translations:
            # Everything has been recovered; no need to read more files
            break
        # Iterate over all web2py subdirectories except "languages" which
        # already contain translations
        if 'languages' in dirs:
            dirs.remove('languages')
        for file in files:
            extension = os.path.splitext(file)[1].lower()
            filename = os.path.join(root, file)
            if extension in ('.py', '.html', '.js'):
                try:
                    file_contents = get_file_contents(filename)
                except UnicodeDecodeError:
                    continue
                # Naively search for quoted strings in .py .html and .js files
                found = [
                    key for key in missing_translations
                    if f"'{key}'" in file_contents or f'"{key}"' in file_contents
                ]
            elif extension == '.csv':
                try:
                    # A set gives constant-time membership tests per key
                    csv_fields = set(get_csv_contents(filename))
                except UnicodeDecodeError:
                    continue
                # Naively search for full strings in csv fields
                found = [key for key in missing_translations if key in csv_fields]
            else:
                continue
            # Move the matches out of the "missing" pool; collecting them
            # first avoids mutating the dict while iterating over it.
            for key in found:
                translations[key] = missing_translations.pop(key)

    for key in extras:
        # Add the extra translations which are never matched verbatim.
        # An extra the old file does not have either is skipped rather than
        # aborting the whole run with a KeyError.
        if key not in translations and key in old_translations:
            translations[key] = old_translations[key]

    with open(basename, 'w', encoding='utf-8') as langfile:
        # Write the updated translation file
        print('# -*- coding: utf-8 -*-', file=langfile)
        pprint(translations, stream=langfile, indent=0, width=8192)
if __name__ == '__main__':
    # Script entry point: parse the two positional arguments and hand the
    # resulting namespace to main().
    cli = argparse.ArgumentParser(
        description='Spotter Cluster Sahana Eden translation comparator',
    )
    cli.add_argument(
        'langfile',
        help='New translation file (with possibly missing strings).',
    )
    cli.add_argument(
        'web2pydir',
        help='Path to Web2py root directory.',
    )
    cli_args = cli.parse_args()
    main(cli_args)