Commit a872a331 authored by Johannes Zeman

fixed bibfix non-ASCII character recognition

parent ed646c49
...@@ -29,7 +29,7 @@ verbose = args.verbose ...@@ -29,7 +29,7 @@ verbose = args.verbose
translate_to_latex = args.unicode_to_latex translate_to_latex = args.unicode_to_latex
print "Loading data from '%s'..." % (bib_file_name), print "Loading data from '%s'..." % (bib_file_name),
header, bib_data, bib_keys, bib_strings, bib_preamble = bibfixtools.load_bib_file(bib_file_name) raw_data, header, bib_data, bib_keys, bib_strings, bib_preamble = bibfixtools.load_bib_file(bib_file_name)
if header == None and bib_data == None and bib_keys == None: if header == None and bib_data == None and bib_keys == None:
print "failed." print "failed."
print "Fatal error occurred. Exiting..." print "Fatal error occurred. Exiting..."
...@@ -37,6 +37,18 @@ if header == None and bib_data == None and bib_keys == None: ...@@ -37,6 +37,18 @@ if header == None and bib_data == None and bib_keys == None:
else: else:
print "done." print "done."
print "Checking encoding...",
encoding_errors = bibfixtools.check_encoding(raw_data)
if not (encoding_errors):
print "done."
else:
print "failed."
print ""
print encoding_errors
print "Fatal error: Found non-ASCII characters. Please fix manually."
print "Fatal error occurred. Exiting..."
exit(1)
need_to_save_changes = False need_to_save_changes = False
print "Checking key encoding...", print "Checking key encoding...",
...@@ -86,16 +98,6 @@ if file_links_ok: ...@@ -86,16 +98,6 @@ if file_links_ok:
print "done." print "done."
else: else:
print "failed." print "failed."
print "Checking entry encoding...",
encoding_ok = bibfixtools.check_encoding(bib_data, header, bib_strings, bib_preamble)
if translate_to_latex:
print "done."
else:
if encoding_ok:
print "done."
else:
print "found non-ASCCI characters!"
print "Checking superseded entries...", print "Checking superseded entries...",
bib_data, superseded_entries_ok, superseded_entries_changed = bibfixtools.check_and_fix_superseded_entries(bib_data, bib_keys, verbose) bib_data, superseded_entries_ok, superseded_entries_changed = bibfixtools.check_and_fix_superseded_entries(bib_data, bib_keys, verbose)
...@@ -106,17 +108,17 @@ if superseded_entries_ok: ...@@ -106,17 +108,17 @@ if superseded_entries_ok:
else: else:
print "failed." print "failed."
if translate_to_latex and not(encoding_ok): #if translate_to_latex and (encoding_errors):
need_to_save_changes = True # need_to_save_changes = True
ubib_data, ubib_header, ubib_strings, ubib_preamble = bibfixtools.convert_to_unicode(bib_data, header, bib_strings, bib_preamble) # ubib_data, ubib_header, ubib_strings, ubib_preamble = bibfixtools.convert_to_unicode(bib_data, header, bib_strings, bib_preamble)
print "Converting non-ASCII characters to latex syntax...", # print "Converting non-ASCII characters to latex syntax...",
bib_data, header, bib_strings, bib_preamble, encoding_ok = bibfixtools.translate_unicode_to_latex(ubib_data, bib_keys, ubib_header, ubib_strings, ubib_preamble, verbose) # bib_data, header, bib_strings, bib_preamble, encoding_ok = bibfixtools.translate_unicode_to_latex(ubib_data, bib_keys, ubib_header, ubib_strings, ubib_preamble, verbose)
if encoding_ok: # if encoding_ok:
print "done." # print "done."
else: # else:
print "Fatal error: ASCII conversion failed. Please fix manually." # print "Fatal error: ASCII conversion failed. Please fix manually."
print "Fatal error occurred. Exiting..." # print "Fatal error occurred. Exiting..."
exit(1) # exit(1)
if need_to_save_changes: if need_to_save_changes:
print "Saving data to ASCII-encoded file '%s'..." % (output_file_name), print "Saving data to ASCII-encoded file '%s'..." % (output_file_name),
......
...@@ -7,6 +7,7 @@ Created on Aug 6, 2013 ...@@ -7,6 +7,7 @@ Created on Aug 6, 2013
import codecs import codecs
import bibchecktools import bibchecktools
import unicode_to_latex import unicode_to_latex
import shutil
import subprocess import subprocess
import re import re
import urllib import urllib
...@@ -32,6 +33,7 @@ def load_bib_file(file_name): ...@@ -32,6 +33,7 @@ def load_bib_file(file_name):
try: try:
bib_file = open(file_name, "r") bib_file = open(file_name, "r")
bib_data_lines = bib_file.readlines() bib_data_lines = bib_file.readlines()
raw_data = bib_data_lines[:]
bib_file.close() bib_file.close()
for i in range(len(bib_data_lines)): for i in range(len(bib_data_lines)):
if bib_data_lines[i].strip() == '': if bib_data_lines[i].strip() == '':
...@@ -96,7 +98,7 @@ def load_bib_file(file_name): ...@@ -96,7 +98,7 @@ def load_bib_file(file_name):
print line.rstrip() print line.rstrip()
print "" print ""
raise IOError raise IOError
return bib_header, bib_data, bib_keys, bib_strings, bib_preamble return raw_data, bib_header, bib_data, bib_keys, bib_strings, bib_preamble
except Exception as err: except Exception as err:
print "" print ""
print err print err
...@@ -212,7 +214,7 @@ def correct_header(header): ...@@ -212,7 +214,7 @@ def correct_header(header):
def save_bib_data_to_ascii_file(bib_data, bib_header, bib_strings, bib_preamble, file_name): def save_bib_data_to_ascii_file(bib_data, bib_header, bib_strings, bib_preamble, file_name):
try: try:
bib_file = codecs.open(filename=file_name, mode='w', encoding='ascii') bib_file = codecs.open(filename=file_name + ".bibfix.tmp", mode='w', encoding='ascii')
bib_header = correct_header(bib_header) bib_header = correct_header(bib_header)
if bib_header != None: if bib_header != None:
bib_file.writelines(bib_header) bib_file.writelines(bib_header)
...@@ -227,6 +229,7 @@ def save_bib_data_to_ascii_file(bib_data, bib_header, bib_strings, bib_preamble, ...@@ -227,6 +229,7 @@ def save_bib_data_to_ascii_file(bib_data, bib_header, bib_strings, bib_preamble,
bib_file.writelines(entry) bib_file.writelines(entry)
bib_file.write('\n') bib_file.write('\n')
bib_file.close() bib_file.close()
shutil.move(file_name + ".bibfix.tmp", file_name)
except Exception as err: except Exception as err:
print "" print ""
print err print err
...@@ -234,35 +237,25 @@ def save_bib_data_to_ascii_file(bib_data, bib_header, bib_strings, bib_preamble, ...@@ -234,35 +237,25 @@ def save_bib_data_to_ascii_file(bib_data, bib_header, bib_strings, bib_preamble,
return True return True
def check_encoding(bib_data, bib_header, bib_strings, bib_preamble): def check_encoding(raw_data):
if bib_header: bOK = True
for line in bib_header: errors = ""
try: for i in xrange(len(raw_data)):
line.encode('ascii') try:
except UnicodeDecodeError: for character in raw_data[i]:
return False if ord(character) > 127:
if bib_strings: if bOK:
for line in bib_strings: bOK = False
try: errors += "Found non-ASCII characters:\n"
line.encode('ascii') errors += "line %d: %s" % (i, raw_data[i])
except UnicodeDecodeError: break
return False except Exception as err:
if bib_preamble: print ""
for line in bib_preamble: print err
try: bOK = False
line.encode('ascii') errors += "Ooops... this is serious!\n"
except UnicodeDecodeError: break
return False return errors
if bib_data:
for entry in bib_data:
for line in entry:
try:
line.encode('ascii')
except UnicodeDecodeError:
return False
else:
return False
return True
def convert_to_unicode(bib_data, bib_header, bib_strings, bib_preamble): def convert_to_unicode(bib_data, bib_header, bib_strings, bib_preamble):
if bib_data: if bib_data:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment