Commit a872a331 authored by Johannes Zeman

fixed bibfix non-ASCII character recognition

parent ed646c49
......@@ -29,7 +29,7 @@ verbose = args.verbose
translate_to_latex = args.unicode_to_latex
print "Loading data from '%s'..." % (bib_file_name),
header, bib_data, bib_keys, bib_strings, bib_preamble = bibfixtools.load_bib_file(bib_file_name)
raw_data, header, bib_data, bib_keys, bib_strings, bib_preamble = bibfixtools.load_bib_file(bib_file_name)
if header == None and bib_data == None and bib_keys == None:
print "failed."
print "Fatal error occurred. Exiting..."
......@@ -37,6 +37,18 @@ if header == None and bib_data == None and bib_keys == None:
else:
print "done."
print "Checking encoding...",
encoding_errors = bibfixtools.check_encoding(raw_data)
if not (encoding_errors):
print "done."
else:
print "failed."
print ""
print encoding_errors
print "Fatal error: Found non-ASCII characters. Please fix manually."
print "Fatal error occurred. Exiting..."
exit(1)
need_to_save_changes = False
print "Checking key encoding...",
......@@ -86,16 +98,6 @@ if file_links_ok:
print "done."
else:
print "failed."
print "Checking entry encoding...",
encoding_ok = bibfixtools.check_encoding(bib_data, header, bib_strings, bib_preamble)
if translate_to_latex:
print "done."
else:
if encoding_ok:
print "done."
else:
print "found non-ASCCI characters!"
print "Checking superseded entries...",
bib_data, superseded_entries_ok, superseded_entries_changed = bibfixtools.check_and_fix_superseded_entries(bib_data, bib_keys, verbose)
......@@ -106,17 +108,17 @@ if superseded_entries_ok:
else:
print "failed."
if translate_to_latex and not(encoding_ok):
need_to_save_changes = True
ubib_data, ubib_header, ubib_strings, ubib_preamble = bibfixtools.convert_to_unicode(bib_data, header, bib_strings, bib_preamble)
print "Converting non-ASCII characters to latex syntax...",
bib_data, header, bib_strings, bib_preamble, encoding_ok = bibfixtools.translate_unicode_to_latex(ubib_data, bib_keys, ubib_header, ubib_strings, ubib_preamble, verbose)
if encoding_ok:
print "done."
else:
print "Fatal error: ASCII conversion failed. Please fix manually."
print "Fatal error occurred. Exiting..."
exit(1)
#if translate_to_latex and (encoding_errors):
# need_to_save_changes = True
# ubib_data, ubib_header, ubib_strings, ubib_preamble = bibfixtools.convert_to_unicode(bib_data, header, bib_strings, bib_preamble)
# print "Converting non-ASCII characters to latex syntax...",
# bib_data, header, bib_strings, bib_preamble, encoding_ok = bibfixtools.translate_unicode_to_latex(ubib_data, bib_keys, ubib_header, ubib_strings, ubib_preamble, verbose)
# if encoding_ok:
# print "done."
# else:
# print "Fatal error: ASCII conversion failed. Please fix manually."
# print "Fatal error occurred. Exiting..."
# exit(1)
if need_to_save_changes:
print "Saving data to ASCII-encoded file '%s'..." % (output_file_name),
......
......@@ -7,6 +7,7 @@ Created on Aug 6, 2013
import codecs
import bibchecktools
import unicode_to_latex
import shutil
import subprocess
import re
import urllib
......@@ -32,6 +33,7 @@ def load_bib_file(file_name):
try:
bib_file = open(file_name, "r")
bib_data_lines = bib_file.readlines()
raw_data = bib_data_lines[:]
bib_file.close()
for i in range(len(bib_data_lines)):
if bib_data_lines[i].strip() == '':
......@@ -96,7 +98,7 @@ def load_bib_file(file_name):
print line.rstrip()
print ""
raise IOError
return bib_header, bib_data, bib_keys, bib_strings, bib_preamble
return raw_data, bib_header, bib_data, bib_keys, bib_strings, bib_preamble
except Exception as err:
print ""
print err
......@@ -212,7 +214,7 @@ def correct_header(header):
def save_bib_data_to_ascii_file(bib_data, bib_header, bib_strings, bib_preamble, file_name):
try:
bib_file = codecs.open(filename=file_name, mode='w', encoding='ascii')
bib_file = codecs.open(filename=file_name + ".bibfix.tmp", mode='w', encoding='ascii')
bib_header = correct_header(bib_header)
if bib_header != None:
bib_file.writelines(bib_header)
......@@ -227,6 +229,7 @@ def save_bib_data_to_ascii_file(bib_data, bib_header, bib_strings, bib_preamble,
bib_file.writelines(entry)
bib_file.write('\n')
bib_file.close()
shutil.move(file_name + ".bibfix.tmp", file_name)
except Exception as err:
print ""
print err
......@@ -234,35 +237,25 @@ def save_bib_data_to_ascii_file(bib_data, bib_header, bib_strings, bib_preamble,
return True
def check_encoding(bib_data, bib_header, bib_strings, bib_preamble):
    """Return True when every line of the parsed bibliography is pure ASCII.

    Parameters:
        bib_data: sequence of entries, each entry being a sequence of lines.
        bib_header, bib_strings, bib_preamble: sequences of lines; each may be
            None or empty, in which case that section is skipped.

    Returns:
        False as soon as one line cannot be encoded as ASCII, and also when
        bib_data is empty or None (no entries counts as a failed check).
        True otherwise.
    """
    def is_ascii(line):
        # A Python 2 byte string is implicitly decoded before encoding and
        # fails with UnicodeDecodeError; a unicode/text string fails with
        # UnicodeEncodeError. Catching their common base class UnicodeError
        # covers both, so text input no longer escapes as an exception.
        try:
            line.encode('ascii')
        except UnicodeError:
            return False
        return True

    # The three optional sections share the same flat list-of-lines layout.
    for section in (bib_header, bib_strings, bib_preamble):
        if section and not all(is_ascii(line) for line in section):
            return False

    if not bib_data:
        return False
    return all(is_ascii(line) for entry in bib_data for line in entry)
def check_encoding(raw_data):
    """Scan the raw lines of a bib file for non-ASCII characters.

    Parameters:
        raw_data: sequence of lines exactly as read from the file (one string
            per line, normally including the trailing newline).

    Returns:
        An empty string when every character has a code point <= 127.
        Otherwise a report that starts with "Found non-ASCII characters:" and
        lists each offending line once as "line N: <content>", where N is the
        1-based line number as shown by text editors. If a line cannot be
        inspected at all, the error is printed, a marker line is appended to
        the report and scanning stops.
    """
    report = []
    # Count from 1 so the reported numbers match what an editor displays.
    for line_number, line in enumerate(raw_data, 1):
        try:
            if any(ord(character) > 127 for character in line):
                if not report:
                    report.append("Found non-ASCII characters:\n")
                entry = "line %d: %s" % (line_number, line)
                # The last line of a file may lack a newline; terminate the
                # entry so it does not run into whatever follows.
                if not entry.endswith("\n"):
                    entry += "\n"
                report.append(entry)
        except Exception as err:
            # Single-argument print(...) behaves identically as a Python 2
            # print statement and as a Python 3 function call.
            print("")
            print(err)
            report.append("Ooops... this is serious!\n")
            break
    return "".join(report)
def convert_to_unicode(bib_data, bib_header, bib_strings, bib_preamble):
if bib_data:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment