Download raw (2.6 KB)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import codecs
import glob
import re
import os.path
from sys import argv
# Regexes are raw strings so backslash escapes reach the regex engine intact.
#
# A parenthesised figure reference in running text, e.g. "(Figure 1.2)";
# group 1 is the text between the parentheses.
figure_patt = re.compile(r"\((Figure\s+.[^\)]+)\)", re.IGNORECASE)
# A figure/chapter number such as "1.2", "I.3" or "2.4a": digits or "I",
# followed by one or more ".<word characters>" parts.  (Inside a character
# class "|" is a literal pipe, not alternation, so it is not listed.)
num_patt = re.compile(r"[\dI]+(?:\.\w+)+")
# A caption header: "Figure <number>" at the very start of a line.
header_patt = re.compile(r"^Figure [\dI]+(?:\.\w+)+", re.MULTILINE)
def clean_caption(caption):
    """Return *caption* as a single line with normalised whitespace.

    Leading and trailing whitespace is removed and every internal run of
    whitespace (including newlines) is collapsed to one space.
    """
    # str.strip(chars) treats its argument as a *set of characters*, so the
    # former strip('\s\n') removed backslashes and the letter "s" from both
    # ends of the caption.  A bare strip() removes whitespace only.
    return re.sub(r'\s+', ' ', caption.strip())
def parse_captions_file(path):
    """Parse a captions file into a ``{figure number: caption}`` dict.

    The file at *path* is read as UTF-8.  Every line that starts with a
    ``Figure <number>`` header (see ``header_patt``) opens a caption that
    runs up to the next header, or to the end of the file for the last one.
    The caption text includes its own header and is normalised with
    ``clean_caption``.  If the same number appears more than once, the last
    occurrence wins.

    Returns an empty dict when the file contains no figure headers.
    """
    with codecs.open(path, encoding="utf-8") as f:
        contents = f.read()

    headers = list(header_patt.finditer(contents))
    captions = {}
    for index, header in enumerate(headers):
        # A caption ends where the next header begins; the last one runs to
        # the end of the file.  With no headers at all the loop body never
        # runs, so no bogus ``None`` entry is created.
        if index + 1 < len(headers):
            end = headers[index + 1].start()
        else:
            end = len(contents)
        num = num_patt.search(header.group()).group()
        captions[num] = clean_caption(contents[header.start():end])
    return captions
def md_figures(match):
    """Return the replacement text for one parenthesised figure reference.

    Intended as the replacement callback for ``figure_patt.sub``.  Reads the
    module-level names ``bookfolder`` (the folder currently being processed)
    and ``captions`` (figure number -> caption text).

    An image file is looked up under ``<bookfolder>/img/``, first by the
    full figure name (``Figure 1.2.*``) and then by the bare figure number
    (``1.2.*``).  When one is found, the caption is returned wrapped in a
    ``<span class="figcaption">`` element.

    The matched text is returned unchanged when the reference contains no
    figure number, when no caption is known for that number, or when no
    image file exists.
    """
    figurename = match.group(1)
    num_match = num_patt.search(figurename)
    if num_match is None:
        # e.g. "(Figure below)": there is nothing to look up, so leave the
        # text alone rather than failing on ``None.group()``.
        return match.group(0)
    num = num_match.group()
    print(num)

    caption = captions.get(num)
    if caption is None:
        # No caption for this figure: keep the reference as written instead
        # of aborting the whole run with a KeyError.
        return match.group(0)

    # Image files may be named after the whole reference or just the number.
    for stem in (figurename, num):
        figurepath = glob.glob("{0}/img/{1}.*".format(bookfolder, stem))
        if figurepath:
            figurefile = os.path.split(figurepath[0])[1]
            # figurename and figurefile fill template slots {0} and {1},
            # which the current template does not use; only {2} is emitted.
            return u"<span class=\"figcaption\">{2}</span>".format(figurename, figurefile, caption)
    return match.group(0)
# Script body: for every "book*" folder under the root folder given as the
# first command-line argument, rewrite the figure references of each markdown
# file that has a matching "<number>*Captions*.md" file next to it.
# ``bookfolder`` and ``captions`` stay module-level names on purpose:
# md_figures() reads them as globals.
if len(argv) < 2:
    raise SystemExit("usage: {0} ROOT_FOLDER".format(argv[0]))

for bookfolder in glob.glob("{0}/book*".format(argv[1])):
    for mdpath in glob.glob("{0}/*.md".format(bookfolder)):
        print(mdpath)
        # Take the number from the file name only, so digits in a directory
        # name cannot be mistaken for it; the number is used as a file-name
        # prefix in the captions glob below.
        mdname = os.path.splitext(os.path.basename(mdpath))[0]
        num_match = num_patt.search(mdname)
        if not num_match:
            continue
        num = num_match.group()
        print(num)

        captions_path = glob.glob("{0}/{1}*Captions*.md".format(bookfolder, num))
        # Skip files without captions, and the captions file itself.
        if not captions_path or captions_path[0] == mdpath:
            continue
        print("FOUND CAPTIONS PATH")
        captions = parse_captions_file(captions_path[0])

        # Read and close the file before reopening it for writing.
        with codecs.open(mdpath, 'r', encoding='utf-8') as mdfile:
            mdstring = mdfile.read()
        treated_mdstring = figure_patt.sub(md_figures, mdstring)
        with codecs.open(mdpath, 'w', encoding='utf-8') as treated_mdfile:
            treated_mdfile.write(treated_mdstring)