# Download raw (2.6 KB)
#!/usr/bin/env python # -*- coding: utf-8 -*- import codecs import glob import re import os.path from sys import argv figure_patt = re.compile("\((Figure\s+.[^\)]+)\)", re.IGNORECASE) num_patt = re.compile("[\d|I]+(?:\.[\d|\w]+)+") header_patt = re.compile("^Figure [\d|I]+(?:\.[\d|\w]+)+", re.MULTILINE) def clean_caption (caption): single_line = re.sub('\n', ' ', caption.strip('\s\n')) return re.sub('\s+', ' ', single_line) def parse_captions_file (path): captions = {} with codecs.open(path, encoding="utf-8") as f: contents = f.read() start = None num = None for m in header_patt.finditer(contents): if start is not None: captions[num] = clean_caption(contents[start:m.start()]) start = m.start() num = num_patt.search(m.group()).group() captions[num] = clean_caption(contents[start:]) return captions def md_figures (match): figurename = match.group(1) figurepath = glob.glob("{0}/img/{1}.*".format(bookfolder, figurename)) num = num_patt.search(figurename).group() print num if len(figurepath) > 0: figurefile = os.path.split(figurepath[0])[1] mdstring = u"![{0}](img/{1})<span class=\"figcaption\">{2}</span>".format(figurename, figurefile, captions[num]) return mdstring else: figurepath = glob.glob("{0}/img/{1}.*".format(bookfolder, num)) if len(figurepath) > 0: figurefile = os.path.split(figurepath[0])[1] mdstring = u"![{0}](img/{1})<span class=\"figcaption\">{2}</span>".format(figurename, figurefile, captions[num]) return mdstring else: return match.group(0) # for bookfolder in glob.glob("../test/book*"): for bookfolder in glob.glob("{0}/book*".format(argv[1])): for mdpath in glob.glob("{0}/*.md".format(bookfolder)): with codecs.open(mdpath, 'r', encoding='utf-8') as mdfile: print mdpath mdname, ext = os.path.splitext(mdpath) num_match = num_patt.search(mdname) if num_match: num = num_match.group() print num # print "CAPTIONS PREFIX", num captions_path = glob.glob("{0}/{1}*Captions*.md".format(bookfolder, num)) if len(captions_path) > 0 and captions_path[0] <> mdpath: 
print "FOUND CAPTIONS PATH" # print captions_path captions = parse_captions_file(captions_path[0]) # print captions mdstring = mdfile.read() treated_mdstring = figure_patt.sub(md_figures, mdstring) with codecs.open(mdpath, 'w', encoding='utf-8') as treated_mdfile: treated_mdfile.write(treated_mdstring)