the-riddle
clone your own copy | download snapshot

Snapshots | iceberg

Inside this repository

replace_images.py
text/x-python

Download raw (2.6 KB)

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import codecs
import glob
import re
import os.path
from sys import argv

# Patterns are raw strings so that "\(", "\s" and "\d" reach the regex engine
# untouched instead of relying on Python passing unknown escapes through.

# Matches a parenthesised in-text reference such as "(Figure 1.2)",
# case-insensitively, and captures the text between the parentheses.
figure_patt = re.compile(r"\((Figure\s+.[^\)]+)\)", re.IGNORECASE)
# Matches a dotted figure/chapter number such as "1.2", "I.3" or "2.4a".
# NOTE: inside a character class "|" is a literal pipe, not alternation; it is
# kept so the pattern matches exactly the same text as before.
num_patt = re.compile(r"[\d|I]+(?:\.[\d|\w]+)+")
# Matches a caption header ("Figure 1.2") at the start of any line.
header_patt = re.compile(r"^Figure [\d|I]+(?:\.[\d|\w]+)+", re.MULTILINE)

def clean_caption (caption):
  """Return `caption` collapsed onto a single line.

  Leading and trailing whitespace is removed and every internal run of
  whitespace (spaces, tabs, newlines) is replaced by one space.
  """
  # str.strip(chars) takes a set of characters, not a regex: the former
  # '\s\n' argument stripped literal backslashes and the letter "s" from the
  # ends of the caption while leaving surrounding spaces and tabs in place.
  return re.sub(r'\s+', ' ', caption.strip())

def parse_captions_file (path):
  """Parse a captions file into a dict mapping figure number to caption.

  The file at `path` is read as UTF-8 and split at every line that starts
  with a "Figure <number>" header (see `header_patt`). Each entry runs from
  its header up to the next header (or the end of the file), is cleaned with
  `clean_caption`, and therefore still begins with the header text itself.
  If the same number appears more than once, the last occurrence wins.

  Returns an empty dict when the file contains no header at all.
  """
  with codecs.open(path, encoding="utf-8") as f:
    contents = f.read()

  headers = list(header_patt.finditer(contents))
  starts = [m.start() for m in headers]
  # Each caption ends where the next header begins; the last one runs to EOF.
  ends = starts[1:] + [len(contents)]

  captions = {}
  # With no headers this loop does nothing, so no bogus `None` key holding the
  # whole file is created.
  for m, end in zip(headers, ends):
    num = num_patt.search(m.group()).group()
    captions[num] = clean_caption(contents[m.start():end])

  return captions

def md_figures (match):
  """Substitution callback for `figure_patt`: build a Markdown image.

  `match` is a `figure_patt` match whose group 1 is the reference text
  (e.g. "Figure 1.2"). The image file is looked up in the current book's
  "img" folder, first by the full reference text and then by the bare figure
  number, and the caption is taken from the module-level `captions` dict.
  Reads the module-level `bookfolder` and `captions` set by the driver loop.

  Returns the Markdown image followed by a "figcaption" span, or the matched
  text unchanged when the reference has no number, no caption, or no image.
  """
  figurename = match.group(1)

  num_match = num_patt.search(figurename)
  if num_match is None:
    # e.g. "(Figure above)": nothing to look up, keep the text as written
    # instead of failing on `None.group()`.
    return match.group(0)
  num = num_match.group()

  print(num)

  caption = captions.get(num)
  if caption is None:
    # The captions file has no entry for this figure; leave the reference
    # untouched rather than aborting the whole run with a KeyError.
    return match.group(0)

  # Try the full reference text first, then fall back to the number alone.
  for stem in (figurename, num):
    figurepath = glob.glob("{0}/img/{1}.*".format(bookfolder, stem))
    if figurepath:
      figurefile = os.path.split(figurepath[0])[1]
      return u"![{0}](img/{1})<span class=\"figcaption\">{2}</span>".format(figurename, figurefile, caption)

  return match.group(0)

# Driver: for every "book*" folder under the root given on the command line,
# rewrite each Markdown file that has a matching "<num>*Captions*.md" file,
# replacing "(Figure N.M)" references via `md_figures`.
#
# `bookfolder` and `captions` are deliberately module-level names: `md_figures`
# reads them as globals while `figure_patt.sub` is running.
if len(argv) < 2:
  raise SystemExit("usage: {0} ROOT_FOLDER".format(argv[0]))

for bookfolder in glob.glob("{0}/book*".format(argv[1])):
  for mdpath in glob.glob("{0}/*.md".format(bookfolder)):
    print(mdpath)

    # NOTE(review): the number is searched in the whole path (minus the
    # extension), so a directory component such as "v1.2" would match before
    # the file name does - confirm whether only the base name was intended.
    mdname, ext = os.path.splitext(mdpath)
    num_match = num_patt.search(mdname)
    if not num_match:
      continue

    num = num_match.group()
    print(num)
    captions_path = glob.glob("{0}/{1}*Captions*.md".format(bookfolder, num))

    # Skip files without a captions file, and the captions file itself.
    if not captions_path or captions_path[0] == mdpath:
      continue

    print("FOUND CAPTIONS PATH")
    captions = parse_captions_file(captions_path[0])

    # Read and close the file before reopening it for writing, so the two
    # handles on the same path never overlap.
    with codecs.open(mdpath, 'r', encoding='utf-8') as mdfile:
      mdstring = mdfile.read()

    treated_mdstring = figure_patt.sub(md_figures, mdstring)

    with codecs.open(mdpath, 'w', encoding='utf-8') as treated_mdfile:
      treated_mdfile.write(treated_mdstring)