the-riddle
clone your own copy | download snapshot

Snapshots | iceberg

Inside this repository

modify_images.py
text/x-python

Download raw (1.6 KB)

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
import glob
import codecs
import os.path
import argparse

# Command-line interface: one required positional argument, the stories folder.
# The summary text is passed as `description` so it appears in --help; the first
# positional parameter of ArgumentParser is `prog`, which would otherwise put
# this sentence into the usage line in place of the program name.
parser = argparse.ArgumentParser(description="Insert image references")
parser.add_argument('folder', help="Stories folder, with HTML files")
args = parser.parse_args()

def insert_image_references(m):
  """Build the figure markup for one matched image-plus-caption.

  Intended as the replacement callback for ``re.sub``.

  Args:
    m: Match object exposing the named groups ``src``, ``alt``,
      ``className`` (may be unmatched) and ``caption``.

  Returns:
    A ``<span class="figure ...">`` wrapper holding an ``img_ref`` label,
    the image (now carrying ``id="figure_<num>"``) and its caption.
    ``<num>`` is the trailing run of ``I``, ``.``, digits, ``a``, ``b`` or
    ``c`` at the end of the alt text. When the alt text has no such suffix
    the matched markup is returned unchanged.
  """
  src = m.group('src')
  alt = m.group('alt')
  class_name = m.group('className') or ''
  caption = m.group('caption')
  print(alt)

  # The figure number is whatever numbering-like characters end the alt text.
  numbering = re.search(r"[I\.\dabc]+$", alt)
  if numbering is None:
    # No figure number to reference: leave this image exactly as it was
    # rather than crashing the whole run on a None match.
    return m.group(0)
  num = numbering.group(0)

  return u'<span class="figure {2}"><span class="img_ref" data-for="figure_{3}">{3}</span><img src="{0}" alt="{1}" class="{2}" id="figure_{3}" /><span class="figcaption">{4}</span></span>'.format(src, alt, class_name, num, caption)

# An <img> tag (src, alt, optional class, in that order) immediately followed
# by its figcaption span. Compiled once, as a raw string, so it is not rebuilt
# per file. NOTE: the leading `.` in each attribute group means a value must be
# at least two characters long to match; kept as-is to preserve what matches.
_IMAGE_WITH_CAPTION_RE = re.compile(
  r'<img src="(?P<src>.[^"]+)" alt="(?P<alt>.[^"]+)" '
  r'(?:class="(?P<className>.[^"]+)" )?/>'
  r'<span class="figcaption">(?P<caption>.*?)</span>')

# Walk every "book*" folder under the stories folder and rewrite the figures
# of each HTML file in place.
for bookfolder in glob.glob("{0}/book*".format(os.path.normpath(args.folder))):
  print("{0}".format(bookfolder))
  for htmlpath in glob.glob("{0}/*.html".format(os.path.normpath(bookfolder))):
    print("\t{0}".format(htmlpath))

    with codecs.open(htmlpath, mode='r', encoding='utf-8') as htmlfile:
      original_html = htmlfile.read()

    htmlstring = _IMAGE_WITH_CAPTION_RE.sub(insert_image_references, original_html)

    # Only touch files whose content actually changed, so untouched stories
    # keep their modification time and are never truncated needlessly.
    if htmlstring != original_html:
      with codecs.open(htmlpath, mode='w', encoding='utf-8') as htmlfile:
        htmlfile.write(htmlstring)