the-riddle
clone your own copy | download snapshot

Snapshots | iceberg

Inside this repository

keywords.py
text/x-python

Download raw (876 bytes)

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
import glob
import codecs
import os.path
import argparse

# Matches a keyword wrapped in double plusses, e.g. ``++word++``. The group is
# non-greedy so that several markers on one line are matched separately.
KEYWORD_PATTERN = re.compile(r'\+\+(.+?)\+\+')

# Python's ``re`` spells backreferences ``\1`` (not ``$1``, which would be
# inserted literally and discard the matched word).
KEYWORD_REPLACEMENT = r'<span class="keyword">\1</span>'


def mark_keywords(htmlstring):
  """Return *htmlstring* with every ``++word++`` marker turned into a span.

  Each marker becomes ``<span class="keyword">word</span>``. Text without
  markers is returned unchanged. A marker cannot span a line break because
  ``.`` does not match newlines.
  """
  return KEYWORD_PATTERN.sub(KEYWORD_REPLACEMENT, htmlstring)


def build_parser():
  """Build the command-line parser: one positional ``folder`` argument."""
  # The text is passed as ``description``; given positionally it would be
  # taken as ``prog`` and end up as the program name in the usage line.
  parser = argparse.ArgumentParser(
    description="Replace words in double plusses with keyword span")
  parser.add_argument('folder', help="Stories folder, with HTML files")
  return parser


def process_file(htmlpath):
  """Rewrite the UTF-8 file at *htmlpath* in place with keywords marked.

  The file is only written when the substitution changed its content.
  Returns True if the file was rewritten, False otherwise.
  """
  with codecs.open(htmlpath, mode='r', encoding='utf-8') as htmlfile:
    original = htmlfile.read()

  marked = mark_keywords(original)
  if marked == original:
    return False

  with codecs.open(htmlpath, mode='w', encoding='utf-8') as htmlfile:
    htmlfile.write(marked)
  return True


def main(argv=None):
  """Process every ``*.html`` file in each ``book*`` entry of the folder.

  *argv* is the argument list to parse; None means ``sys.argv[1:]``.
  Each visited book folder and HTML path is printed as it is reached.
  """
  args = build_parser().parse_args(argv)

  for bookfolder in glob.glob("{0}/book*".format(os.path.normpath(args.folder))):
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("{0}".format(bookfolder))
    for htmlpath in glob.glob("{0}/*.html".format(os.path.normpath(bookfolder))):
      print("\t{0}".format(htmlpath))
      process_file(htmlpath)


if __name__ == "__main__":
  main()