species-of-things
clone your own copy | download snapshot

Snapshots | iceberg

No images in this repository’s iceberg at this time

Inside this repository

search-word-groups_to-html.py
text/x-python

Download raw (1.4 KB)

from os import listdir
from os.path import isfile, join
import codecs
import re

# Build an HTML report of the lines in the text files under `d` that contain
# at least one of the search words (and none of the excluded words).
#
# For every regular file in `d` a <div class="source"> header with the file
# name is written, followed by one <div class="line tN"> per matching line,
# where N is the number of leading tabs on that line plus one. Each search
# word occurrence is wrapped in <span class="high<classnumber>">.
# The report is written to 'out_<words joined by "-">.html' in the current
# working directory.

d = '../txt/'
files = [f for f in listdir(d) if isfile(join(d, f))]

classnumber = '1'
words = ['object','thing','material']
exclude = []
strwords = '-'.join(words)
strwordsnice = ' '.join(words)
strexclude = ' '.join(exclude)

with codecs.open('out_'+strwords+'.html','w+', 'utf-8') as output:
	output.write('<link href="css/stylesheet.css" rel="stylesheet">\n\n')
	output.write('<small class="info">highlighted words: '+strwordsnice+'</small><br>\n')
	output.write('<small class="info">excluded words: '+strexclude+'</small>\n\n')
	output.write('<div id="main">\n')
	for f in files:
		# NOTE(review): this <div class="source"> is never closed, so each
		# source block ends up nested inside the previous one. Left as-is
		# because the stylesheet is not visible from here -- confirm before
		# adding a closing tag.
		output.write('<div class="source"><small>')
		output.write(f)
		output.write('</small><br>')

		# Read the whole file inside a context manager so the handle is
		# closed before moving on to the next file.
		with codecs.open(d+f, 'r', 'utf-8') as source:
			lines = source.readlines()

		# `i` is the 0-based index of the line within its file; it is what
		# gets printed in the "linenumber" element.
		for i, line in enumerate(lines):
			line = line.replace('\n','<br>')

			# Only lines mentioning a search word and no excluded word are kept.
			if not any(word in line for word in words):
				continue
			if any(word in line for word in exclude):
				continue

			# Indentation depth: leading tabs + 1, used as CSS class "tN".
			# Measured before highlighting; the inserted markup never
			# starts with a tab, so the count is unaffected either way.
			tab = len(line) - len(line.lstrip('\t')) + 1
			strtab = str(tab)

			# Highlight every search word.
			# NOTE(review): the text is inserted without HTML escaping;
			# presumably the input files are trusted -- confirm.
			for word in words:
				line = line.replace(word,'<span class="high'+classnumber+'">'+word+'</span>')

			output.write('\n\n<div class="line t'+strtab+'"><small class="linenumber">'+str(i)+'</small>\n'+line+'\n</div>')
	output.write('</div>\n\n')

# Parenthesised form prints the same text on Python 2 and Python 3.
print('*output written*')