No images in this repository’s iceberg at this time
Download raw (1.4 KB)
"""Write an HTML page showing the lines of a set of text files that contain
any of the search words, with each hit wrapped in a highlight ``<span>``.

Every regular file directly inside ``d`` is read as UTF-8.  The page is
written to ``out_<words joined by '-'>.html`` in the current directory.

NOTE(review): the indentation of the original script was lost.  This version
assumes a line is written only when it contains a search word and no
excluded word -- confirm against the intended output.
"""

from os import listdir
from os.path import isfile, join
import codecs
import re

# Directory containing the text files to scan.
d = '../txt/'

# Suffix of the CSS class ("high" + classnumber) put on every highlighted hit.
classnumber = '1'
# Substrings to look for; matching is plain, case-sensitive substring search.
words = ['object', 'thing', 'material']
# A line containing any of these substrings is left out of the page.
exclude = []

# Leading tabs of a line; always matches (possibly the empty string).
_LEADING_TABS = re.compile(r"^\t*")


def _format_line(number, line, words, exclude, classnumber):
    """Return the HTML block for one source line, or None to skip the line.

    number      -- index of the line within its file (0-based, as produced
                   by ``enumerate``); shown in the "linenumber" element.
    line        -- raw line text, possibly ending in a newline.
    words       -- substrings to highlight; at least one must be present.
    exclude     -- substrings that cause the line to be skipped.
    classnumber -- suffix appended to "high" for the highlight CSS class.
    """
    # The newline is turned into markup before the word tests, exactly as
    # the original script did, so the tests see the "<br>" text as well.
    line = line.replace('\n', '<br>')

    if not any(word in line for word in words):
        return None
    if any(word in line for word in exclude):
        return None

    # CSS class "t<N>" where N is the number of leading tabs plus one.
    depth = len(_LEADING_TABS.match(line).group(0)) + 1

    # Replacements are applied one word after another, in list order.
    for word in words:
        line = line.replace(
            word, '<span class="high' + classnumber + '">' + word + '</span>')

    return ('\n\n<div class="line t' + str(depth) + '">'
            '<small class="linenumber">' + str(number) + '</small>\n'
            + line + '\n</div>')


def write_report(source_dir, out_path, words, exclude, classnumber):
    """Scan the files in ``source_dir`` and write the HTML page to ``out_path``.

    source_dir  -- directory whose regular files are read as UTF-8 text;
                   sub-directories are ignored.
    out_path    -- path of the HTML file to create (overwritten if present).
    words       -- substrings to highlight.
    exclude     -- substrings that cause a line to be skipped.
    classnumber -- suffix appended to "high" for the highlight CSS class.

    Errors from listing, opening or decoding files are not caught here.
    """
    # Sorted so that the page is identical from run to run; listdir itself
    # returns names in arbitrary order.
    names = sorted(
        name for name in listdir(source_dir) if isfile(join(source_dir, name)))

    with codecs.open(out_path, 'w+', 'utf-8') as output:
        output.write('<link href="css/stylesheet.css" rel="stylesheet">\n\n')
        output.write('<small class="info">highlighted words: '
                     + ' '.join(words) + '</small><br>\n')
        output.write('<small class="info">excluded words: '
                     + ' '.join(exclude) + '</small>\n\n')
        output.write('<div id="main">\n')

        for name in names:
            output.write('<div class="source"><small>')
            output.write(name)
            output.write('</small><br>')

            # Read inside a context manager so the handle is closed promptly.
            with codecs.open(join(source_dir, name), 'r', 'utf-8') as source:
                lines = source.readlines()

            for number, line in enumerate(lines):
                block = _format_line(number, line, words, exclude, classnumber)
                if block is not None:
                    output.write(block)

            output.write('</div>\n\n')

        # Close <div id="main">, which the original left open.
        output.write('</div>\n')


def main():
    """Build the page from the module-level settings and report completion."""
    write_report(d, 'out_' + '-'.join(words) + '.html',
                 words, exclude, classnumber)
    print('*output written*')


if __name__ == '__main__':
    main()