No images in this repository’s iceberg at this time
Download raw (723 bytes)
# Scan every regular file in the corpus directory and copy each line that
# mentions one of the search words into a single report file named after
# those words (e.g. out_object-thing-material.txt).

from os import listdir
from os.path import isfile, join

from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk import ngrams

# Directory holding the text files to scan, relative to the working directory.
d = '../txt/'
# Keep regular files only; sub-directories are skipped.
files = [f for f in listdir(d) if isfile(join(d, f))]

# Search terms. Matching below is a plain substring test, so 'thing' also
# matches inside longer words such as 'nothing'.
words = ['object', 'thing', 'material']
strwords = '-'.join(words)

# Horizontal rule written above and below each file name in the report.
rule = '********************************************************'

with open('out_' + strwords + '.txt', 'w+') as output:
    for f in files:
        # Per-file header: rule, file name, rule, blank line.
        output.write(rule + '\n')
        output.write(f + '\n')
        output.write(rule + '\n\n')

        # Stream the file inside a context manager so the handle is closed
        # as soon as the file has been scanned.
        with open(join(d, f), 'r') as source:
            for line in source:
                if any(word in line for word in words):
                    # The line keeps its own newline; the extra one leaves a
                    # blank line between matches in the report.
                    output.write(line + '\n')

        # Blank line separating this file's matches from the next header.
        output.write('\n')

# Call form works on both Python 2 and Python 3.
print('*output written*')