species-of-things
clone your own copy | download snapshot

Snapshots | iceberg

No images in this repository’s iceberg at this time

Inside this repository

search-word-groups_to-txt.py
text/x-python

Download raw (723 bytes)

from os import listdir
from os.path import isfile, join
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk import ngrams

# Directory that holds the text files to scan (trailing slash kept as-is).
d = '../txt/'

# Substrings to look for; they also form the output file name.
words = ['object', 'thing', 'material']
strwords = '-'.join(words)

# Banner line written above and below each file name in the report.
SEPARATOR = '********************************************************\n'


def line_matches(line, keywords):
    """Return True if any of *keywords* occurs in *line*.

    The check is a plain, case-sensitive substring test, so the keyword
    'thing' also matches 'nothing' and 'object' does not match 'Object'.
    """
    return any(word in line for word in keywords)


def write_matches(src_dir, keywords, out_path):
    """Write every line of every file in *src_dir* that matches *keywords*.

    For each regular file directly inside *src_dir* (sub-directories are
    skipped, order is whatever ``listdir`` returns) a banner with the file
    name is written to *out_path*, followed by each matching line and a
    blank line closing the section.

    src_dir  -- directory whose files are scanned.
    keywords -- iterable of substrings; see ``line_matches``.
    out_path -- report file; created or truncated.
    """
    # List the inputs first so a missing directory fails before the
    # report file is created or truncated.
    names = [name for name in listdir(src_dir) if isfile(join(src_dir, name))]

    with open(out_path, 'w') as output:
        for name in names:
            output.write(SEPARATOR)
            output.write(name + '\n')
            output.write(SEPARATOR + '\n')
            # Context manager closes each input file; the original left
            # one handle open per scanned file.
            with open(join(src_dir, name), 'r') as source:
                for line in source:
                    if line_matches(line, keywords):
                        # Lines keep their own newline, so the extra '\n'
                        # leaves a blank line after each match (as before).
                        output.write(line + '\n')
            output.write('\n')


if __name__ == '__main__':
    write_matches(d, words, 'out_' + strwords + '.txt')
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print('*output written*')