""" chatlog.Reader chatlog.Writer """ from HTMLParser import HTMLParser import time import datetime import sys import re def _d(*args): for a in args: sys.stderr.write('%s '%a) sys.stderr.write('\n') class XChatParser: def __init__(self, fn): f = open(fn) data_str = f.read() self.chat = [] data = data_str.splitlines() for d in data: s0 = d.split(' <') if len(s0) == 1: if d.startswith('**** BEGIN'): self.decode_start(d) else: if d.startswith('Type'): continue s1 = '%d %s'%(self.start_date.year,s0[0]) dt = datetime.datetime.strptime(s1,'%Y %b %d %H:%M:%S') s2 = s0[1].split('>') author = s2[0] t = s2[-1] self.chat.append({'date':dt, 'author':author, 'text':t, 'type':'chatlog'}) def decode_start(self, l): pat = '**** BEGIN LOGGING AT %a %b %d %H:%M:%S %Y' self.start_date = datetime.datetime.strptime(l, pat) # create a subclass and override the handler methods class SkypeParser(HTMLParser): states = [ 'no_op' ,'created', 'new_message_local_user', 'new_message_remote', 'remote_user', 'user_end' , 'time', 'content' ] def start(self, filename): data_str = False self.state = 'no_op' self.current = {} self.chat = [] try: f = open(filename) data_str = f.read() except IOError: print 'Cannot open %s: expect missing data'%(filename,) except Exception as e: print 'Unhandle exception: %s'%(e,) finally: f.close() self.feed(data_str.replace(r'
','\n')) def handle_starttag(self, tag, attrs): da = {} for a in attrs: da[a[0]] = a[1].split() try: getattr(self, tag)(da) except AttributeError: pass def dt(self, da): if 'class' in da: if 'remote' in da['class']: self.state = 'new_message_remote' elif 'local' in da['class']: self.state = 'new_message_local_user' def a(self, da): if self.state == 'new_message_remote': self.state = 'remote_user' def span(self, da): if self.state == 'user_end': self.state = 'time' def p(self, da): if self.state == 'no_op': self.state = 'created' def dd(self, da): if self.state == 'time': self.state = 'content' def h3(self, da): if self.state == 'no_op': self.state = 'change_start' #def handle_endtag(self, tag): #pass def new_message_local_user(self,data): #sys.stderr.write('=> new_message_local_user\n') self.current['author'] = data self.state = 'user_end' def remote_user(self,data): #sys.stderr.write('=> remote_user\n') self.current['author'] = data self.state = 'user_end' def time(self, data): dt = data.split(':') tmp_t = datetime.time(int(dt[0]),int(dt[1]),int(dt[2])) #if tmp_t.hour < self.start_date.hour: #raise dc = datetime.datetime.combine(self.start_date, tmp_t) self.current['date'] = dc def content(self, data): #sys.stderr.write('=> content\n') self.current['text'] = data self.current['type'] = 'chatlog' self.state = 'no_op' self.chat.append(self.current) #sys.stderr.write('=> %s\n'%(self.current,)) self.current = {} def created(self, data): self.start_date = datetime.datetime.strptime(data, 'Created on %Y-%m-%d %H:%M:%S.') self.state = 'no_op' def change_start(self, data): self.start_date = datetime.datetime.strptime(data, '%Y-%m-%d') self.state = 'no_op' def handle_data(self, data): try: getattr(self, self.state)(data) except AttributeError: pass class Reader: def __init__(self, filename, images = [], delta = 120): self.chat = [] sp = None if filename.endswith('html'): sp = SkypeParser() sp.start(filename) else: sp = XChatParser(filename) t_delta = datetime.timedelta(0,delta,0) c_len = len(sp.chat) c_idx = 0 tmp = [] in_flag = False for im in images: for c in range(0, c_len): itvl = self.abs_itvl(sp.chat[c]['date'], im['date']) #sys.stderr.write('[%s][%d] %s \t %s\n'%(filename, c, itvl,itvl < t_delta)) if itvl < t_delta: in_flag = True if c not in tmp: #_d(filename,c,sp.chat[c]['text']) tmp.append(c) elif in_flag: in_flag = False break for i in tmp: self.chat.append(sp.chat[i]) def abs_itvl(self, a, b): if a < b: return b - a else: return a - b class Writer: tex_special_chars = {r'&': '\\&', r'%': '\\%', r'$': '\\$', r'#': '\\#', r'_': '\\_', r'{': '\\{', r'}': '\\}', r'~': '\\textasciitilde{}', r'^': '\\textasciicircum{}', '\\' : '\\textbackslash{}', '|':'\\textbar{}'} def __init__(self, cldict): self.log = cldict self.et_pat = '[%s]'%(re.escape(''.join(self.tex_special_chars.keys())),) def as_string(self): et_pat = '[%s]'%(re.escape(''.join(self.tex_special_chars.keys())),) esc_text = self.text if 'tex_escaped' not in self.log: esc_text = self.escape_tex(self.text) ret = [] #ret.append('\\section[chat:%d]{%d}'%(self.id,self.id)) #ret.append('\\it{%s /%s/} '%(self.author, self.date.strftime('%d.%m.%Y'))) #ret.append('\\tf{%s}'%(esc_text,)) #ret.append('\n') ret.append('\\chatbox{\\stylechatpiece{%s} }{\\stylechatinfo{%s %s}}{\\stylechat{%s}}'%(self.id,self.author, self.date.strftime('%H:%M'), esc_text)) return '\n\n'.join(ret) def escape_tex_cb(self, pt): r = pt.group() if r in self.tex_special_chars: return self.tex_special_chars[r] return r def escape_tex(self, text): return re.sub(self.et_pat, getattr(self, 'escape_tex_cb') , text) def __getattr__(self, name): try: return self.log[name] except Exception: raise AttributeError(name)