"""Convert third-party instant-messenger logs into Pidgin-style text logs."""

import argparse
import glob
import hashlib
import logging
import os
import re
import sqlite3
import sys
from pprint import pprint

import arrow


def _epoch(dt):
    """Return the POSIX timestamp of the arrow object *dt*.

    ``timestamp`` is a property in arrow < 1.0 and a method from 1.0 on;
    accept both so ``os.utime`` always receives a number.
    """
    stamp = dt.timestamp
    return stamp() if callable(stamp) else stamp


def logfilename(dt, nulltime=False):
    """Return the log file name for a conversation that started at *dt*.

    The name has the form ``YYYY-MM-DD.HHmmss<utc offset><tz name>.txt``.

    dt       -- arrow object; ``format()`` and ``datetime`` are used
    nulltime -- when true, ``000000`` replaces the time-of-day part
    """
    clock = '000000' if nulltime else dt.format('HHmmss')
    return "%s.%s%s%s.txt" % (
        dt.format("YYYY-MM-DD"),
        clock,
        dt.datetime.strftime("%z"),
        dt.datetime.strftime("%Z"),
    )


def logappend(fpath, dt, sender, msg):
    """Append one ``(timestamp) sender: message`` line to *fpath*.

    After writing, the access and modification times of the file and of
    the directory containing it are set to the time of the message.

    fpath  -- path of the log file to append to
    dt     -- arrow object holding the time of the message
    sender -- display name written in front of the message
    msg    -- message text
    """
    logging.debug('appending log: %s', fpath)
    # Explicit encoding: chat text is frequently non-ASCII and must not
    # depend on the locale of the machine running the conversion.
    with open(fpath, 'at', encoding='utf-8') as f:
        f.write("(%s) %s: %s\n" % (
            dt.format('YYYY-MM-DD HH:mm:ss'),
            sender,
            msg,
        ))

    stamp = _epoch(dt)
    os.utime(fpath, (stamp, stamp))
    # abspath() keeps dirname() from returning '' for a bare file name.
    os.utime(os.path.dirname(os.path.abspath(fpath)), (stamp, stamp))


def logcreate(fpath, contact, dt, account, plugin):
    """Create *fpath* with a conversation header line unless it exists.

    An existing file is left untouched, so the function can safely be
    called before every append.

    fpath   -- path of the log file to create
    contact -- name of the conversation partner
    dt      -- arrow object holding the start of the conversation
    account -- name of the local account
    plugin  -- protocol name written in parentheses at the end
    """
    try:
        # 'x' fails if the file exists, replacing a separate exists() check.
        f = open(fpath, 'xt', encoding='utf-8')
    except FileExistsError:
        return

    logging.info('creating converted log: %s', fpath)
    with f:
        f.write("Conversation with %s at %s on %s (%s)\n" % (
            contact,
            # 'DD' is the day of the month; the former 'dd' is not an
            # arrow token and rendered as an empty string.
            dt.format('ddd DD MMM YYYY hh:mm:ss A ZZZ'),
            account,
            plugin,
        ))


def do_msnplus(msgpluslogs, logpathbase, msgplustz):
    """Convert the ``*.html`` logs found below *msgpluslogs*.

    Each file is read as UTF-16 HTML.  The name of the directory holding a
    file is used as the contact, the first ``li.in`` element supplies the
    account, and every ``.mplsession`` element becomes one text log at
    ``<logpathbase>/msn/<account>/<contact>/<logfilename>``.

    msgpluslogs -- directory that is searched recursively for ``*.html``
    logpathbase -- root directory of the converted logs
    msgplustz   -- timezone applied to the timestamps found in the logs
    """
    from bs4 import BeautifulSoup

    NOPAR = re.compile(r'\((.*)\)')
    plugin = 'msn'

    pattern = os.path.join(msgpluslogs, '**', '*.html')
    for log in glob.glob(pattern, recursive=True):
        logging.info('converting log file: %s', log)
        contact = os.path.basename(os.path.dirname(log))

        with open(log, 'rt', encoding='UTF-16') as f:
            html = BeautifulSoup(f.read(), "html.parser")

        own = html.find('li', attrs={'class': 'in'})
        if own is None or own.span is None or own.span.string is None:
            logging.warning('no account name found in %s, skipping', log)
            continue
        # Drop the parentheses wrapped around the account name.
        account = NOPAR.sub(r'\g<1>', own.span.string)

        for session in html.find_all(attrs={'class': 'mplsession'}):
            session_id = session.get('id')
            if not session_id:
                logging.warning('session without id in %s, skipping', log)
                continue

            dt = arrow.get(
                session_id.replace('Session_', ''),
                'YYYY-MM-DDTHH-mm-ss'
            ).replace(tzinfo=msgplustz)
            seconds = int(dt.format('s'))

            fpath = os.path.join(
                logpathbase,
                plugin,
                account,
                contact,
                logfilename(dt)
            )
            os.makedirs(os.path.dirname(fpath), exist_ok=True)

            for row in session.find_all('tr'):
                # Rows only carry hour and minute, so the second is a
                # running counter starting at the session's second; it is
                # advanced for every row, including skipped ones.
                seconds = (seconds + 1) % 60

                tspan = row.find(attrs={'class': 'time'})
                if tspan is None or tspan.string is None:
                    continue
                # Take the time element out of the row before the sender
                # and message cells are read.
                tspan.extract()
                clock = (
                    tspan.string
                    .replace('(', '')
                    .replace(')', '')
                    .strip()
                    .split(':')
                )

                head = row.find('th')
                if head is None or not head.string:
                    continue
                sender = head.string.strip().split(':')[0]

                cell = row.find('td')
                if cell is None:
                    continue
                msg = cell.get_text()

                # NOTE: every row reuses the date of the session start.
                mindt = dt.replace(
                    hour=int(clock[0]),
                    minute=int(clock[1]),
                    second=seconds
                )

                logcreate(fpath, contact, dt, account, plugin)
                logappend(fpath, mindt, sender, msg)