From eb1fccee9d7f8e4f916e7d609d2fe6426c2fd1d1 Mon Sep 17 00:00:00 2001
From: Peter Molnar
Date: Sun, 4 Mar 2018 21:09:50 +0000
Subject: [PATCH] Add Facebook and ZNC log converters

Add do_facebook() for Facebook data exports and do_zncfixed() for ZNC
logs, wire both into the command line, and import BeautifulSoup at
module level instead of inside do_msnplus().
---
 Reviewed version of the submitted patch; the blob id on the index line
 still names the originally submitted post-image.

 logs2pidgin.py | 168 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 165 insertions(+), 3 deletions(-)

diff --git a/logs2pidgin.py b/logs2pidgin.py
index 7b481f0..30dc700 100644
--- a/logs2pidgin.py
+++ b/logs2pidgin.py
@@ -7,6 +7,8 @@ import sys
 import hashlib
 import arrow
 import argparse
+from bs4 import BeautifulSoup
+import csv
 from pprint import pprint
 
 
@@ -45,9 +47,148 @@ def logcreate(fpath,contact, dt,account,plugin):
             plugin
         ))
 
+
+def _facebook_timestamp(raw):
+    """Parse a Facebook export timestamp and return it in UTC.
+
+    Returns None when the value matches neither known format.
+    """
+    # the seconds are sometimes missing from the timestamps
+    for fmt in ('YYYY-MM-DDTHH:mmZZ', 'YYYY-MM-DDTHH:mm:ssZZ'):
+        try:
+            parsed = arrow.get(raw, fmt)
+        except Exception:
+            # wrong format or missing value: try the next format
+            continue
+        return parsed.to('UTC')
+    return None
+
+
+def do_facebook(account, logpathbase):
+    """Convert a Facebook data export into Pidgin-style log files.
+
+    Reads two fixed input files: ~/tmp/facebook_lookup.csv, which maps
+    display names to contact ids, and ~/tmp/facebook-messages.csv with
+    the messages. Threads without a lookup entry are skipped.
+
+    account     -- account name, used as a directory level in the output
+    logpathbase -- root directory of the Pidgin logs to write into
+    """
+    plugin = 'facebook'
+
+    # the source data is from a facebook export and pidgin buddy list xml
+    # after the alias was set for every facebook user by hand
+    # the file contains lines constructed:
+    # UID\tDisplay Nice Name
+    lookupf = os.path.expanduser('~/tmp/facebook_lookup.csv')
+    lookup = {}
+    with open(lookupf, newline='') as csvfile:
+        for row in csv.reader(csvfile, delimiter='\t'):
+            # blank or truncated lines carry no mapping
+            if len(row) < 2:
+                continue
+            lookup[row[1]] = row[0]
+
+    # the csv file for the messages is from the Facebook Data export
+    # converted with https://pypi.python.org/pypi/fbchat_archive_parser
+    # as: fbcap messages.htm -f csv > ~/tmp/facebook-messages.csv
+    dataf = os.path.expanduser('~/tmp/facebook-messages.csv')
+    with open(dataf, newline='') as csvfile:
+        for row in csv.DictReader(csvfile, skipinitialspace=True):
+            # skip conversations for now because I don't have any way of
+            # getting the conversation id
+            thread = row.get('thread') or ''
+            if ', ' in thread:
+                continue
+
+            dt = _facebook_timestamp(row.get('date'))
+            if dt is None:
+                # an entry without a timestamp cannot be filed
+                logging.error('failed to parse entry: %s', row)
+                continue
+
+            contact = lookup.get(thread)
+            if not contact:
+                continue
+            msg = row.get('message')
+            sender = row.get('sender')
+
+            fpath = os.path.join(
+                logpathbase,
+                plugin,
+                account,
+                contact,
+                logfilename(dt, nulltime=True)
+            )
+
+            os.makedirs(os.path.dirname(fpath), exist_ok=True)
+            logcreate(fpath, contact, dt, account, plugin)
+            logappend(fpath, dt, sender, msg)
+
+
+def do_zncfixed(znclogs, logpathbase, znctz):
+    """Convert ZNC log files into Pidgin-style log files.
+
+    znclogs     -- root of the ZNC logs, organised as
+                   plugin/account/contact/YYYY-MM-DD.log
+    logpathbase -- root directory of the Pidgin logs to write into
+    znctz       -- timezone applied to the timestamps in the ZNC logs
+    """
+    # I manually organised the ZNC logs into pidgin-like
+    # plugin/account/contact/logfiles.log
+    # structure before parsing them
+    LINESPLIT = re.compile(
+        r'^\[(?P<hour>[0-9]+):(?P<minute>[0-9]+):(?P<second>[0-9]+)\]\s+'
+        r'<(?P<sender>.*?)>\s+(?P<msg>.*)$'
+    )
+    searchin = os.path.join(
+        znclogs,
+        '**',
+        '*.log'
+    )
+    for log in glob.glob(searchin, recursive=True):
+        contactdir = os.path.dirname(log)
+        accountdir = os.path.dirname(contactdir)
+        contact = os.path.basename(contactdir)
+        account = os.path.basename(accountdir)
+        plugin = os.path.basename(os.path.dirname(accountdir))
+        logging.info('converting log file: %s', log)
+        day = os.path.basename(log).replace('.log', '')
+        dt = arrow.get(day, 'YYYY-MM-DD')
+        dt = dt.replace(tzinfo=znctz)
+
+        # contacts starting with # are filed under a ".chat" directory
+        if contact.startswith("#"):
+            fname = "%s.chat" % (contact)
+        else:
+            fname = contact
+
+        fpath = os.path.join(
+            logpathbase,
+            plugin,
+            account,
+            fname,
+            logfilename(dt)
+        )
+
+        os.makedirs(os.path.dirname(fpath), exist_ok=True)
+
+        with open(log, 'rb') as f:
+            for line in f:
+                line = line.decode('utf8', 'ignore')
+                match = LINESPLIT.match(line)
+                if not match:
+                    continue
+                dt = dt.replace(
+                    hour=int(match.group('hour')),
+                    minute=int(match.group('minute')),
+                    second=int(match.group('second'))
+                )
+                logcreate(fpath, contact, dt, account, plugin)
+                logappend(fpath, dt, match.group('sender'), match.group('msg'))
+
 
 def do_msnplus(msgpluslogs, logpathbase, msgplustz):
-    from bs4 import BeautifulSoup
 
     NOPAR = re.compile(r'\((.*)\)')
     NOCOLON = re.compile(r'(.*):?')
@@ -253,13 +394,19 @@ if __name__ == '__main__':
         help='absolute path to Pidgin skype logs'
     )
 
+    parser.add_argument(
+        '--facebook_account',
+        default='',
+        help='facebook account name'
+    )
+
     parser.add_argument(
         '--loglevel',
         default='warning',
         help='change loglevel'
     )
 
-    for allowed in ['skype', 'trillian', 'msnplus']:
+    for allowed in ['skype', 'trillian', 'msnplus', 'znc', 'facebook']:
         parser.add_argument(
             '--%s' % allowed,
             action='store_true',
@@ -267,7 +414,7 @@ if __name__ == '__main__':
             help='convert %s logs' % allowed
         )
 
-        if allowed != 'skype':
+        if allowed not in ('skype', 'facebook'):
             parser.add_argument(
                 '--%s_logs' % allowed,
                 default=os.path.expanduser('~/.%s/logs' % allowed),
@@ -299,6 +446,13 @@ if __name__ == '__main__':
         format='%(asctime)s - %(levelname)s - %(message)s'
     )
 
+    if params.get('facebook'):
+        logging.info('facebook enabled')
+        do_facebook(
+            params.get('facebook_account'),
+            params.get('pidgin_logs')
+        )
+
     if params.get('skype'):
         logging.info('Skype enabled; parsing skype logs')
         do_skype(
@@ -321,3 +475,11 @@ if __name__ == '__main__':
             params.get('pidgin_logs'),
             params.get('msnplus_timezone'),
         )
+
+    if params.get('znc'):
+        logging.info('ZNC enabled; parsing znc logs')
+        do_zncfixed(
+            params.get('znc_logs'),
+            params.get('pidgin_logs'),
+            params.get('znc_timezone'),
+        )