This commit is contained in:
Peter Molnar 2018-03-04 21:09:50 +00:00 committed by GitHub
parent 8303353a02
commit eb1fccee9d

View file

@ -7,6 +7,8 @@ import sys
import hashlib import hashlib
import arrow import arrow
import argparse import argparse
from bs4 import BeautifulSoup
import csv
from pprint import pprint from pprint import pprint
@ -45,9 +47,118 @@ def logcreate(fpath,contact, dt,account,plugin):
plugin plugin
)) ))
def do_facebook(account, logpathbase):
    """Convert a Facebook message export into per-contact log files.

    Reads two fixed input files from the user's home directory:

    * ``~/tmp/facebook_lookup.csv`` - tab separated ``UID<TAB>Display Name``
      lines, used to map a thread's display name back to a contact id.
    * ``~/tmp/facebook-messages.csv`` - the Facebook data export converted
      with ``fbcap messages.htm -f csv``.

    Every one-to-one message whose thread name is present in the lookup
    table is written below ``logpathbase/facebook/<account>/<contact>/``
    through the module's ``logfilename``, ``logcreate`` and ``logappend``
    helpers (defined elsewhere in this file).

    Args:
        account: facebook account name, used as a path component.
        logpathbase: root directory of the generated log tree.
    """
    plugin = 'facebook'

    # the source data is from a facebook export and pidgin buddy list xml
    # after the alias was set for every facebook user by hand
    # the file contains lines constructed:
    # UID\tDisplay Nice Name
    lookupf = os.path.expanduser('~/tmp/facebook_lookup.csv')
    lookup = {}
    with open(lookupf, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter='\t'):
            # blank or truncated lines carry no usable mapping
            if len(row) < 2:
                continue
            lookup[row[1]] = row[0]

    # the csv file for the messages is from the Facebook Data export
    # converted with https://pypi.python.org/pypi/fbchat_archive_parser
    # as: fbcap messages.htm -f csv > ~/tmp/facebook-messages.csv
    dataf = os.path.expanduser('~/tmp/facebook-messages.csv')

    # the seconds are sometimes missing from the timestamps, so both
    # layouts are tried, in this order
    date_formats = ('YYYY-MM-DDTHH:mmZZ', 'YYYY-MM-DDTHH:mm:ssZZ')

    # newline='' lets the csv module handle line breaks embedded in
    # quoted message bodies; the context manager closes the file
    with open(dataf, newline='') as msgfile:
        for row in csv.DictReader(msgfile, skipinitialspace=True):
            thread = row.get('thread') or ''

            # skip conversations for now because I don't have any way of
            # getting the conversation id
            if ', ' in thread:
                continue

            dt = None
            for fmt in date_formats:
                try:
                    dt = arrow.get(row.get('date'), fmt)
                    break
                except (arrow.parser.ParserError, TypeError, ValueError):
                    continue

            if dt is None:
                # without a timestamp the entry cannot be filed anywhere;
                # skip it instead of reusing the previous row's time
                logging.error('failed to parse entry: %s', row)
                continue

            dt = dt.to('UTC')

            contact = lookup.get(thread)
            if not contact:
                continue

            msg = row.get('message')
            sender = row.get('sender')

            fpath = os.path.join(
                logpathbase,
                plugin,
                account,
                contact,
                logfilename(dt, nulltime=True)
            )

            os.makedirs(os.path.dirname(fpath), exist_ok=True)

            # called for every row, exactly as before - presumably a no-op
            # once the file exists; verify against logcreate
            logcreate(fpath, contact, dt, account, plugin)
            logappend(fpath, dt, sender, msg)
def do_zncfixed(znclogs, logpathbase, znctz):
    """Convert manually organised ZNC logs into the target log tree.

    The ZNC logs are expected to be arranged beforehand in a pidgin-like
    ``plugin/account/contact/YYYY-MM-DD.log`` structure below ``znclogs``;
    plugin, account and contact are taken from the three directory levels
    above each log file and the day from the file name.

    Only lines shaped like ``[HH:MM:SS] <sender> message`` are converted;
    everything else in a log file is ignored. Output is written through the
    module's ``logfilename``, ``logcreate`` and ``logappend`` helpers
    (defined elsewhere in this file).

    Args:
        znclogs: root directory that is searched recursively for ``*.log``.
        logpathbase: root directory of the generated log tree.
        znctz: timezone the ZNC timestamps were recorded in, handed to
            arrow as ``tzinfo``.
    """
    # I manually organised the ZNC logs into pidgin-like
    # plugin/account/contact/logfiles.log
    # structure before parsing them
    LINESPLIT = re.compile(
        r'^\[(?P<hour>[0-9]+):(?P<minute>[0-9]+):(?P<second>[0-9]+)\]\s+'
        r'<(?P<sender>.*?)>\s+(?P<msg>.*)$'
    )

    searchin = os.path.join(
        znclogs,
        '**',
        '*.log'
    )

    for log in glob.glob(searchin, recursive=True):
        contact_dir = os.path.dirname(log)
        account_dir = os.path.dirname(contact_dir)
        plugin_dir = os.path.dirname(account_dir)

        contact = os.path.basename(contact_dir)
        account = os.path.basename(account_dir)
        plugin = os.path.basename(plugin_dir)

        logging.info('converting log file: %s', log)

        # strip only the extension; the remainder has to be the log's day
        daystr = os.path.splitext(os.path.basename(log))[0]
        try:
            day = arrow.get(daystr, 'YYYY-MM-DD')
        except (arrow.parser.ParserError, TypeError, ValueError):
            # a stray, non-dated .log file should not abort the whole run
            logging.warning('skipping log with unparsable name: %s', log)
            continue
        day = day.replace(tzinfo=znctz)

        # channel names start with '#'; their directory gets a .chat suffix
        if contact.startswith("#"):
            fname = "%s.chat" % (contact)
        else:
            fname = contact

        fpath = os.path.join(
            logpathbase,
            plugin,
            account,
            fname,
            logfilename(day)
        )

        os.makedirs(os.path.dirname(fpath), exist_ok=True)

        # read as bytes and decode leniently: undecodable bytes are
        # dropped instead of aborting the conversion
        with open(log, 'rb') as f:
            for raw in f:
                match = LINESPLIT.match(raw.decode('utf8', 'ignore'))
                if not match:
                    continue

                # the file name supplies the date, each line the time
                stamp = day.replace(
                    hour=int(match.group('hour')),
                    minute=int(match.group('minute')),
                    second=int(match.group('second'))
                )

                # called for every line, exactly as before - presumably a
                # no-op once the file exists; verify against logcreate
                logcreate(fpath, contact, stamp, account, plugin)
                logappend(
                    fpath,
                    stamp,
                    match.group('sender'),
                    match.group('msg')
                )
def do_msnplus(msgpluslogs, logpathbase, msgplustz): def do_msnplus(msgpluslogs, logpathbase, msgplustz):
from bs4 import BeautifulSoup # from bs4 import BeautifulSoup
NOPAR = re.compile(r'\((.*)\)') NOPAR = re.compile(r'\((.*)\)')
NOCOLON = re.compile(r'(.*):?') NOCOLON = re.compile(r'(.*):?')
@ -253,13 +364,19 @@ if __name__ == '__main__':
help='absolute path to Pidgin skype logs' help='absolute path to Pidgin skype logs'
) )
parser.add_argument(
'--facebook_account',
default='',
help='facebook account name'
)
parser.add_argument( parser.add_argument(
'--loglevel', '--loglevel',
default='warning', default='warning',
help='change loglevel' help='change loglevel'
) )
for allowed in ['skype', 'trillian', 'msnplus']: for allowed in ['skype', 'trillian', 'msnplus', 'znc', 'facebook']:
parser.add_argument( parser.add_argument(
'--%s' % allowed, '--%s' % allowed,
action='store_true', action='store_true',
@ -267,7 +384,7 @@ if __name__ == '__main__':
help='convert %s logs' % allowed help='convert %s logs' % allowed
) )
if allowed != 'skype': if allowed != 'skype' or allowed != 'facebook':
parser.add_argument( parser.add_argument(
'--%s_logs' % allowed, '--%s_logs' % allowed,
default=os.path.expanduser('~/.%s/logs' % allowed), default=os.path.expanduser('~/.%s/logs' % allowed),
@ -299,6 +416,14 @@ if __name__ == '__main__':
format='%(asctime)s - %(levelname)s - %(message)s' format='%(asctime)s - %(levelname)s - %(message)s'
) )
if params.get('facebook'):
logging.info('facebook enabled')
do_facebook(
params.get('facebook_account'),
params.get('pidgin_logs')
)
if params.get('skype'): if params.get('skype'):
logging.info('Skype enabled; parsing skype logs') logging.info('Skype enabled; parsing skype logs')
do_skype( do_skype(
@ -321,3 +446,11 @@ if __name__ == '__main__':
params.get('pidgin_logs'), params.get('pidgin_logs'),
params.get('msnplus_timezone'), params.get('msnplus_timezone'),
) )
if params.get('znc'):
logging.info('ZNC enabled; parsing znc logs')
do_zncfixed(
params.get('znc_logs'),
params.get('pidgin_logs'),
params.get('znc_timezone'),
)