This commit is contained in:
parent
8303353a02
commit
eb1fccee9d
1 changed files with 136 additions and 3 deletions
139
logs2pidgin.py
139
logs2pidgin.py
|
@ -7,6 +7,8 @@ import sys
|
||||||
import hashlib
|
import hashlib
|
||||||
import arrow
|
import arrow
|
||||||
import argparse
|
import argparse
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import csv
|
||||||
|
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
|
|
||||||
|
@ -45,9 +47,118 @@ def logcreate(fpath,contact, dt,account,plugin):
|
||||||
plugin
|
plugin
|
||||||
))
|
))
|
||||||
|
|
||||||
|
def do_facebook(account, logpathbase):
    """Convert a Facebook message export into per-contact log files.

    Two input files are read from fixed locations under ``~/tmp``:

    * ``facebook_lookup.csv`` - tab separated ``UID<TAB>Display Nice Name``
      lines, used to map a thread's display name back to the contact id.
    * ``facebook-messages.csv`` - the message export converted to CSV; the
      columns ``thread``, ``date``, ``sender`` and ``message`` are read.

    Group conversations (thread names containing ``', '``), rows whose
    timestamp cannot be parsed and rows whose thread has no lookup entry
    are skipped.

    Args:
        account: account name used as a directory component of the output.
        logpathbase: base directory under which
            ``facebook/<account>/<contact>/`` log files are written.
    """
    plugin = 'facebook'

    # the source data is from a facebook export and pidgin buddy list xml
    # after the alias was set for every facebook user by hand
    # the file contains lines constructed:
    # UID\tDisplay Nice Name
    lookupf = os.path.expanduser('~/tmp/facebook_lookup.csv')
    lookup = {}
    with open(lookupf, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter='\t'):
            # blank or truncated lines cannot be mapped; skip them instead
            # of failing with IndexError
            if len(row) < 2:
                continue
            lookup[row[1]] = row[0]

    # the csv file for the messages is from the Facebook Data export
    # converted with https://pypi.python.org/pypi/fbchat_archive_parser
    # as: fbcap messages.htm -f csv > ~/tmp/facebook-messages.csv
    dataf = os.path.expanduser('~/tmp/facebook-messages.csv')

    # the seconds are sometimes missing from the timestamps, so the
    # minute-precision format is tried first, then the one with seconds
    date_formats = ('YYYY-MM-DDTHH:mmZZ', 'YYYY-MM-DDTHH:mm:ssZZ')

    # newline='' lets the csv module handle line endings embedded in
    # quoted message bodies; the with-block guarantees the file is closed
    with open(dataf, newline='') as datafile:
        reader = csv.DictReader(datafile, skipinitialspace=True)
        for row in reader:
            thread = row.get('thread') or ''

            # skip conversations for now because I don't have any way of
            # getting the conversation id
            if ', ' in thread:
                continue

            dt = None
            for fmt in date_formats:
                try:
                    dt = arrow.get(row.get('date'), fmt)
                except (ValueError, TypeError, RuntimeError):
                    # NOTE(review): arrow's ParserError is believed to derive
                    # from RuntimeError in older releases and ValueError in
                    # newer ones - confirm; TypeError covers a missing date
                    continue
                break

            if dt is None:
                # without a timestamp the message cannot be filed; skipping
                # avoids reusing the previous row's dt (or a NameError on
                # the very first row)
                logging.error('failed to parse entry: %s', row)
                continue

            dt = dt.to('UTC')
            contact = lookup.get(thread)
            if not contact:
                continue
            msg = row.get('message')
            sender = row.get('sender')

            fpath = os.path.join(
                logpathbase,
                plugin,
                account,
                contact,
                logfilename(dt, nulltime=True)
            )

            os.makedirs(os.path.dirname(fpath), exist_ok=True)
            # logcreate is invoked for every row; presumably it is a no-op
            # when the log file already exists - TODO confirm
            logcreate(fpath, contact, dt, account, plugin)
            logappend(fpath, dt, sender, msg)
|
||||||
|
|
||||||
|
|
||||||
|
def do_zncfixed(znclogs, logpathbase, znctz):
    """Convert manually reorganised ZNC logs into log files under logpathbase.

    The ZNC logs are expected to have been arranged by hand beforehand as
    ``plugin/account/contact/YYYY-MM-DD.log``; plugin, account and contact
    are taken from the three directory levels above each log file and the
    date from the file name.

    Args:
        znclogs: directory searched recursively for ``*.log`` files.
        logpathbase: base directory of the generated log tree.
        znctz: timezone applied to the date parsed from the file name.
    """
    # "[HH:MM:SS] <sender> message"; lines of any other shape are ignored
    line_re = re.compile(
        r'^\[(?P<hour>[0-9]+):(?P<minute>[0-9]+):(?P<second>[0-9]+)\]\s+'
        r'<(?P<sender>.*?)>\s+(?P<msg>.*)$'
    )

    pattern = os.path.join(znclogs, '**', '*.log')
    for log in glob.glob(pattern, recursive=True):
        # walk up the tree one level at a time: contact/account/plugin
        contact_dir = os.path.dirname(log)
        account_dir = os.path.dirname(contact_dir)
        plugin_dir = os.path.dirname(account_dir)
        contact = os.path.basename(contact_dir)
        account = os.path.basename(account_dir)
        plugin = os.path.basename(plugin_dir)

        logging.info('converting log file: %s', log)

        day = os.path.basename(log).replace('.log', '')
        dt = arrow.get(day, 'YYYY-MM-DD').replace(tzinfo=znctz)

        # contacts starting with '#' get a ".chat" suffix on their directory
        fname = "%s.chat" % (contact) if contact.startswith("#") else contact

        fpath = os.path.join(logpathbase, plugin, account, fname, logfilename(dt))

        target_dir = os.path.dirname(fpath)
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)

        # read as bytes and drop undecodable sequences so a broken line
        # cannot abort the whole file
        with open(log, 'rb') as f:
            for raw in f:
                match = line_re.match(raw.decode('utf8', 'ignore'))
                if match is None:
                    continue
                parts = match.groupdict()
                # the date stays fixed; only the time of day changes per line
                dt = dt.replace(
                    hour=int(parts['hour']),
                    minute=int(parts['minute']),
                    second=int(parts['second'])
                )
                logcreate(fpath, contact, dt, account, plugin)
                logappend(fpath, dt, parts['sender'], parts['msg'])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def do_msnplus(msgpluslogs, logpathbase, msgplustz):
|
def do_msnplus(msgpluslogs, logpathbase, msgplustz):
|
||||||
from bs4 import BeautifulSoup
|
# from bs4 import BeautifulSoup
|
||||||
NOPAR = re.compile(r'\((.*)\)')
|
NOPAR = re.compile(r'\((.*)\)')
|
||||||
NOCOLON = re.compile(r'(.*):?')
|
NOCOLON = re.compile(r'(.*):?')
|
||||||
|
|
||||||
|
@ -253,13 +364,19 @@ if __name__ == '__main__':
|
||||||
help='absolute path to Pidgin skype logs'
|
help='absolute path to Pidgin skype logs'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
'--facebook_account',
|
||||||
|
default='',
|
||||||
|
help='facebook account name'
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--loglevel',
|
'--loglevel',
|
||||||
default='warning',
|
default='warning',
|
||||||
help='change loglevel'
|
help='change loglevel'
|
||||||
)
|
)
|
||||||
|
|
||||||
for allowed in ['skype', 'trillian', 'msnplus']:
|
for allowed in ['skype', 'trillian', 'msnplus', 'znc', 'facebook']:
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--%s' % allowed,
|
'--%s' % allowed,
|
||||||
action='store_true',
|
action='store_true',
|
||||||
|
@ -267,7 +384,7 @@ if __name__ == '__main__':
|
||||||
help='convert %s logs' % allowed
|
help='convert %s logs' % allowed
|
||||||
)
|
)
|
||||||
|
|
||||||
if allowed != 'skype':
|
if allowed != 'skype' or allowed != 'facebook':
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--%s_logs' % allowed,
|
'--%s_logs' % allowed,
|
||||||
default=os.path.expanduser('~/.%s/logs' % allowed),
|
default=os.path.expanduser('~/.%s/logs' % allowed),
|
||||||
|
@ -299,6 +416,14 @@ if __name__ == '__main__':
|
||||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
format='%(asctime)s - %(levelname)s - %(message)s'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if params.get('facebook'):
|
||||||
|
logging.info('facebook enabled')
|
||||||
|
do_facebook(
|
||||||
|
params.get('facebook_account'),
|
||||||
|
params.get('pidgin_logs')
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if params.get('skype'):
|
if params.get('skype'):
|
||||||
logging.info('Skype enabled; parsing skype logs')
|
logging.info('Skype enabled; parsing skype logs')
|
||||||
do_skype(
|
do_skype(
|
||||||
|
@ -321,3 +446,11 @@ if __name__ == '__main__':
|
||||||
params.get('pidgin_logs'),
|
params.get('pidgin_logs'),
|
||||||
params.get('msnplus_timezone'),
|
params.get('msnplus_timezone'),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if params.get('znc'):
|
||||||
|
logging.info('ZNC enabled; parsing znc logs')
|
||||||
|
do_zncfixed(
|
||||||
|
params.get('znc_logs'),
|
||||||
|
params.get('pidgin_logs'),
|
||||||
|
params.get('znc_timezone'),
|
||||||
|
)
|
||||||
|
|
Loading…
Reference in a new issue