This commit is contained in:
parent 8303353a02
commit eb1fccee9d
1 changed file with 136 additions and 3 deletions
logs2pidgin.py
@@ -7,6 +7,8 @@ import sys
 import hashlib
 import arrow
 import argparse
+from bs4 import BeautifulSoup
+import csv
 
 from pprint import pprint
 
@@ -45,9 +47,118 @@ def logcreate(fpath,contact, dt,account,plugin):
                 plugin
             ))
 
+def do_facebook(account, logpathbase):
+    plugin = 'facebook'
+
+    # the source data is from a facebook export and pidgin buddy list xml
+    # after the alias was set for every facebook user by hand
+    # the file contains lines constructed:
+    # UID\tDisplay Nice Name
+    lookupf = os.path.expanduser('~/tmp/facebook_lookup.csv')
+    lookup = {}
+    with open(lookupf, newline='') as csvfile:
+        reader = csv.reader(csvfile, delimiter='\t')
+        for row in reader:
+            lookup.update({row[1]: row[0]})
+
+    # the csv file for the messages is from the Facebook Data export
+    # converted with https://pypi.python.org/pypi/fbchat_archive_parser
+    # as: fbcap messages.htm -f csv > ~/tmp/facebook-messages.csv
+    dataf = os.path.expanduser('~/tmp/facebook-messages.csv')
+    reader = csv.DictReader(open(dataf), skipinitialspace=True)
+    for row in reader:
+        # skip conversations for now because I don't have any way of getting
+        # the conversation id
+        if ', ' in row['thread']:
+            continue
+
+        # the seconds are sometimes missing from the timestamps
+        try:
+            dt = arrow.get(row.get('date'), 'YYYY-MM-DDTHH:mmZZ')
+        except:
+            try:
+                dt = arrow.get(row.get('date'), 'YYYY-MM-DDTHH:mm:ssZZ')
+            except:
+                logging.error('failed to parse entry: %s', row)
+                continue
+
+        dt = dt.to('UTC')
+        contact = lookup.get(row.get('thread'))
+        if not contact:
+            continue
+        msg = row.get('message')
+        sender = row.get('sender')
+
+        fpath = os.path.join(
+            logpathbase,
+            plugin,
+            account,
+            contact,
+            logfilename(dt, nulltime=True)
+        )
+
+        if not os.path.isdir(os.path.dirname(fpath)):
+            os.makedirs(os.path.dirname(fpath))
+        logcreate(fpath, contact, dt, account, plugin)
+        logappend(fpath, dt, sender, msg)
+
+
+def do_zncfixed(znclogs, logpathbase, znctz):
+    # I manually organised the ZNC logs into pidgin-like
+    # plugin/account/contact/logfiles.log
+    # structure before parsing them
+    LINESPLIT = re.compile(
+        r'^\[(?P<hour>[0-9]+):(?P<minute>[0-9]+):(?P<second>[0-9]+)\]\s+'
+        r'<(?P<sender>.*?)>\s+(?P<msg>.*)$'
+    )
+    searchin = os.path.join(
+        znclogs,
+        '**',
+        '*.log'
+    )
+    logs = glob.glob(searchin, recursive=True)
+    for log in logs:
+        contact = os.path.basename(os.path.dirname(log))
+        account = os.path.basename(os.path.dirname(os.path.dirname(log)))
+        plugin = os.path.basename(os.path.dirname(os.path.dirname(os.path.dirname(log))))
+        logging.info('converting log file: %s' % (log))
+        dt = arrow.get(os.path.basename(log).replace('.log', ''), 'YYYY-MM-DD')
+        dt = dt.replace(tzinfo=znctz)
+
+        if contact.startswith("#"):
+            fname = "%s.chat" % (contact)
+        else:
+            fname = contact
+
+        fpath = os.path.join(
+            logpathbase,
+            plugin,
+            account,
+            fname,
+            logfilename(dt)
+        )
+
+        if not os.path.isdir(os.path.dirname(fpath)):
+            os.makedirs(os.path.dirname(fpath))
+
+        with open(log, 'rb') as f:
+            for line in f:
+                line = line.decode('utf8', 'ignore')
+                match = LINESPLIT.match(line)
+                if not match:
+                    continue
+                dt = dt.replace(
+                    hour=int(match.group('hour')),
+                    minute=int(match.group('minute')),
+                    second=int(match.group('second'))
+                )
+                logcreate(fpath, contact, dt, account, plugin)
+                logappend(fpath, dt, match.group('sender'), match.group('msg'))
+
+
 def do_msnplus(msgpluslogs, logpathbase, msgplustz):
-    from bs4 import BeautifulSoup
+    # from bs4 import BeautifulSoup
     NOPAR = re.compile(r'\((.*)\)')
     NOCOLON = re.compile(r'(.*):?')
 
@@ -253,13 +364,19 @@ if __name__ == '__main__':
         help='absolute path to Pidgin skype logs'
     )
 
+    parser.add_argument(
+        '--facebook_account',
+        default='',
+        help='facebook account name'
+    )
+
     parser.add_argument(
         '--loglevel',
         default='warning',
         help='change loglevel'
     )
 
-    for allowed in ['skype', 'trillian', 'msnplus']:
+    for allowed in ['skype', 'trillian', 'msnplus', 'znc', 'facebook']:
         parser.add_argument(
             '--%s' % allowed,
             action='store_true',
@@ -267,7 +384,7 @@ if __name__ == '__main__':
             help='convert %s logs' % allowed
         )
 
-        if allowed != 'skype':
+        if allowed not in ('skype', 'facebook'):
             parser.add_argument(
                 '--%s_logs' % allowed,
                 default=os.path.expanduser('~/.%s/logs' % allowed),
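With these options in place, a conversion run for the two new sources would presumably look something like `logs2pidgin.py --facebook --facebook_account <name> --znc`, with `--znc_logs` falling back to the `~/.znc/logs` default generated above; only flags visible in this diff are named here, everything else keeps the defaults defined elsewhere in the script.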
@@ -299,6 +416,14 @@ if __name__ == '__main__':
         format='%(asctime)s - %(levelname)s - %(message)s'
     )
 
+    if params.get('facebook'):
+        logging.info('facebook enabled')
+        do_facebook(
+            params.get('facebook_account'),
+            params.get('pidgin_logs')
+        )
+
+
     if params.get('skype'):
         logging.info('Skype enabled; parsing skype logs')
         do_skype(
@@ -321,3 +446,11 @@ if __name__ == '__main__':
             params.get('pidgin_logs'),
             params.get('msnplus_timezone'),
         )
+
+    if params.get('znc'):
+        logging.info('ZNC enabled; parsing znc logs')
+        do_zncfixed(
+            params.get('znc_logs'),
+            params.get('pidgin_logs'),
+            params.get('znc_timezone'),
+        )
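As a sanity check of the line format do_zncfixed() expects, the LINESPLIT pattern from the hunk above can be exercised on its own; the sample log line below is invented for illustration:

# standalone check of the ZNC log-line pattern used by do_zncfixed();
# the sample line is made up for illustration
import re

LINESPLIT = re.compile(
    r'^\[(?P<hour>[0-9]+):(?P<minute>[0-9]+):(?P<second>[0-9]+)\]\s+'
    r'<(?P<sender>.*?)>\s+(?P<msg>.*)$'
)

match = LINESPLIT.match('[21:04:13] <somenick> hello from #example')
if match:
    print(match.group('hour'), match.group('minute'), match.group('second'))  # 21 04 13
    print(match.group('sender'))  # somenick
    print(match.group('msg'))     # hello from #example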