# nasg/pandoc.py

__author__ = "Peter Molnar"
__copyright__ = "Copyright 2017-2019, Peter Molnar"
__license__ = "apache-2.0"
__maintainer__ = "Peter Molnar"
__email__ = "mail@petermolnar.net"
import subprocess
import logging
2019-03-22 15:49:24 +00:00
from tempfile import gettempdir
import hashlib
import os
import settings
2019-03-22 15:49:24 +00:00
class Pandoc(str):
    """Convert text from one markup format to another by running ``pandoc``.

    Instantiating the class performs the conversion; ``str(Pandoc(text))``
    and ``repr(Pandoc(text))`` return the converted text. The underlying
    ``str`` value of the instance is still the source text, because the
    class does not override ``__new__``.

    Results are cached on disk in ``settings.tmpdir``. The cache file name
    is built from the concrete class name and the SHA-1 of the source text.

    Subclasses choose the conversion by overriding the class attributes
    below.
    """

    # pandoc reader format, and extensions appended to it with '+'
    in_format = 'html'
    in_options = []
    # pandoc writer format, and extensions appended to it with '+'
    out_format = 'plain'
    out_options = []
    # optional extra command line argument (e.g. '--columns=80');
    # omitted from the command when falsy
    columns = None

    @property
    def hash(self):
        """Return the SHA-1 hex digest of the UTF-8 encoded source text."""
        return hashlib.sha1(self.source.encode()).hexdigest()

    @property
    def cachefile(self):
        """Return the path of the cache file for this class and source."""
        name = "%s_%s.pandoc" % (self.__class__.__name__, self.hash)
        return os.path.join(settings.tmpdir, name)

    @property
    def cache(self):
        """Load a cached result into ``self.result`` if one exists.

        Returns:
            True when the cache file was read, False when it is missing.
        """
        path = self.cachefile
        try:
            # pandoc output is decoded as UTF-8 in __init__, so the cache
            # must not depend on the locale's default encoding
            with open(path, 'rt', encoding='utf-8') as f:
                self.result = f.read()
        except FileNotFoundError:
            return False
        return True

    @staticmethod
    def _format_arg(flag, fmt, options):
        """Return ``--<flag>=<fmt>`` with any extensions joined on by '+'."""
        return '--%s=%s' % (flag, '+'.join([fmt, *options]))

    def _command(self):
        """Return the pandoc argument list for this class's settings."""
        cmd = [
            'pandoc',
            '-o-',
            self._format_arg('to', self.out_format, self.out_options),
            self._format_arg('from', self.in_format, self.in_options),
            '--quiet',
            '--no-highlight',
        ]
        if self.columns:
            cmd.append(self.columns)
        return cmd

    def __init__(self, text):
        """Convert *text*, reusing the on-disk cache when one is available.

        Anything pandoc writes to stderr is logged as a warning. The result
        is written to the cache only when pandoc exits with status 0, so a
        failed run is retried on the next call instead of being served from
        the cache.
        """
        self.source = text
        if self.cache:
            return

        cmd = self._command()
        proc = subprocess.run(
            cmd,
            input=text.encode(),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if proc.stderr:
            logging.warning(
                "Error during pandoc covert:\n\t%s\n\t%s",
                cmd,
                proc.stderr
            )
        self.result = proc.stdout.decode('utf-8').strip()

        # never persist the output of a failed conversion
        if proc.returncode == 0:
            with open(self.cachefile, 'wt', encoding='utf-8') as f:
                f.write(self.result)

    def __str__(self):
        """Return the converted text."""
        return str(self.result)

    def __repr__(self):
        """Return the converted text (same as ``__str__``)."""
        return str(self.result)
2019-03-22 15:49:24 +00:00
class PandocMD2HTML(Pandoc):
    """Convert markdown (with the extensions listed below) to HTML5."""

    in_format = 'markdown'
    # markdown reader extensions, appended to the format with '+'
    in_options = [
        'footnotes',
        'pipe_tables',
        'strikeout',
        # 'superscript',
        # 'subscript',
        'raw_html',
        'definition_lists',
        'backtick_code_blocks',
        'fenced_code_attributes',
        'shortcut_reference_links',
        'lists_without_preceding_blankline',
        'autolink_bare_uris',
    ]
    out_format = 'html5'
    out_options = []
2019-03-22 15:49:24 +00:00
class PandocHTML2MD(Pandoc):
    """Convert HTML to markdown (with the extensions listed below)."""

    in_format = 'html'
    in_options = []
    out_format = 'markdown'
    # markdown writer extensions, appended to the format with '+'
    out_options = [
        'footnotes',
        'pipe_tables',
        'strikeout',
        'raw_html',
        'definition_lists',
        'backtick_code_blocks',
        'fenced_code_attributes',
        'shortcut_reference_links',
        'lists_without_preceding_blankline',
        'autolink_bare_uris',
    ]
2019-03-22 15:49:24 +00:00
class PandocMD2TXT(Pandoc):
    """Convert markdown (with the extensions listed below) to plain text."""

    in_format = 'markdown'
    # markdown reader extensions, appended to the format with '+'
    in_options = [
        'footnotes',
        'pipe_tables',
        'strikeout',
        'raw_html',
        'definition_lists',
        'backtick_code_blocks',
        'fenced_code_attributes',
        'shortcut_reference_links',
        'lists_without_preceding_blankline',
        'autolink_bare_uris',
    ]
    out_format = 'plain'
    out_options = []
    # extra argument appended to the pandoc command line
    columns = '--columns=80'
class PandocHTML2TXT(Pandoc):
    """Convert HTML to plain text."""

    in_format = 'html'
    in_options = []
    out_format = 'plain'
    out_options = []
    # extra argument appended to the pandoc command line
    columns = '--columns=80'