nasg/pandoc.py

167 lines
4.2 KiB
Python
Raw Normal View History

__author__ = "Peter Molnar"
__copyright__ = "Copyright 2017-2019, Peter Molnar"
__license__ = "apache-2.0"
__maintainer__ = "Peter Molnar"
__email__ = "mail@petermolnar.net"
import subprocess
import logging
2019-03-22 15:49:24 +00:00
import hashlib
import os
import settings
2019-06-25 22:48:04 +01:00
2019-03-22 15:49:24 +00:00
class Pandoc(str):
    """Convert text between markup formats by piping it through ``pandoc``.

    The conversion runs in ``__init__``: the converted text is stored in
    ``self.result`` and is what ``str()`` and ``repr()`` of the instance
    return. Results are cached on disk in ``settings.tmpdir``, keyed by the
    class name and the SHA-1 of the source text, so a given input is only
    converted once per converter class.

    Subclasses pick the conversion by overriding the class attributes below.
    """

    # pandoc reader name; emitted as ``--from=<in_format><in_options...>``
    in_format = "html"
    # reader extension toggles (e.g. "+footnotes", "-smart"), appended verbatim
    in_options = []
    # pandoc writer name; emitted as ``--to=<out_format><out_options...>``
    out_format = "plain"
    # writer extension toggles, appended verbatim to ``--to=``
    out_options = []
    # complete ``--columns=N`` argument, or None to leave it off the command
    columns = None
    # memoised answer of the ``--quiet`` support probe; None = not probed yet
    _quiet_supported = None

    @property
    def hash(self) -> str:
        """Return the SHA-1 hex digest of the source text."""
        return hashlib.sha1(self.source.encode()).hexdigest()

    @property
    def cachefile(self) -> str:
        """Return the cache path for this converter class and source text."""
        return os.path.join(
            settings.tmpdir, "%s_%s.pandoc" % (self.__class__.__name__, self.hash)
        )

    @property
    def cache(self) -> bool:
        """Load a cached result into ``self.result`` if one is available.

        Returns:
            True when the cache file was read, False when it is missing or
            is not valid UTF-8 (in which case the caller converts again).
        """
        try:
            # pandoc output is decoded as UTF-8, so the cache must be read
            # and written as UTF-8 too instead of the locale's encoding.
            with open(self.cachefile, "rt", encoding="utf-8") as f:
                self.result = f.read()
        except (FileNotFoundError, UnicodeDecodeError):
            return False
        return True

    @staticmethod
    def _pandoc_major(banner: bytes) -> int:
        """Extract the major version from ``pandoc -v`` output; 0 if unknown."""
        lines = banner.decode("utf-8", "replace").splitlines()
        try:
            # first line looks like "pandoc 2.9.2.1"
            return int(lines[0].split()[1].split(".")[0])
        except (IndexError, ValueError):
            return 0

    @classmethod
    def _supports_quiet(cls) -> bool:
        """Return True when the installed pandoc is version 2 or newer.

        The probe result is memoised on ``Pandoc`` so ``pandoc -v`` is not
        spawned again for every uncached conversion.
        """
        if Pandoc._quiet_supported is None:
            try:
                banner = subprocess.check_output(["pandoc", "-v"])
            except OSError:
                # Not memoised: the conversion itself will fail right after.
                print("Error: pandoc is not installed!")
                return False
            Pandoc._quiet_supported = cls._pandoc_major(banner) >= 2
        return Pandoc._quiet_supported

    def _command(self) -> list:
        """Assemble the pandoc argument list for this converter class."""
        conv_to = "--to=%s%s" % (self.out_format, "".join(self.out_options))
        conv_from = "--from=%s%s" % (self.in_format, "".join(self.in_options))
        cmd = ["pandoc", "-o-", conv_to, conv_from, "--no-highlight"]
        if self._supports_quiet():
            # Only pandoc v2 and higher support quiet param
            cmd.append("--quiet")
        if self.columns:
            cmd.append(self.columns)
        return cmd

    def __init__(self, text: str) -> None:
        """Convert ``text``, reusing the on-disk cache when possible.

        Args:
            text: source document in ``in_format``.

        Raises:
            OSError: if the ``pandoc`` executable cannot be started.
        """
        self.source = text
        if self.cache:
            return

        cmd = self._command()
        p = subprocess.Popen(
            tuple(cmd),
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, stderr = p.communicate(input=text.encode())
        if stderr:
            logging.warning("Error during pandoc covert:\n\t%s\n\t%s", cmd, stderr)
        self.result = stdout.decode("utf-8").strip()

        # Do not cache the output of a failed run, otherwise the (usually
        # empty) result would be served for this source text from then on.
        if p.returncode == 0:
            with open(self.cachefile, "wt", encoding="utf-8") as f:
                f.write(self.result)

    def __str__(self) -> str:
        return str(self.result)

    def __repr__(self) -> str:
        return str(self.result)
2019-03-22 15:49:24 +00:00
class PandocMD2HTML(Pandoc):
    """Converter from pandoc ``markdown`` to ``html5``."""

    in_format = "markdown"
    out_format = "html5"

    # Extension toggles appended to the reader format.
    # NOTE: superscript and subscript are not toggled here (they were
    # previously present only as commented-out entries).
    in_options = [
        "+footnotes",
        "+pipe_tables",
        "+strikeout",
        "+raw_html",
        "+definition_lists",
        "+backtick_code_blocks",
        "+fenced_code_attributes",
        "+shortcut_reference_links",
        "+lists_without_preceding_blankline",
        "+autolink_bare_uris",
        "-smart",
    ]
    # No writer extensions for the HTML output.
    out_options = []
2019-03-22 15:49:24 +00:00
class PandocHTML2MD(Pandoc):
    """Converter from ``html`` to pandoc ``markdown``."""

    in_format = "html"
    out_format = "markdown"

    # No reader extensions for the HTML input.
    in_options = []
    # Extension toggles appended to the markdown writer format.
    out_options = [
        "+footnotes",
        "+pipe_tables",
        "+strikeout",
        "+raw_html",
        "+definition_lists",
        "+backtick_code_blocks",
        "+fenced_code_attributes",
        "+shortcut_reference_links",
        "+lists_without_preceding_blankline",
        "+autolink_bare_uris",
        "-smart",
    ]
2019-03-22 15:49:24 +00:00
class PandocMD2TXT(Pandoc):
    """Converter that re-renders pandoc ``markdown`` wrapped at 80 columns.

    Reader and writer use the same format and the same extension toggles.
    """

    # NOTE(review): the writer is "markdown", not "plain", despite the TXT
    # in the class name - confirm this is intended.
    in_format = "markdown"
    out_format = "markdown"

    in_options = [
        "+footnotes",
        "+pipe_tables",
        "+strikeout",
        "+raw_html",
        "+definition_lists",
        "+backtick_code_blocks",
        "+fenced_code_attributes",
        "+shortcut_reference_links",
        "+lists_without_preceding_blankline",
        "+autolink_bare_uris",
        "-smart",
    ]
    # Same toggles as the reader; copied so the two lists stay distinct objects.
    out_options = list(in_options)

    columns = "--columns=80"
2019-03-22 15:49:24 +00:00
class PandocHTML2TXT(Pandoc):
    """Converter from ``html`` to ``plain`` text wrapped at 80 columns."""

    in_format = "html"
    out_format = "plain"

    # Neither side uses extension toggles.
    in_options = []
    out_options = []

    columns = "--columns=80"