2018-12-03 10:36:10 +00:00
|
|
|
__author__ = "Peter Molnar"
|
2019-01-05 11:55:40 +00:00
|
|
|
__copyright__ = "Copyright 2017-2019, Peter Molnar"
|
2018-12-03 10:36:10 +00:00
|
|
|
__license__ = "apache-2.0"
|
|
|
|
__maintainer__ = "Peter Molnar"
|
|
|
|
__email__ = "mail@petermolnar.net"
|
|
|
|
|
Back To Pandoc
So, Python Markdown is a bottomless pit of horrors, including crippling parsing bugs
that appear out of nowhere and a lack of features. It's definitely much faster than
Pandoc, but Pandoc doesn't fall apart when there's a regex in a fenced code block
that happens to be a regex for markdown elements.
Also added some ugly post-conversion string replacements to make Pandoc's fenced code output work
with Prism:
instead of Pandoc's <pre class="codelang"><code>, Prism wants
<pre><code class="language-codelang">, so I added a regex sub, because it's 00:32.
2018-08-04 00:28:55 +01:00
|
|
|
import subprocess
|
|
|
|
import logging
|
2019-03-22 15:49:24 +00:00
|
|
|
import hashlib
|
|
|
|
import os
|
|
|
|
import settings
|
Back To Pandoc
So, Python Markdown is a bottomless pit of horrors, including crippling parsing bugs
that appear out of nowhere and a lack of features. It's definitely much faster than
Pandoc, but Pandoc doesn't fall apart when there's a regex in a fenced code block
that happens to be a regex for markdown elements.
Also added some ugly post-conversion string replacements to make Pandoc's fenced code output work
with Prism:
instead of Pandoc's <pre class="codelang"><code>, Prism wants
<pre><code class="language-codelang">, so I added a regex sub, because it's 00:32.
2018-08-04 00:28:55 +01:00
|
|
|
|
2019-06-25 22:48:04 +01:00
|
|
|
|
2019-03-22 15:49:24 +00:00
|
|
|
class Pandoc(str):
    """Run text through the ``pandoc`` command line tool and cache the output.

    Constructing an instance performs the conversion (or loads it from the
    cache file) and stores the converted text in ``self.result``, which is
    what ``str()`` and ``repr()`` return. Subclasses choose the conversion by
    overriding the class attributes below.

    Note: ``__new__`` is not overridden, so the underlying ``str`` value of
    the instance is the *source* text; only ``__str__`` / ``__repr__`` expose
    the converted result.
    """

    # Reader name and its extension modifiers ("+ext" / "-ext"); they are
    # concatenated into a single ``--from=<format><options>`` argument.
    in_format = "html"
    in_options = []
    # Writer name and its extension modifiers, concatenated into ``--to=...``.
    out_format = "plain"
    out_options = []
    # Optional extra command line argument (subclasses use "--columns=80").
    columns = None

    # Outcome of the ``pandoc -v`` probe, stored on Pandoc itself so every
    # subclass shares it and the probe runs at most once per process.
    _quiet_supported = None

    @property
    def hash(self):
        """Return the SHA-1 hex digest of the source text."""
        return hashlib.sha1(self.source.encode()).hexdigest()

    @property
    def cachefile(self):
        """Return the cache file path for this class name and source hash."""
        return os.path.join(
            settings.tmpdir, "%s_%s.pandoc" % (self.__class__.__name__, self.hash)
        )

    @property
    def cache(self):
        """Load a cached result into ``self.result``.

        Returns:
            True when the cache file existed and was read, False otherwise.
        """
        if not os.path.exists(self.cachefile):
            return False
        try:
            with open(self.cachefile, "rt", encoding="utf-8") as f:
                self.result = f.read()
        except UnicodeDecodeError:
            # A cache file that is not valid UTF-8 (e.g. written earlier with
            # a different locale encoding) is treated as a miss; the
            # conversion then runs again and overwrites it.
            return False
        return True

    @staticmethod
    def _major_version(banner):
        """Return the major version from ``pandoc -v`` output, or None.

        Args:
            banner: raw bytes printed by ``pandoc -v``; the first two
                whitespace-separated fields are expected to be ``pandoc``
                and a dotted version number.
        """
        fields = banner.split()
        if len(fields) < 2 or fields[0] != b"pandoc":
            return None
        major = fields[1].split(b".", 1)[0]
        return int(major) if major.isdigit() else None

    @classmethod
    def _supports_quiet(cls):
        """Return True when the installed pandoc is version 2 or newer."""
        if Pandoc._quiet_supported is None:
            try:
                banner = subprocess.check_output(["pandoc", "-v"])
            except OSError:
                # Not cached: the message is repeated on every attempt and
                # the conversion itself will fail when pandoc is spawned.
                print("Error: pandoc is not installed!")
                return False
            major = cls._major_version(banner)
            Pandoc._quiet_supported = major is not None and major >= 2
        return Pandoc._quiet_supported

    def __init__(self, text):
        """Convert ``text`` with pandoc, or reuse the cached conversion.

        Args:
            text: the source document as a string.

        Raises:
            OSError: if the ``pandoc`` executable cannot be started.
        """
        self.source = text
        if self.cache:
            return

        # Joining an empty option list yields "", so no emptiness check is needed.
        conv_to = "--to=%s%s" % (self.out_format, "".join(self.out_options))
        conv_from = "--from=%s%s" % (self.in_format, "".join(self.in_options))

        cmd = ["pandoc", "-o-", conv_to, conv_from, "--no-highlight"]
        if self._supports_quiet():
            # Only pandoc v2 and higher support quiet param
            cmd.append("--quiet")
        if self.columns:
            cmd.append(self.columns)

        p = subprocess.Popen(
            tuple(cmd),
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, stderr = p.communicate(input=text.encode())
        if stderr:
            logging.warning("Error during pandoc covert:\n\t%s\n\t%s", cmd, stderr)

        self.result = stdout.decode("utf-8").strip()
        # Written as UTF-8 explicitly so the cache does not depend on the
        # locale's default encoding.
        with open(self.cachefile, "wt", encoding="utf-8") as f:
            f.write(self.result)

    def __str__(self):
        """Return the converted text."""
        return str(self.result)

    def __repr__(self):
        """Return the converted text (same as ``__str__``)."""
        return str(self.result)
|
|
|
|
|
|
|
|
|
2019-03-22 15:49:24 +00:00
|
|
|
class PandocMD2HTML(Pandoc):
    """Markdown to HTML5 conversion settings for :class:`Pandoc`."""

    out_format = "html5"
    out_options = []

    in_format = "markdown"
    # Reader extensions switched on ("+") or off ("-").
    # The superscript / subscript extensions are not listed here.
    in_options = [
        "+footnotes", "+pipe_tables", "+strikeout",
        "+raw_html", "+definition_lists",
        "+backtick_code_blocks", "+fenced_code_attributes",
        "+shortcut_reference_links", "+lists_without_preceding_blankline",
        "+autolink_bare_uris",
        "-smart",
    ]
|
|
|
|
|
|
|
|
|
2019-03-22 15:49:24 +00:00
|
|
|
class PandocHTML2MD(Pandoc):
    """HTML to markdown conversion settings for :class:`Pandoc`."""

    out_format = "markdown"
    # Writer extensions switched on ("+") or off ("-").
    out_options = [
        "+footnotes", "+pipe_tables", "+strikeout",
        "+raw_html", "+definition_lists",
        "+backtick_code_blocks", "+fenced_code_attributes",
        "+shortcut_reference_links", "+lists_without_preceding_blankline",
        "+autolink_bare_uris",
        "-smart",
    ]

    in_format = "html"
    in_options = []
|
|
|
|
|
|
|
|
|
2019-03-22 15:49:24 +00:00
|
|
|
class PandocMD2TXT(Pandoc):
    """Markdown to markdown conversion settings, with ``--columns=80``."""

    columns = "--columns=80"

    in_format = "markdown"
    # Extensions switched on ("+") or off ("-") for the reader.
    in_options = [
        "+footnotes", "+pipe_tables", "+strikeout",
        "+raw_html", "+definition_lists",
        "+backtick_code_blocks", "+fenced_code_attributes",
        "+shortcut_reference_links", "+lists_without_preceding_blankline",
        "+autolink_bare_uris",
        "-smart",
    ]

    out_format = "markdown"
    # The writer takes the same extension set; kept as its own list object.
    out_options = list(in_options)
|
2019-03-22 15:49:24 +00:00
|
|
|
|
|
|
|
|
|
|
|
class PandocHTML2TXT(Pandoc):
    """HTML to plain text conversion settings, with ``--columns=80``."""

    columns = "--columns=80"

    # No extension modifiers on either side of the conversion.
    in_format, in_options = "html", []
    out_format, out_options = "plain", []
|