0.5
This commit is contained in:
parent
4dcdb6d9be
commit
5eb5f4d29a
2 changed files with 171 additions and 60 deletions
14
readme.txt
14
readme.txt
|
@ -3,7 +3,7 @@ Contributors: cadeyrn
|
||||||
Tags: plain text, export, backup
|
Tags: plain text, export, backup
|
||||||
Requires at least: 3.0
|
Requires at least: 3.0
|
||||||
Tested up to: 4.5.3
|
Tested up to: 4.5.3
|
||||||
Stable tag: 0.4
|
Stable tag: 0.5
|
||||||
License: GPLv3
|
License: GPLv3
|
||||||
License URI: http://www.gnu.org/licenses/gpl-3.0.html
|
License URI: http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
|
@ -42,6 +42,18 @@ Version numbering logic:
|
||||||
* every .B version indicates new features.
|
* every .B version indicates new features.
|
||||||
* every ..C indicates bugfixes for A.B version.
|
* every ..C indicates bugfixes for A.B version.
|
||||||
|
|
||||||
|
= 0.5 =
|
||||||
|
*2016-07-22*
|
||||||
|
|
||||||
|
* everything is through filters now
|
||||||
|
* moar magic formatting:
|
||||||
|
** strong/em moved to strict `**`/`_` from `__`/`*`
|
||||||
|
** definition lists moved to strict `: ` from lazy spaces
|
||||||
|
* code is strictly less than 80 characters per line
|
||||||
|
* comments format changed
|
||||||
|
* using index.txt instead of item.md
|
||||||
|
*
|
||||||
|
|
||||||
= 0.3 =
|
= 0.3 =
|
||||||
*2016-07-14*
|
*2016-07-14*
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
Plugin Name: WP Flat Export
|
Plugin Name: WP Flat Export
|
||||||
Plugin URI: https://github.com/petermolnar/wp-flatexport
|
Plugin URI: https://github.com/petermolnar/wp-flatexport
|
||||||
Description: auto-export WordPress flat, structured, readable plain text
|
Description: auto-export WordPress flat, structured, readable plain text
|
||||||
Version: 0.4
|
Version: 0.5
|
||||||
Author: Peter Molnar <hello@petermolnar.net>
|
Author: Peter Molnar <hello@petermolnar.net>
|
||||||
Author URI: http://petermolnar.net/
|
Author URI: http://petermolnar.net/
|
||||||
License: GPLv3
|
License: GPLv3
|
||||||
|
@ -30,6 +30,7 @@ namespace WP_FLATEXPORTS;
|
||||||
define ( 'force', true );
|
define ( 'force', true );
|
||||||
define ( 'basedir', 'flat' );
|
define ( 'basedir', 'flat' );
|
||||||
define ( 'basefile', 'index.txt' );
|
define ( 'basefile', 'index.txt' );
|
||||||
|
define ( 'pandocfile', 'content.asciidoc' );
|
||||||
define ( 'maxattachments', 100 );
|
define ( 'maxattachments', 100 );
|
||||||
define ( 'expire', 10 );
|
define ( 'expire', 10 );
|
||||||
define ( 'wrap', 80 );
|
define ( 'wrap', 80 );
|
||||||
|
@ -71,9 +72,11 @@ function init () {
|
||||||
'post_content_clean_uploaddir',
|
'post_content_clean_uploaddir',
|
||||||
'post_content_insert_featured',
|
'post_content_insert_featured',
|
||||||
'post_content_clear_imgids',
|
'post_content_clear_imgids',
|
||||||
|
//'post_content_pandoc',
|
||||||
|
'post_content_fix_emstrong',
|
||||||
|
'post_content_fix_dl',
|
||||||
'post_content_url2footnote',
|
'post_content_url2footnote',
|
||||||
'post_content_headers',
|
'post_content_headers',
|
||||||
//'post_content_wordwrap',
|
|
||||||
//'post_content_urls',
|
//'post_content_urls',
|
||||||
),
|
),
|
||||||
'wp_flatexport_comment' => array (
|
'wp_flatexport_comment' => array (
|
||||||
|
@ -267,6 +270,10 @@ function insert_urls ( $text, $post ) {
|
||||||
// get rid of trailing slashes; it's either no trailing slash or slash on
|
// get rid of trailing slashes; it's either no trailing slash or slash on
|
||||||
// everything, which breaks .html-like real document path URLs
|
// everything, which breaks .html-like real document path URLs
|
||||||
foreach ( $slugs as $k => $slug ) {
|
foreach ( $slugs as $k => $slug ) {
|
||||||
|
if ( ! strstr( $slug, 'http') ) {
|
||||||
|
unset ( $slugs[ $k ] );
|
||||||
|
continue;
|
||||||
|
}
|
||||||
$slugs[ $k ] = rtrim( $slug, '/' );
|
$slugs[ $k ] = rtrim( $slug, '/' );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -474,9 +481,20 @@ function post_content_resized2orig ( $content, $post ) {
|
||||||
$urlparts = parse_url( \site_url() );
|
$urlparts = parse_url( \site_url() );
|
||||||
$domain = $urlparts ['host'];
|
$domain = $urlparts ['host'];
|
||||||
$wp_upload_dir = \wp_upload_dir();
|
$wp_upload_dir = \wp_upload_dir();
|
||||||
$uploadurl = str_replace( '/', "\\/", trim( str_replace( \site_url(), '', $wp_upload_dir['url']), '/'));
|
$uploadurl = str_replace(
|
||||||
|
'/',
|
||||||
|
"\\/",
|
||||||
|
trim( str_replace(
|
||||||
|
\site_url(),
|
||||||
|
'',
|
||||||
|
$wp_upload_dir['url']
|
||||||
|
), '/')
|
||||||
|
);
|
||||||
|
|
||||||
$pregstr = "/((https?:\/\/". $domain .")?\/". $uploadurl ."\/.*\/[0-9]{4}\/[0-9]{2}\/)(.*)-([0-9]{1,4})×([0-9]{1,4})\.([a-zA-Z]{2,4})/";
|
$pregstr = "/((https?:\/\/". $domain .")?"
|
||||||
|
. "\/". $uploadurl
|
||||||
|
. "\/.*\/[0-9]{4}\/[0-9]{2}\/)(.*)-([0-9]{1,4})×([0-9]{1,4})"
|
||||||
|
. "\.([a-zA-Z]{2,4})/";
|
||||||
|
|
||||||
preg_match_all( $pregstr, $content, $resized_images );
|
preg_match_all( $pregstr, $content, $resized_images );
|
||||||
|
|
||||||
|
@ -491,7 +509,9 @@ function post_content_resized2orig ( $content, $post ) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$pregstr = "/(https?:\/\/". $domain .")?\/". $uploadurl ."\/.*\/[0-9]{4}\/[0-9]{2}\/(.*?)\.([a-zA-Z]{2,4})/";
|
$pregstr = "/(https?:\/\/". $domain .")?"
|
||||||
|
. "\/".$uploadurl
|
||||||
|
."\/.*\/[0-9]{4}\/[0-9]{2}\/(.*?)\.([a-zA-Z]{2,4})/";
|
||||||
|
|
||||||
preg_match_all( $pregstr, $content, $images );
|
preg_match_all( $pregstr, $content, $images );
|
||||||
if ( !empty ( $images[0] )) {
|
if ( !empty ( $images[0] )) {
|
||||||
|
@ -520,7 +540,11 @@ function post_content_clean_uploaddir ( $content, $post ) {
|
||||||
$urlparts = parse_url( \site_url() );
|
$urlparts = parse_url( \site_url() );
|
||||||
$domain = $urlparts ['host'];
|
$domain = $urlparts ['host'];
|
||||||
$wp_upload_dir = \wp_upload_dir();
|
$wp_upload_dir = \wp_upload_dir();
|
||||||
$uploadurl = str_replace( '/', "\\/", trim( str_replace( \site_url(), '', $wp_upload_dir['url']), '/'));
|
$uploadurl = str_replace(
|
||||||
|
'/',
|
||||||
|
"\\/",
|
||||||
|
trim( str_replace( \site_url(), '', $wp_upload_dir['url'] ), '/' )
|
||||||
|
);
|
||||||
|
|
||||||
$pattern = "/\({$wp_upload_dir['baseurl']}\/(.*?)\)/";
|
$pattern = "/\({$wp_upload_dir['baseurl']}\/(.*?)\)/";
|
||||||
$search = str_replace( '/', '\/', $wp_upload_dir['baseurl'] );
|
$search = str_replace( '/', '\/', $wp_upload_dir['baseurl'] );
|
||||||
|
@ -548,7 +572,11 @@ function post_content_insert_featured ( $content, $post ) {
|
||||||
$title = $meta['image_meta']['title'];
|
$title = $meta['image_meta']['title'];
|
||||||
|
|
||||||
$featured = "\n\n![{$title}]({$src[0]}){#img-{$thid}}";
|
$featured = "\n\n![{$title}]({$src[0]}){#img-{$thid}}";
|
||||||
$content .= apply_filters ( 'wp_flatexport_featured_image', $featured, $post );
|
$content .= apply_filters (
|
||||||
|
'wp_flatexport_featured_image',
|
||||||
|
$featured,
|
||||||
|
$post
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -573,29 +601,32 @@ function post_content_clear_imgids ( $content, $post ) {
|
||||||
function post_content_url2footnote ( $content, $post ) {
|
function post_content_url2footnote ( $content, $post ) {
|
||||||
|
|
||||||
//
|
//
|
||||||
$pattern = "/\s+(\[([^\s].*?)\]\((.*?)(\s?+[\\\"\'].*?[\\\"\'])?\))/";
|
$pattern = "/[\s*_\/]+(\[([^\s].*?)\]\((.*?)(\s?+[\\\"\'].*?[\\\"\'])?\))/";
|
||||||
$matches = array();
|
preg_match_all( $pattern, $content, $m );
|
||||||
preg_match_all( $pattern, $content, $matches );
|
|
||||||
// [1] -> array of []()
|
// [1] -> array of []()
|
||||||
// [2] -> array of []
|
// [2] -> array of []
|
||||||
// [3] -> array of ()
|
// [3] -> array of ()
|
||||||
// [4] -> (maybe) "" titles
|
// [4] -> (maybe) "" titles
|
||||||
if ( ! empty( $matches ) && isset( $matches[0] ) && ! empty( $matches[0] ) ) {
|
if ( ! empty( $m ) && isset( $m[0] ) && ! empty( $m[0] ) ) {
|
||||||
foreach ( $matches[1] as $cntr => $match ) {
|
foreach ( $m[1] as $cntr => $match ) {
|
||||||
$name = trim( $matches[2][$cntr] );
|
$name = trim( $m[2][$cntr] );
|
||||||
$url = trim( $matches[3][$cntr] );
|
$url = trim( $m[3][$cntr] );
|
||||||
if ( ! strstr( $url, 'http') )
|
if ( ! strstr( $url, 'http') )
|
||||||
$url = \site_url( $url );
|
$url = \site_url( $url );
|
||||||
|
|
||||||
$title = "";
|
$title = "";
|
||||||
|
|
||||||
if ( isset( $matches[4][$cntr] ) && !empty( $matches[4][$cntr] ) )
|
if ( isset( $m[4][$cntr] ) && !empty( $m[4][$cntr] ) )
|
||||||
$title = " {$matches[4][$cntr]}";
|
$title = " {$m[4][$cntr]}";
|
||||||
|
|
||||||
$refid = $cntr+1;
|
$refid = $cntr+1;
|
||||||
|
|
||||||
$footnotes[] = "[{$refid}]: {$url}{$title}";
|
$footnotes[] = "[{$refid}]: {$url}{$title}";
|
||||||
$content = str_replace ( $match, "[" . trim( $matches[2][$cntr] ) . "][". $refid ."]" , $content );
|
$content = str_replace (
|
||||||
|
$match,
|
||||||
|
"[" . trim( $m[2][$cntr] ) . "][". $refid ."]" ,
|
||||||
|
$content
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
$content = $content . "\n\n" . join( "\n", $footnotes );
|
$content = $content . "\n\n" . join( "\n", $footnotes );
|
||||||
|
@ -605,57 +636,117 @@ function post_content_url2footnote ( $content, $post ) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* find all second level markdown headers and replace them with underlined version
|
* export with pandoc
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
function post_content_pandoc ( $content, $post ) {
|
||||||
|
$flatroot = \WP_CONTENT_DIR . DIRECTORY_SEPARATOR . basedir;
|
||||||
|
$flatdir = $flatroot . DIRECTORY_SEPARATOR . $post->post_name;
|
||||||
|
$pandoc = $flatdir . DIRECTORY_SEPARATOR . pandocfile;
|
||||||
|
|
||||||
|
$tmp = tempnam ( sys_get_temp_dir() , __NAMESPACE__ );
|
||||||
|
file_put_contents( $tmp, $post->post_content );
|
||||||
|
|
||||||
|
$cmd =
|
||||||
|
"/usr/bin/pandoc -p -f markdown_phpextra -t asciidoc -o {$pandoc} {$tmp}";
|
||||||
|
//exec( $cmd, $exif, $retval);
|
||||||
|
passthru ( $cmd );
|
||||||
|
|
||||||
|
unlink ( $tmp );
|
||||||
|
|
||||||
|
return $content;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* rewrite markdown strong/em markers: `**`/`__` become `*`, `*`/`_` become `/`
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
function post_content_fix_emstrong ( $content, $post ) {
|
||||||
|
|
||||||
|
// these regexes are borrowed from https://github.com/erusev/parsedown
|
||||||
|
|
||||||
|
$regexes = array (
|
||||||
|
'strong' => array(
|
||||||
|
'**' => '/[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s',
|
||||||
|
'__' => '/__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us',
|
||||||
|
),
|
||||||
|
'em' => array (
|
||||||
|
'*' => '/[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s',
|
||||||
|
'_' => '/_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us',
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
$replace_map = array (
|
||||||
|
'*' => '/',
|
||||||
|
'_' => '/',
|
||||||
|
'**' => '*',
|
||||||
|
'__' => '*',
|
||||||
|
);
|
||||||
|
|
||||||
|
foreach ( $regexes as $what => $subregexes ) {
|
||||||
|
$m = array();
|
||||||
|
foreach ( $subregexes as $key => $regex ) {
|
||||||
|
preg_match_all( $regex, $content, $m );
|
||||||
|
if ( empty( $m ) || ! isset( $m[0] ) || empty( $m[0] ) )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
foreach ( array_keys ( $m[1] ) as $cntr ) {
|
||||||
|
$content = str_replace (
|
||||||
|
$m[0][$cntr],
|
||||||
|
$replace_map[ $key ] . $m[1][$cntr] . $replace_map[ $key ],
|
||||||
|
$content
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return $content;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* rewrite definition list markers (`:` followed by any whitespace) to a strict `: `
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
function post_content_fix_dl ( $content, $post ) {
|
||||||
|
preg_match_all( '/^.*\n(:\s+).*$/mi', $content, $m );
|
||||||
|
|
||||||
|
if ( empty( $m ) || ! isset( $m[0] ) || empty( $m[0] ) )
|
||||||
|
return $content;
|
||||||
|
|
||||||
|
foreach ( $m[0] as $i => $match ) {
|
||||||
|
$match = str_replace( $m[1][$i], ': ', $match );
|
||||||
|
$content = str_replace( $m[0][$i], $match, $content );
|
||||||
|
}
|
||||||
|
|
||||||
|
return $content;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* find all second level markdown headers and replace them with
|
||||||
|
* underlined version
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
function post_content_headers ( $content, $post ) {
|
function post_content_headers ( $content, $post ) {
|
||||||
|
|
||||||
$map = depthmap();
|
$map = depthmap();
|
||||||
preg_match_all( "/^([#]+)\s?+(.*)$/m", $content, $matches );
|
preg_match_all( "/^([#]+)\s?+(.*)$/m", $content, $m );
|
||||||
|
|
||||||
if ( ! empty( $matches ) && isset( $matches[0] ) && ! empty( $matches[0] ) ) {
|
if ( ! empty( $m ) && isset( $m[0] ) && ! empty( $m[0] ) ) {
|
||||||
foreach ( $matches[0] as $cntr => $match ) {
|
foreach ( $m[0] as $cntr => $match ) {
|
||||||
$depth = strlen( trim( $matches[1][$cntr] ) );
|
$depth = strlen( trim( $m[1][$cntr] ) );
|
||||||
$title = trim( $matches[2][$cntr] );
|
$title = trim( $m[2][$cntr] );
|
||||||
$content = str_replace ( $match, $title ."\n" . str_repeat( $map[ $depth ], mb_strlen( $title, 'UTF-8' ) ), $content );
|
$u = str_repeat( $map[ $depth ], mb_strlen( $title ) );
|
||||||
|
$content = str_replace ( $match, "{$title}\n{$u}", $content );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return $content;
|
return $content;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* word-wrap magic
|
|
||||||
*
|
|
||||||
*
|
|
||||||
function post_content_wordwrap ( $content, $post ) {
|
|
||||||
|
|
||||||
$fenced_o = array();
|
|
||||||
preg_match_all( "/^```(.*?)[\n\r](.*?)```/mis", $content, $fenced_o );
|
|
||||||
|
|
||||||
$content = wordwrap( $content, 72 );
|
|
||||||
|
|
||||||
$fenced_n = array();
|
|
||||||
preg_match_all( "/^```(.*?)[\n\r](.*?)```/mis", $content, $fenced_n );
|
|
||||||
|
|
||||||
foreach ( array_keys( $fenced_o[0] ) as $k ) {
|
|
||||||
if ( $fenced_o[0][$k] != $fenced_n[0][$k] ) {
|
|
||||||
$content = str_replace ( $fenced_n[0][$k], $fenced_o[0][$k], $content );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $content;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* convert standalone urls to <url>
|
|
||||||
*
|
|
||||||
function post_content_urls ( $content, $post ) {
|
|
||||||
return $content = preg_replace("/\b((?:http|https)\:\/\/?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.[a-zA-Z0-9\.\/\?\:@\-_=#&,\+%]*)(?:\s|\n|\r|$)/i", '<${1}>' . "\n", $content);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
@ -735,12 +826,14 @@ function export () {
|
||||||
if ( link( $attachment_path, $target_file ) )
|
if ( link( $attachment_path, $target_file ) )
|
||||||
continue;
|
continue;
|
||||||
else
|
else
|
||||||
debug( "could not hardlink '{$attachment_path}' to '{$target_file}'; trying to copy", 5);
|
debug( "could not hardlink '{$attachment_path}'"
|
||||||
|
. " to '{$target_file}'; trying to copy", 5);
|
||||||
|
|
||||||
if ( copy( $attachment_path, $target_file ) )
|
if ( copy( $attachment_path, $target_file ) )
|
||||||
continue;
|
continue;
|
||||||
else
|
else
|
||||||
debug("could not copy '{$attachment_path}' to '{$target_file}'; saving attachment failed!", 4);
|
debug( "could not copy '{$attachment_path}'"
|
||||||
|
. " to '{$target_file}'; saving attachment failed!", 4);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -857,7 +950,10 @@ function fix_post ( &$post = null ) {
|
||||||
* test if an object is actually a post
|
* test if an object is actually a post
|
||||||
*/
|
*/
|
||||||
function is_post ( &$post ) {
|
function is_post ( &$post ) {
|
||||||
if ( !empty($post) && is_object($post) && isset($post->ID) && !empty($post->ID) )
|
if ( ! empty( $post ) &&
|
||||||
|
is_object( $post ) &&
|
||||||
|
isset( $post->ID ) &&
|
||||||
|
! empty( $post->ID ) )
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
@ -917,6 +1013,9 @@ function debug( $message, $level = LOG_NOTICE ) {
|
||||||
if (isset($caller['class']))
|
if (isset($caller['class']))
|
||||||
$parent = $caller['class'] . '::' . $parent;
|
$parent = $caller['class'] . '::' . $parent;
|
||||||
|
|
||||||
|
if (isset($caller['namespace']))
|
||||||
|
$parent = $caller['namespace'] . '::' . $parent;
|
||||||
|
|
||||||
return error_log( "{$parent}: {$message}" );
|
return error_log( "{$parent}: {$message}" );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue