This commit is contained in:
Peter Molnar 2016-07-22 10:12:51 +00:00
parent 4dcdb6d9be
commit 5eb5f4d29a
2 changed files with 171 additions and 60 deletions

View file

@ -3,7 +3,7 @@ Contributors: cadeyrn
Tags: plain text, export, backup Tags: plain text, export, backup
Requires at least: 3.0 Requires at least: 3.0
Tested up to: 4.5.3 Tested up to: 4.5.3
Stable tag: 0.4 Stable tag: 0.5
License: GPLv3 License: GPLv3
License URI: http://www.gnu.org/licenses/gpl-3.0.html License URI: http://www.gnu.org/licenses/gpl-3.0.html
@ -42,6 +42,18 @@ Version numbering logic:
* every .B version indicates new features. * every .B version indicates new features.
* every ..C indicates bugfixes for A.B version. * every ..C indicates bugfixes for A.B version.
= 0.5 =
*2016-07-22*
* everything is through filters now
* moar magic formatting:
** strong/em moved to strict `**`/`_` from `__`/`*`
** definition lists moved to strict `: ` from lazy spaces
* code is strictly less than 80 characters per line
* comments format changed
* using index.txt instead of item.md
*
= 0.3 = = 0.3 =
*2016-07-14* *2016-07-14*

View file

@ -3,7 +3,7 @@
Plugin Name: WP Flat Export Plugin Name: WP Flat Export
Plugin URI: https://github.com/petermolnar/wp-flatexport Plugin URI: https://github.com/petermolnar/wp-flatexport
Description: auto-export WordPress flat, structured, readable plain text Description: auto-export WordPress flat, structured, readable plain text
Version: 0.4 Version: 0.5
Author: Peter Molnar <hello@petermolnar.net> Author: Peter Molnar <hello@petermolnar.net>
Author URI: http://petermolnar.net/ Author URI: http://petermolnar.net/
License: GPLv3 License: GPLv3
@ -30,6 +30,7 @@ namespace WP_FLATEXPORTS;
define ( 'force', true ); define ( 'force', true );
define ( 'basedir', 'flat' ); define ( 'basedir', 'flat' );
define ( 'basefile', 'index.txt' ); define ( 'basefile', 'index.txt' );
define ( 'pandocfile', 'content.asciidoc' );
define ( 'maxattachments', 100 ); define ( 'maxattachments', 100 );
define ( 'expire', 10 ); define ( 'expire', 10 );
define ( 'wrap', 80 ); define ( 'wrap', 80 );
@ -71,9 +72,11 @@ function init () {
'post_content_clean_uploaddir', 'post_content_clean_uploaddir',
'post_content_insert_featured', 'post_content_insert_featured',
'post_content_clear_imgids', 'post_content_clear_imgids',
//'post_content_pandoc',
'post_content_fix_emstrong',
'post_content_fix_dl',
'post_content_url2footnote', 'post_content_url2footnote',
'post_content_headers', 'post_content_headers',
//'post_content_wordwrap',
//'post_content_urls', //'post_content_urls',
), ),
'wp_flatexport_comment' => array ( 'wp_flatexport_comment' => array (
@ -267,6 +270,10 @@ function insert_urls ( $text, $post ) {
// get rid of trailing slashes; it's either no trailing slash or slash on // get rid of trailing slashes; it's either no trailing slash or slash on
// everything, which breaks .html-like real document path URLs // everything, which breaks .html-like real document path URLs
foreach ( $slugs as $k => $slug ) { foreach ( $slugs as $k => $slug ) {
if ( ! strstr( $slug, 'http') ) {
unset ( $slugs[ $k ] );
continue;
}
$slugs[ $k ] = rtrim( $slug, '/' ); $slugs[ $k ] = rtrim( $slug, '/' );
} }
@ -474,9 +481,20 @@ function post_content_resized2orig ( $content, $post ) {
$urlparts = parse_url( \site_url() ); $urlparts = parse_url( \site_url() );
$domain = $urlparts ['host']; $domain = $urlparts ['host'];
$wp_upload_dir = \wp_upload_dir(); $wp_upload_dir = \wp_upload_dir();
$uploadurl = str_replace( '/', "\\/", trim( str_replace( \site_url(), '', $wp_upload_dir['url']), '/')); $uploadurl = str_replace(
'/',
"\\/",
trim( str_replace(
\site_url(),
'',
$wp_upload_dir['url']
), '/')
);
$pregstr = "/((https?:\/\/". $domain .")?\/". $uploadurl ."\/.*\/[0-9]{4}\/[0-9]{2}\/)(.*)-([0-9]{1,4})×([0-9]{1,4})\.([a-zA-Z]{2,4})/"; $pregstr = "/((https?:\/\/". $domain .")?"
. "\/". $uploadurl
. "\/.*\/[0-9]{4}\/[0-9]{2}\/)(.*)-([0-9]{1,4})×([0-9]{1,4})"
. "\.([a-zA-Z]{2,4})/";
preg_match_all( $pregstr, $content, $resized_images ); preg_match_all( $pregstr, $content, $resized_images );
@ -491,7 +509,9 @@ function post_content_resized2orig ( $content, $post ) {
} }
} }
$pregstr = "/(https?:\/\/". $domain .")?\/". $uploadurl ."\/.*\/[0-9]{4}\/[0-9]{2}\/(.*?)\.([a-zA-Z]{2,4})/"; $pregstr = "/(https?:\/\/". $domain .")?"
. "\/".$uploadurl
."\/.*\/[0-9]{4}\/[0-9]{2}\/(.*?)\.([a-zA-Z]{2,4})/";
preg_match_all( $pregstr, $content, $images ); preg_match_all( $pregstr, $content, $images );
if ( !empty ( $images[0] )) { if ( !empty ( $images[0] )) {
@ -520,7 +540,11 @@ function post_content_clean_uploaddir ( $content, $post ) {
$urlparts = parse_url( \site_url() ); $urlparts = parse_url( \site_url() );
$domain = $urlparts ['host']; $domain = $urlparts ['host'];
$wp_upload_dir = \wp_upload_dir(); $wp_upload_dir = \wp_upload_dir();
$uploadurl = str_replace( '/', "\\/", trim( str_replace( \site_url(), '', $wp_upload_dir['url']), '/')); $uploadurl = str_replace(
'/',
"\\/",
trim( str_replace( \site_url(), '', $wp_upload_dir['url'] ), '/' )
);
$pattern = "/\({$wp_upload_dir['baseurl']}\/(.*?)\)/"; $pattern = "/\({$wp_upload_dir['baseurl']}\/(.*?)\)/";
$search = str_replace( '/', '\/', $wp_upload_dir['baseurl'] ); $search = str_replace( '/', '\/', $wp_upload_dir['baseurl'] );
@ -548,7 +572,11 @@ function post_content_insert_featured ( $content, $post ) {
$title = $meta['image_meta']['title']; $title = $meta['image_meta']['title'];
$featured = "\n\n![{$title}]({$src[0]}){#img-{$thid}}"; $featured = "\n\n![{$title}]({$src[0]}){#img-{$thid}}";
$content .= apply_filters ( 'wp_flatexport_featured_image', $featured, $post ); $content .= apply_filters (
'wp_flatexport_featured_image',
$featured,
$post
);
} }
} }
@ -573,29 +601,32 @@ function post_content_clear_imgids ( $content, $post ) {
function post_content_url2footnote ( $content, $post ) { function post_content_url2footnote ( $content, $post ) {
// //
$pattern = "/\s+(\[([^\s].*?)\]\((.*?)(\s?+[\\\"\'].*?[\\\"\'])?\))/"; $pattern = "/[\s*_\/]+(\[([^\s].*?)\]\((.*?)(\s?+[\\\"\'].*?[\\\"\'])?\))/";
$matches = array(); preg_match_all( $pattern, $content, $m );
preg_match_all( $pattern, $content, $matches );
// [1] -> array of []() // [1] -> array of []()
// [2] -> array of [] // [2] -> array of []
// [3] -> array of () // [3] -> array of ()
// [4] -> (maybe) "" titles // [4] -> (maybe) "" titles
if ( ! empty( $matches ) && isset( $matches[0] ) && ! empty( $matches[0] ) ) { if ( ! empty( $m ) && isset( $m[0] ) && ! empty( $m[0] ) ) {
foreach ( $matches[1] as $cntr => $match ) { foreach ( $m[1] as $cntr => $match ) {
$name = trim( $matches[2][$cntr] ); $name = trim( $m[2][$cntr] );
$url = trim( $matches[3][$cntr] ); $url = trim( $m[3][$cntr] );
if ( ! strstr( $url, 'http') ) if ( ! strstr( $url, 'http') )
$url = \site_url( $url ); $url = \site_url( $url );
$title = ""; $title = "";
if ( isset( $matches[4][$cntr] ) && !empty( $matches[4][$cntr] ) ) if ( isset( $m[4][$cntr] ) && !empty( $m[4][$cntr] ) )
$title = " {$matches[4][$cntr]}"; $title = " {$m[4][$cntr]}";
$refid = $cntr+1; $refid = $cntr+1;
$footnotes[] = "[{$refid}]: {$url}{$title}"; $footnotes[] = "[{$refid}]: {$url}{$title}";
$content = str_replace ( $match, "[" . trim( $matches[2][$cntr] ) . "][". $refid ."]" , $content ); $content = str_replace (
$match,
"[" . trim( $m[2][$cntr] ) . "][". $refid ."]" ,
$content
);
} }
$content = $content . "\n\n" . join( "\n", $footnotes ); $content = $content . "\n\n" . join( "\n", $footnotes );
@ -605,57 +636,117 @@ function post_content_url2footnote ( $content, $post ) {
} }
/** /**
* find all second level markdown headers and replace them with underlined version * export with pandoc
*
*/
function post_content_pandoc ( $content, $post ) {
	// target: <wp-content>/<basedir>/<post slug>/<pandocfile>
	$flatroot = \WP_CONTENT_DIR . DIRECTORY_SEPARATOR . basedir;
	$flatdir = $flatroot . DIRECTORY_SEPARATOR . $post->post_name;
	$pandoc = $flatdir . DIRECTORY_SEPARATOR . pandocfile;

	// pandoc reads its input from a file, so dump the raw post content first
	$tmp = tempnam ( sys_get_temp_dir() , __NAMESPACE__ );
	if ( false === $tmp )
		return $content;

	if ( false !== file_put_contents( $tmp, $post->post_content ) ) {
		// both paths are escaped: the target path contains the post slug
		// and neither may be interpreted by the shell
		$cmd = sprintf(
			'/usr/bin/pandoc -p -f markdown_phpextra -t asciidoc -o %s %s',
			escapeshellarg( $pandoc ),
			escapeshellarg( $tmp )
		);
		passthru ( $cmd );
	}

	unlink ( $tmp );

	// the filtered text itself is passed through untouched; the asciidoc
	// version only exists as a side effect on disk
	return $content;
}
/**
 * normalize markdown emphasis markers:
 * strong ( `**text**` or `__text__` ) becomes `*text*`,
 * em ( `*text*` or `_text_` ) becomes `/text/`
 *
 * @param string $content markdown text to convert
 * @param mixed $post not used here; part of the filter signature
 *
 * @return string the converted text; a regex that fails or matches nothing
 *   leaves the text as it was
 */
function post_content_fix_emstrong ( $content, $post ) {
	// strong is first swapped to a placeholder which is guaranteed not to be
	// in the text; writing `*text*` right away would make the em pass below
	// pick it up and turn every strong into `/text/`
	$strong = "\x02";
	while ( false !== strpos( $content, $strong ) )
		$strong .= "\x03";

	// these regexes are borrowed from https://github.com/erusev/parsedown
	$regexes = array (
		'strong' => array(
			'**' => '/[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s',
			'__' => '/__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us',
		),
		'em' => array (
			'*' => '/[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s',
			'_' => '/_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us',
		)
	);

	$replace_map = array (
		'strong' => $strong,
		'em' => '/',
	);

	// order matters: strong has to be consumed before em
	foreach ( $regexes as $what => $subregexes ) {
		$mark = $replace_map[ $what ];
		foreach ( $subregexes as $regex ) {
			$replaced = preg_replace(
				$regex,
				$mark . '${1}' . $mark,
				$content
			);
			// null is a PCRE failure, eg. invalid UTF-8 with the /u flag
			if ( null !== $replaced )
				$content = $replaced;
		}
	}

	// all em is done, it's safe to put the final strong marker in place
	return str_replace( $strong, '*', $content );
}
/**
 * normalize definition list markers: a line starting with `:` followed by
 * any amount of spaces or tabs and then text is rewritten to start with
 * exactly `: `
 *
 * The very first line of the text is never touched, a definition needs a
 * line before it.
 *
 * @param string $content markdown text to convert
 * @param mixed $post not used here; part of the filter signature
 *
 * @return string the converted text, or the original text if the regex fails
 */
function post_content_fix_dl ( $content, $post ) {
	// - the lookbehind does not consume the previous line, so consecutive
	//   definitions of the same term are all normalized
	// - [ \t] instead of \s: the marker must not swallow line breaks and
	//   pull a later paragraph up into the definition
	// - only the marker is replaced, the definition text is left alone
	$fixed = preg_replace( '/(?<=\n):[ \t]+(?=\S)/', ': ', $content );

	// null is a PCRE failure; keep the text as it was
	if ( null === $fixed )
		return $content;

	return $fixed;
}
/**
* find all second level markdown headers and replace them with
* underlined version
* *
*/ */
function post_content_headers ( $content, $post ) { function post_content_headers ( $content, $post ) {
$map = depthmap(); $map = depthmap();
preg_match_all( "/^([#]+)\s?+(.*)$/m", $content, $matches ); preg_match_all( "/^([#]+)\s?+(.*)$/m", $content, $m );
if ( ! empty( $matches ) && isset( $matches[0] ) && ! empty( $matches[0] ) ) { if ( ! empty( $m ) && isset( $m[0] ) && ! empty( $m[0] ) ) {
foreach ( $matches[0] as $cntr => $match ) { foreach ( $m[0] as $cntr => $match ) {
$depth = strlen( trim( $matches[1][$cntr] ) ); $depth = strlen( trim( $m[1][$cntr] ) );
$title = trim( $matches[2][$cntr] ); $title = trim( $m[2][$cntr] );
$content = str_replace ( $match, $title ."\n" . str_repeat( $map[ $depth ], mb_strlen( $title, 'UTF-8' ) ), $content ); $u = str_repeat( $map[ $depth ], mb_strlen( $title ) );
$content = str_replace ( $match, "{$title}\n{$u}", $content );
} }
} }
return $content; return $content;
} }
/**
* word-wrap magic
*
*
function post_content_wordwrap ( $content, $post ) {
$fenced_o = array();
preg_match_all( "/^```(.*?)[\n\r](.*?)```/mis", $content, $fenced_o );
$content = wordwrap( $content, 72 );
$fenced_n = array();
preg_match_all( "/^```(.*?)[\n\r](.*?)```/mis", $content, $fenced_n );
foreach ( array_keys( $fenced_o[0] ) as $k ) {
if ( $fenced_o[0][$k] != $fenced_n[0][$k] ) {
$content = str_replace ( $fenced_n[0][$k], $fenced_o[0][$k], $content );
}
}
return $content;
}
*/
/**
* convert standalone urls to <url>
*
function post_content_urls ( $content, $post ) {
return $content = preg_replace("/\b((?:http|https)\:\/\/?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.[a-zA-Z0-9\.\/\?\:@\-_=#&,\+%]*)(?:\s|\n|\r|$)/i", '<${1}>' . "\n", $content);
}
*/
/** /**
* *
*/ */
@ -735,12 +826,14 @@ function export () {
if ( link( $attachment_path, $target_file ) ) if ( link( $attachment_path, $target_file ) )
continue; continue;
else else
debug( "could not hardlink '{$attachment_path}' to '{$target_file}'; trying to copy", 5); debug( "could not hardlink '{$attachment_path}'"
. " to '{$target_file}'; trying to copy", 5);
if ( copy( $attachment_path, $target_file ) ) if ( copy( $attachment_path, $target_file ) )
continue; continue;
else else
debug("could not copy '{$attachment_path}' to '{$target_file}'; saving attachment failed!", 4); debug( "could not copy '{$attachment_path}'"
. " to '{$target_file}'; saving attachment failed!", 4);
} }
} }
@ -857,7 +950,10 @@ function fix_post ( &$post = null ) {
* test if an object is actually a post * test if an object is actually a post
*/ */
function is_post ( &$post ) { function is_post ( &$post ) {
if ( !empty($post) && is_object($post) && isset($post->ID) && !empty($post->ID) ) if ( ! empty( $post ) &&
is_object( $post ) &&
isset( $post->ID ) &&
! empty( $post->ID ) )
return true; return true;
return false; return false;
@ -917,6 +1013,9 @@ function debug( $message, $level = LOG_NOTICE ) {
if (isset($caller['class'])) if (isset($caller['class']))
$parent = $caller['class'] . '::' . $parent; $parent = $caller['class'] . '::' . $parent;
if (isset($caller['namespace']))
$parent = $caller['namespace'] . '::' . $parent;
return error_log( "{$parent}: {$message}" ); return error_log( "{$parent}: {$message}" );
} }