diff --git a/composer.json b/composer.json index 3931d21..89e1829 100644 --- a/composer.json +++ b/composer.json @@ -1,9 +1,8 @@ { - "name": "petermolnar/wp-flatbackups", - "description": "WordPress plugin to auto-export WordPress content to flat YAML + HTML files", + "name": "petermolnar/wp-flatexport", + "description": "auto-export WordPress contents to folders and plain text + markdown files for longetivity and portability", "require": { "php": ">=5.3.0", - "ext-yaml": "^1.2" }, "license": "GPLv3", "authors": [ diff --git a/readme.txt b/readme.txt index 81380b4..3d53e02 100644 --- a/readme.txt +++ b/readme.txt @@ -1,30 +1,36 @@ -=== wp-flatbackups === +=== wp-flatexport === Contributors: cadeyrn Donate link: https://paypal.me/petermolnar/3 -Tags: backup, YAML, flat files +Tags: plain text, export, longnow Requires at least: 3.0 Tested up to: 4.4.2 -Stable tag: 0.1.1 +Stable tag: 0.2 License: GPLv3 License URI: http://www.gnu.org/licenses/gpl-3.0.html Required minimum PHP version: 5.3 Auto-export all published content on visit to flat, folder + files based structure. + == Description == +*WARNING* +This plugin is suited for my needs. Since my posts are already in Markdown, there is no HTML conversion present, but there are certain tricks done with the content. +Please be aware of this. + The plugin action is hooked into wp_footer, therefore executed on actual site visit. Content will be exported to wp-content/flat/{post_slug}/ folder ( one folder per post), all attachments copied (or hardlinked, if possible with the filesystem; this is automatic ). -The content will be placed into in item.md file, in YAML + {post_content} format and the same format is applied to comments, named comment-{comment_id}.md in the same folder. +The content will be placed into in item.md file. This is a markdown file, with some plain test headers. +Comments are exported into the same folder, named comment-{comment_id}.md; the format is similar for those as well. -This is not a classic backup, rather an export, to have your content in a readable format for the future. It can't be imported to WordPress (yet). +This is not a backup! +The goal of the plugin is to have a portable, plain text, easy to read, easy to copy version of you content on WordPress. Since not all data is exported, your site cannot be reconstructed from these exports. == Requirements == -* minimum PHP version: 5.3 -* YAML PHP plugin +* minimum PHP version: 5.4 == Installation == @@ -39,10 +45,16 @@ Version numbering logic: * every .B version indicates new features. * every ..C indicates bugfixes for A.B version. += 0.2 = +*2016-06-27* + +* plugin renamed to avoid confusion of the exports being backups +* removed YAML in favour of parseable plain text formatting + = 0.1.1 = *2016-03-08* -* better debugging +* better debugging = 0.1 = *2015-12-10* diff --git a/wp-flatexport.php b/wp-flatexport.php new file mode 100644 index 0000000..29df799 --- /dev/null +++ b/wp-flatexport.php @@ -0,0 +1,567 @@ + +Author URI: http://petermolnar.eu/ +License: GPLv3 +Required minimum PHP version: 5.4 +*/ + +/* Copyright 2015 Peter Molnar ( hello@petermolnar.eu ) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 3, as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +if (!class_exists('WP_FLATEXPORTS')): + +class WP_FLATEXPORTS { + + const force = true; + const basedir = 'flat'; + const basefile = 'item.md'; + const maxattachments = 100; + + public function __construct () { + register_activation_hook( __FILE__ , array( &$this, 'plugin_activate' ) ); + add_action( 'wp_footer', array( 'WP_FLATEXPORTS', 'export')); + } + + /** + * activate hook + */ + public static function plugin_activate() { + if ( version_compare( phpversion(), 5.4, '<' ) ) { + die( 'The minimum PHP version required for this plugin is 5.3' ); + } + + } + + /** + * + */ + public static function export () { + + if ( ! is_singular() ) + return false; + + $post = static::fix_post(); + + if ( $post === false ) + return false; + + // create directory structure + $filename = $post->post_name; + + $flatroot = WP_CONTENT_DIR . DIRECTORY_SEPARATOR . static::basedir; + $flatdir = $flatroot . DIRECTORY_SEPARATOR . $filename; + $flatfile = $flatdir . DIRECTORY_SEPARATOR . static::basefile; + + $post_timestamp = get_the_modified_time( 'U', $post->ID ); + $file_timestamp = 0; + + if ( @file_exists($flatfile) ) { + $file_timestamp = @filemtime ( $flatfile ); + } + + $mkdir = array ( $flatroot, $flatdir ); + foreach ( $mkdir as $dir ) { + if ( !is_dir($dir)) { + if (!mkdir( $dir )) { + static::debug_log('Failed to create ' . $dir . ', exiting export', 4); + return false; + } + } + } + + touch($flatdir, $post_timestamp); + + // get all the attachments + $attachments = get_children( array ( + 'post_parent'=>$post->ID, + 'post_type'=>'attachment', + 'orderby'=>'menu_order', + 'order'=>'asc' + )); + + // 100 is there for sanity + // hardlink all the attachments; no need for copy + // unless you're on a filesystem that does not support hardlinks + if ( !empty($attachments) && count($attachments) < static::maxattachments ) { + $out['attachments'] = array(); + foreach ( $attachments as $aid => $attachment ) { + $attachment_path = get_attached_file( $aid ); + $attachment_file = basename( $attachment_path); + $target_file = $flatdir . DIRECTORY_SEPARATOR . $attachment_file; + static::debug ( "exporting {$attachment_file} for {$post->post_name}", 7 ); + if ( !is_file( $target_file ) ) { + if ( ! link( $attachment_path, $target_file ) ) { + static::debug("could not hardlink '{$attachment_path}' to '{$target_file}'; trying to copy", 5); + if ( ! copy( $attachment_path, $target_file ) ) { + static::debug("could not copy '{$attachment_path}' to '{$target_file}'; saving attachment failed!", 4); + } + } + } + } + } + + // deal with comments + /* + * [TYPE] - reply, like, etc. + * name - url + * date + * + * ![avatar markdown]() + * text + * + */ + + $comments = get_comments ( array( 'post_id' => $post->ID ) ); + if ( $comments ) { + foreach ($comments as $comment) { + + $cfile = $flatdir . DIRECTORY_SEPARATOR . 'comment_' . $comment->comment_ID . '.md'; + $cf_timestamp = 0; + $c_timestamp = strtotime( $comment->comment_date ); + + if ( @file_exists($cfile) ) { + $cf_timestamp = @filemtime ( $cfile ); + } + + if ( $c_timestamp == $cf_timestamp && static::force == false ) { + continue; + } + + $c = "{$comment->comment_type}\n"; + $c .= "{$comment->comment_author} <{$comment->comment_author_email}> - {$comment->comment_author_url}\n"; + $c .= date( 'Y-m-d H:i:s P', $c_timestamp) . "\n\n"; + + if ( $avatar = get_comment_meta ($comment->comment_ID, "avatar", true)) + $c .= "![$comment->comment_author]({$avatar})\n"; + + $c .= $comment->comment_content; + + static::debug ( "Exporting comment # {$comment->comment_ID} to {$cfile}", 6 ); + file_put_contents ($cfile, $c); + touch ( $cfile, $c_timestamp ); + } + } + + // in case our export is fresh or we're not forcing updates on each and + // every time, walk away from this post + if ( $file_timestamp == $post_timestamp && static::force == false ) { + return true; + } + + $out = static::plain_text_post(); + + // write log + static::debug ( "Exporting #{$post->ID}, {$post->post_name} to {$flatfile}", 6 ); + file_put_contents ($flatfile, $out); + touch ( $flatfile, $post_timestamp ); + return true; + } + + + /** + * + * + */ + public static function plain_text_post ( $postid = false ) { + + if ( ! $postid ) + global $post; + else + $post = get_post( $postid ); + + $post = static::fix_post( $post ); + + if ( false === $post ) + return false; + + + $postdata = static::raw_post_data( $post ); + + if ( empty( $postdata ) ) + return false; + + $out = ""; + + if ( isset( $postdata['title'] ) && ! empty( $postdata['title'] ) ) { + $out .= "{$postdata['title']}\n"; + // first level header + $out .= str_repeat( "=", strlen( $postdata['title'] ) ) . "\n\n"; + } + + //if ( !empty ( $postdata['reactions'] ) ) + //$out .= $postdata['reactions'] . "\n\n"; + + if (isset($postdata['excerpt']) && !empty($postdata['excerpt'])) + $out .= $postdata['excerpt'] . "\n\n"; + + $out .= $postdata['content'] . "\n\n"; + + $out .= "Published\n"; + $out .= "---------\n"; + $out .= "{$postdata['published']}\n\n"; + + if ( $postdata['published'] != $postdata['modified'] ) { + $out .= "Updated\n"; + $out .= "-------\n"; + $out .= "{$postdata['modified']}\n\n"; + } + + $out .= "URLs\n"; + $out .= "----\n"; + $out .= "- <" . join ( ">\n- <", $postdata['urls'] ) . ">\n\n"; + + if ( isset( $postdata['author'] ) && ! empty( $postdata['author'] ) ) { + $out .= "Author\n"; + $out .= "------\n"; + $out .= "{$postdata['author']}\n\n"; + } + + if ( ! empty( $postdata['tags'] ) ) { + $tags = array(); + foreach ( $postdata['tags'] as $k => $tag ) { + array_push( $tags, "#{$tag->name}" ); + } + $tags = join (', ', $tags); + $out .= "Tags\n"; + $out .= "----\n"; + // these are hashtags, so escape the first one to avoid converting it into + // a header + $out .= "\\" . $tags . "\n\n"; + } + + return $out; + } + + /** + * + */ + public static function post_content ( &$post ) { + + $content = trim( $post->post_content ); + + $urlparts = parse_url(site_url()); + $domain = $urlparts ['host']; + $wp_upload_dir = wp_upload_dir(); + $uploadurl = str_replace( '/', "\\/", trim( str_replace( site_url(), '', $wp_upload_dir['url']), '/')); + + // get trailing hashtags, if any + //$hashtags = static::find_hashtag_line( $content ); + //$content = static::maybe_remove_hashtag_line( $content, $hashtags ); + + // fix all image attachments: resized -> original + $pregstr = "/((https?:\/\/". $domain .")?\/". $uploadurl ."\/.*\/[0-9]{4}\/[0-9]{2}\/)(.*)-([0-9]{1,4})×([0-9]{1,4})\.([a-zA-Z]{2,4})/"; + + preg_match_all( $pregstr, $content, $resized_images ); + + if ( !empty ( $resized_images[0] )) { + foreach ( $resized_images[0] as $cntr => $imgstr ) { + $done_images[ $resized_images[2][$cntr] ] = 1; + $fname = $resized_images[2][$cntr] . '.' . $resized_images[5][$cntr]; + $width = $resized_images[3][$cntr]; + $height = $resized_images[4][$cntr]; + $r = $fname . '?resize=' . $width . ',' . $height; + $content = str_replace ( $imgstr, $r, $content ); + } + } + + $pregstr = "/(https?:\/\/". $domain .")?\/". $uploadurl ."\/.*\/[0-9]{4}\/[0-9]{2}\/(.*?)\.([a-zA-Z]{2,4})/"; + + preg_match_all( $pregstr, $content, $images ); + if ( !empty ( $images[0] )) { + + foreach ( $images[0] as $cntr=>$imgstr ) { + if ( !isset($done_images[ $images[1][$cntr] ]) ){ + if ( !strstr($images[1][$cntr], 'http')) + $fname = $images[2][$cntr] . '.' . $images[3][$cntr]; + else + $fname = $images[1][$cntr] . '.' . $images[2][$cntr]; + + $content = str_replace ( $imgstr, $fname, $content ); + } + } + } + + // insert featured image + $thid = get_post_thumbnail_id( $post->ID ); + if ( ! empty( $thid ) ) { + $src = wp_get_attachment_image_src( $thid, 'full' ); + if ( isset($src[0]) ) { + $meta = wp_get_attachment_metadata($thid); + + if ( empty( $meta['image_meta']['title'] ) ) + $title = $post->post_title; + else + $title = $meta['image_meta']['title']; + + $content .= "\n\n![{$title}]({$src[0]}){#img-{$thid}}"; + } + } + + // get rid of wp_upload_dir in self urls + $pattern = "/\({$wp_upload_dir['baseurl']}\/(.*?)\)/"; + $search = str_replace( '/', '\/', $wp_upload_dir['baseurl'] ); + $content = preg_replace( "/\({$search}\/(.*?)\)/", '(${1})', $content ); + + // get rid of {#img-ID} -s + $content = preg_replace( "/\{\#img-[0-9]+.*?\}/", "", $content ); + + // convert standalone urls to + $content = preg_replace("/\b((?:http|https)\:\/\/?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.[a-zA-Z0-9\.\/\?\:@\-_=#&]*)(?:\s|\n|\r|$)/i", '<${1}>', $content); + + // find all second level headers and replace them with underlined version + $pattern = "/^##\s?+(.*)$/m"; + $matches = array(); + preg_match_all( $pattern, $content, $matches ); + + if ( ! empty( $matches ) && isset( $matches[0] ) && ! empty( $matches[0] ) ) { + foreach ( $matches[0] as $cntr => $match ) { + $title = trim( $matches[1][$cntr] ); + $content = str_replace ( $match, $title ."\n" . str_repeat( "-", strlen( $title ) ), $content ); + } + } + + // find links and replace them with footnote versions + $pattern = "/\s+(\[([^\s].*?)\]\((.*?)(\s?+[\\\"\'].*?[\\\"\'])?\))/"; + $matches = array(); + preg_match_all( $pattern, $content, $matches ); + // [1] -> array of []() + // [2] -> array of [] + // [3] -> array of () + // [4] -> (maybe) "" titles + if ( ! empty( $matches ) && isset( $matches[0] ) && ! empty( $matches[0] ) ) { + foreach ( $matches[1] as $cntr => $match ) { + $name = trim( $matches[2][$cntr] ); + $url = trim( $matches[3][$cntr] ); + $title = ""; + + if ( isset( $matches[4][$cntr] ) && !empty( $matches[4][$cntr] ) ) + $title = " {$matches[4][$cntr]}"; + + $footnotes[] = "[{$name}]: {$url}{$title}"; + $content = str_replace ( $match, "[" . trim( $matches[2][$cntr] ) . "]" , $content ); + } + + $content = $content . "\n\n" . join( "\n", $footnotes ); + } + + // find images and replace them with footnote versions ? + + return $content; + } + + /** + * raw data for various representations, like JSON or YAML + */ + public static function raw_post_data ( &$post = null ) { + + $post = static::fix_post( $post ); + + if ($post === false) + return false; + + $content = static::post_content ( $post ); + + // excerpt + $excerpt = ""; + if( $post->post_excerpt && !empty( trim( $post->post_excerpt ) ) ) { + $excerpt = trim( $post->post_excerpt ); + } + + // get author name + $author_id = $post->post_author; + $author_name = get_the_author_meta ( 'display_name' , $author_id ); + $author_email = get_the_author_meta ( 'email' , $author_id ); + $author_url = get_the_author_meta ( 'url' , $author_id ); + $author = "{$author_name} <{$author_email}>\n<{$author_url}>"; + + // get a list of all possible URLs to this post, including syndications + $post_urls = array(); + + $slugs = get_post_meta ( $post->ID, '_wp_old_slug' ); + array_push ( $slugs, $post->post_name ); + array_push ( $slugs, $post->ID ); + $slugs = array_unique ( $slugs ); + + foreach ( $slugs as $k => $slug ) { + if ( preg_match ( '/-revision-v[0-9]+/', $slug ) ) { + unset ( $slugs[ $k ] ); + continue; + } + + $slugs[ $k ] = rtrim( site_url(), '/') . '/' . $slug; + } + + $syndications = get_post_meta ( $post->ID, 'syndication_urls', true ); + if ( ! empty( $syndications ) ) { + $syndications = explode( "\n", trim( $syndications ) ); + array_merge( $slugs, $syndications ); + } + + array_push ( $slugs, get_permalink( $post ) ); + array_push ( $slugs, wp_get_shortlink( $post->ID ) ); + + foreach ( $slugs as $k => $slug ) { + $slugs[ $k ] = rtrim( $slug, '/' ); + } + + $slugs = array_unique ( $slugs ); + usort( $slugs, function ( $a, $b ) { return strlen( $a ) - strlen( $b ); } ); + + // read tags + $tags = wp_get_post_terms( $post->ID, 'post_tag' ); + + // assemble the data + $out = array ( + 'title' => trim( get_the_title( $post->ID ) ), + 'modified' => get_the_modified_time( 'Y-m-d H:i:s P', $post->ID ), + 'published' => get_the_time( 'Y-m-d H:i:s P', $post->ID ), + 'urls' => $slugs, + 'tags' => $tags, + 'author' => $author, + 'content' => $content, + 'excerpt' => trim( $excerpt ), + //'reactions' => static::meta_reaction( $post ), + ); + + return $out; + } + + + /** + * + * + public static function meta_reaction ( &$post ) { + + $url = trim ( get_post_meta( $post->ID, 'webmention_url', true ) ); + $type = trim ( get_post_meta( $post->ID, 'webmention_type', true ) ); + $rsvp = trim ( get_post_meta( $post->ID, 'webmention_data', true ) ); + + if ( empty( $url ) ) + return false; + + switch ($type) { + case 'from': + case 'repost': + $prefix = '**FROM:** '; + break; + case 're': + case 'reply': + case 'rsvp': + $prefix = '**RE:** '; + break; + default: + $prefix = '**URL:** '; + break; + } + + $r = "{$prefix}${url}"; + if ( !empty ($rsvp) ) + $r .= "\n{$rsvp}"; + + return $r; + } + */ + + /** + * do everything to get the Post object + */ + public static function fix_post ( &$post = null ) { + if ($post === null || !static::is_post($post)) + global $post; + + if (static::is_post($post)) + return $post; + + return false; + } + + /** + * test if an object is actually a post + */ + public static function is_post ( &$post ) { + if ( !empty($post) && is_object($post) && isset($post->ID) && !empty($post->ID) ) + return true; + + return false; + } + + /** + * + * debug messages; will only work if WP_DEBUG is on + * or if the level is LOG_ERR, but that will kill the process + * + * @param string $message + * @param int $level + * + * @output log to syslog | wp_die on high level + * @return false on not taking action, true on log sent + */ + public static function debug( $message, $level = LOG_NOTICE ) { + if ( empty( $message ) ) + return false; + + if ( @is_array( $message ) || @is_object ( $message ) ) + $message = json_encode($message); + + $levels = array ( + LOG_EMERG => 0, // system is unusable + LOG_ALERT => 1, // Alert action must be taken immediately + LOG_CRIT => 2, // Critical critical conditions + LOG_ERR => 3, // Error error conditions + LOG_WARNING => 4, // Warning warning conditions + LOG_NOTICE => 5, // Notice normal but significant condition + LOG_INFO => 6, // Informational informational messages + LOG_DEBUG => 7, // Debug debug-level messages + ); + + // number for number based comparison + // should work with the defines only, this is just a make-it-sure step + $level_ = $levels [ $level ]; + + // in case WordPress debug log has a minimum level + if ( defined ( 'WP_DEBUG_LEVEL' ) ) { + $wp_level = $levels [ WP_DEBUG_LEVEL ]; + if ( $level_ > $wp_level ) { + return false; + } + } + + // ERR, CRIT, ALERT and EMERG + if ( 3 >= $level_ ) { + wp_die( '

Error:

' . '

' . $message . '

' ); + exit; + } + + $trace = debug_backtrace(); + $caller = $trace[1]; + $parent = $caller['function']; + + if (isset($caller['class'])) + $parent = $caller['class'] . '::' . $parent; + + return error_log( "{$parent}: {$message}" ); + } + +} + +$WP_FLATEXPORTS = new WP_FLATEXPORTS(); + +endif;