From e99ec82a769ec004368bba344eab5c1422dc8aba Mon Sep 17 00:00:00 2001 From: Peter Molnar Date: Mon, 11 Jan 2016 15:43:53 +0000 Subject: [PATCH] reacji added, parsing incoming more or less works, but still inactive, UNSTABLE --- composer.json | 17 +- composer.lock | 14 +- vendor/composer/installed.json | 12 + vendor/dissolve/single-emoji-recognizer | 1 + wp-webmention-again.php | 565 +++++++++++++++++++++--- 5 files changed, 553 insertions(+), 56 deletions(-) create mode 160000 vendor/dissolve/single-emoji-recognizer diff --git a/composer.json b/composer.json index 1f80902..ec047db 100644 --- a/composer.json +++ b/composer.json @@ -1,9 +1,24 @@ { "name": "petermolnar/wp-webmention-again", "description": "", + "repositories": [ + { + "type": "package", + "package": { + "name": "dissolve/single-emoji-recognizer", + "version": "0.1", + "source": { + "type": "git", + "url": "https://github.com/dissolve/single-emoji-recognizer.git", + "reference": "origin/master" + } + } + } + ], "require": { "php": ">=5.3.0", - "mf2/mf2": "0.2.*" + "mf2/mf2": "0.2.*", + "dissolve/single-emoji-recognizer": "0.1" }, "license": "GPLv3", "authors": [ diff --git a/composer.lock b/composer.lock index 6e80c23..b680da3 100644 --- a/composer.lock +++ b/composer.lock @@ -4,9 +4,19 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "hash": "0eb0473bfbeea8c0729e77c3e7687bb8", - "content-hash": "46f4a82eac34d91e4caceddae7feba9e", + "hash": "8a8be50ebb1ed19140da3a2136e5d0da", + "content-hash": "9b7917b522cc2bdcaf0a775ecfe024f0", "packages": [ + { + "name": "dissolve/single-emoji-recognizer", + "version": "0.1", + "source": { + "type": "git", + "url": "https://github.com/dissolve/single-emoji-recognizer.git", + "reference": "origin/master" + }, + "type": "library" + }, { "name": "mf2/mf2", "version": "v0.2.12", diff --git a/vendor/composer/installed.json b/vendor/composer/installed.json index 6b212c2..89046ea 100644 --- a/vendor/composer/installed.json +++ b/vendor/composer/installed.json @@ -53,5 +53,17 @@ "parser", "semantic" ] + }, + { + "name": "dissolve/single-emoji-recognizer", + "version": "0.1", + "version_normalized": "0.1.0.0", + "source": { + "type": "git", + "url": "https://github.com/dissolve/single-emoji-recognizer.git", + "reference": "origin/master" + }, + "type": "library", + "installation-source": "source" } ] diff --git a/vendor/dissolve/single-emoji-recognizer b/vendor/dissolve/single-emoji-recognizer new file mode 160000 index 0000000..b240055 --- /dev/null +++ b/vendor/dissolve/single-emoji-recognizer @@ -0,0 +1 @@ +Subproject commit b240055df0db2d1dbe611e9126ea9a961c8f4cc0 diff --git a/wp-webmention-again.php b/wp-webmention-again.php index 76430ab..23937b1 100644 --- a/wp-webmention-again.php +++ b/wp-webmention-again.php @@ -24,28 +24,41 @@ if ( !class_exists('Mf2\Parser') ) { require (__DIR__ . '/vendor/autoload.php'); } +if (!class_exists('EmojiRecognizer')) { + require (__DIR__ . '/vendor/dissolve/single-emoji-recognizer/src/emoji.php'); +} + class WP_WEBMENTION_AGAIN { - const meta_key = '_webmention_received'; - const cron_key = 'webmention_async'; - const endp_key = 'webmentions_again'; + const meta_received = '_webmention_received'; + const cron_received = 'webmention_received'; + const interval_received = 90; + + const meta_send = '_webmention_send'; + const cron_send = 'webmention_send'; + const interval_send = 90; + + const endp = 'webmentions_again'; public function __construct() { - add_action( 'parse_query', array( &$this, 'parse_query' ) ); + //add_action( 'parse_query', array( &$this, 'receive' ) ); - add_action( 'wp_head', array( &$this, 'html_header' ), 99 ); - add_action( 'send_headers', array( &$this, 'http_header' ) ); + //add_action( 'wp_head', array( &$this, 'html_header' ), 99 ); + //add_action( 'send_headers', array( &$this, 'http_header' ) ); add_action( 'init', array( &$this, 'init')); // this is mostly for debugging reasons register_activation_hook( __FILE__ , array( &$this, 'plugin_activate' ) ); // clear schedules if there's any on deactivation - register_deactivation_hook( __FILE__ , array( &$this, 'plugin_deactivate' ) ); + //register_deactivation_hook( __FILE__ , array( &$this, 'plugin_deactivate' ) ); - // register the action for the cron hook - add_action( static::cron_key, array( &$this, 'process' ) ); + // register the action for processing received + add_action( static::cron_received, array( &$this, 'process_received' ) ); + + // register action for sending + //add_action( static::cron_send, array( &$this, 'process_send' ) ); } @@ -53,12 +66,25 @@ class WP_WEBMENTION_AGAIN { * init hook */ public function init() { - add_filter( 'query_vars', array( &$this, 'query_var' ) ); + // add webmention endpoint to query vars + //add_filter( 'query_vars', array( &$this, 'query_var' ) ); + // extend current cron schedules with our entry + add_filter( 'cron_schedules', array(&$this, 'add_cron_schedule' )); - if (!wp_get_schedule( static::cron_key )) { - wp_schedule_event( time(), 'daily', static::cron_key ); + if (!wp_get_schedule( static::cron_received )) { + wp_schedule_event( time(), 'daily', static::cron_received ); } + + // additional comment types + add_action('admin_comment_types_dropdown', array(&$this, 'comment_types_dropdown')); + + // because this needs one more filter + add_filter('get_avatar_comment_types', array( &$this, 'add_comment_types')); + + // additional avatar filter + add_filter( 'get_avatar' , array(&$this, 'get_avatar'), 1, 5 ); + } /** @@ -79,11 +105,34 @@ class WP_WEBMENTION_AGAIN { //wp_clear_scheduled_hook( __CLASS__ ); } + + /** + * add our own schedule + * + * @param array $schedules - current schedules list + * + * @return array $schedules - extended schedules list + */ + public function add_cron_schedule ( $schedules ) { + + $schedules[ static::cron_received ] = array( + 'interval' => static::interval_received, + 'display' => sprintf(__( 'every %d seconds' ), static::interval_received ) + ); + + $schedules[ static::cron_send ] = array( + 'interval' => static::interval_send, + 'display' => sprintf(__( 'every %d seconds' ), static::interval_send ) + ); + + return $schedules; + } + /** * */ public function html_header () { - $endpoint = site_url( '?'. static::endp_key .'=endpoint' ); + $endpoint = site_url( '?'. static::endp .'=endpoint' ); // backwards compatibility with v0.1 echo '' . "\n"; @@ -91,10 +140,10 @@ class WP_WEBMENTION_AGAIN { } /** - * T + * */ public function http_header() { - $endpoint = site_url( '?'. static::endp_key .'=endpoint' ); + $endpoint = site_url( '?'. static::endp .'=endpoint' ); // backwards compatibility with v0.1 header( 'Link: <' . $endpoint . '>; rel="http://webmention.org/"', false ); @@ -109,10 +158,77 @@ class WP_WEBMENTION_AGAIN { * @return array extended vars */ public function query_var($vars) { - $vars[] = static::endp_key; + $vars[] = static::endp; return $vars; } + /** + * + */ + public static function get_options () { + $options = get_option(__CLASS__); + + if (isset($options['comment_types']) && is_array($options['comment_types'])) + $options['comment_types'] = array_merge($options['comment_types'], static::mftypes()); + else + $options['comment_types'] = static::mftypes(); + + return $options; + } + + /** + * + */ + public static function set_options ( &$options ) { + return update_option( __CLASS__ , $options ); + } + + + /** + * + */ + public static function register_reacji ($reacji) { + $options = static::get_options(); + + if (!in_array($reacji, $options['comment_types'])) { + $options['comment_types'][$reacji] = $reacji; + static::set_options($options); + } + } + + /** + * Extend the "filter by comment type" of in the comments section + * of the admin interface with all of our methods + * + * @param array $types the different comment types + * + * @return array the filtered comment types + */ + public function comment_types_dropdown($types) { + $options = static::get_options(); + + foreach ($options['comment_types'] as $type => $fancy ) { + if (!isset($types[ $type ])) + $types[ $type ] = ucfirst( $type ); + } + return $types; + } + + /** + * + */ + public function add_comment_types ( $types ) { + $options = static::get_options(); + + foreach ($options['comment_types'] as $type => $fancy ) { + if (!in_array( $type, $types )) + array_push( $types, $type ); + } + + return $types; + } + + /** * parse incoming webmention endpoint requests @@ -120,10 +236,10 @@ class WP_WEBMENTION_AGAIN { * * @param mixed $wp WordPress Query */ - public function parse_query( $wp ) { + public function receive( $wp ) { // check if it is a webmention request or not - if ( ! array_key_exists( static::endp_key, $wp->query_vars ) ) + if ( ! array_key_exists( static::endp, $wp->query_vars ) ) return false; // plain text header @@ -146,13 +262,13 @@ class WP_WEBMENTION_AGAIN { $target = filter_var($_POST['target'], FILTER_SANITIZE_URL); $source = filter_var($_POST['source'], FILTER_SANITIZE_URL); - if ( empty($target) ) { + if ( filter_var($target, FILTER_VALIDATE_URL) === false ) { status_header( 400 ); echo '"target" is an invalid URL'; exit; } - if ( empty($source) ) { + if ( filter_var($source, FILTER_VALIDATE_URL) === false ) { status_header( 400 ); echo '"source" is an invalid URL'; exit; @@ -180,66 +296,89 @@ class WP_WEBMENTION_AGAIN { exit; } + /** + * + */ + public function get_received () { + global $wpdb; + $r = array(); + + $dbname = "{$wpdb->prefix}postmeta"; + $key = static::meta_received; + $db_command = "SELECT DISTINCT `post_id` FROM `{$dbname}` WHERE `meta_key` = '{$key}' LIMIT 10"; + + static::debug($db_command); + static::debug('fetching current mentions'); + try { + $q = $wpdb->get_results($db_command); + } + catch (Exception $e) { + static::debug('Something went wrong: ' . $e->getMessage()); + } + + if (!empty($q) && is_array($q)) { + foreach ($q as $post) { + $r[] = $post->post_id; + } + } + + return $r; + } + /** * worker method for doing received webmentions * works in 10 post per cron run, reschedules itself after that * */ - public function process () { + public function process_received () { - $args = array( - 'posts_per_page' => 10, - 'orderby' => 'date', - 'order' => 'ASC', - 'meta_key' => static::meta_key, - 'post_type' => 'any', - 'post_status' => 'any', - 'suppress_filters' => true, - ); - - $posts = get_posts( $args ); + $posts = static::get_received(); if (empty($posts)) { + // re-schedule ourselves, but delay return true; } - foreach ($posts as $post ) { - $received_mentions = get_post_meta ($post->ID, static::meta_key, false); + foreach ($posts as $post_id ) { + $received_mentions = get_post_meta ($post_id, static::meta_received, false); + + static::debug('todo: ' . json_encode($received_mentions)); foreach ($received_mentions as $m) { - static::debug("working on webmention for post #{$post->ID}"); - // delete the current one, because even if it fails, it will change - // to store the pull retries and the status - delete_post_meta($post->ID, static::meta_key, $m); + // $m should not be modified as this is how the current entry can be identified! + $_m = $m; + + static::debug("working on webmention for post #{$post_id}"); // this really should not happen, but if it does, get rid of this entry immediately - if (!isset($m['target']) || empty($m['target']) || !isset($m['source']) || empty($m['source'])) { + if (!isset($_m['target']) || empty($_m['target']) || !isset($_m['source']) || empty($_m['source'])) { static::debug(" target or souce empty, aborting"); continue; } else { - static::debug(" target: {$m['target']}, source: {$m['source']}"); + static::debug(" target: {$_m['target']}, source: {$_m['source']}"); } // if we'be been here before, we have retried counter already - $retries = isset($m['retries']) ? intval($m['retries']) : 0; + $retries = isset($_m['retries']) ? intval($_m['retries']) : 0; // too many retries, drop this mention and walk away - if ($retries >= 5) { + if ($retries >= 500) { static::debug(" this mention was tried earlier at least 5 times, drop it"); continue; } else { - $m['retries'] = $retries + 1; + $_m['retries'] = $retries + 1; } // validate target - $parsed = static::try_get_remote( $m['source'], $m['target'] ); + $parsed = static::try_get_remote( $post_id, $_m['source'], $_m['target'] ); // parsing didn't go well, try again later if ($parsed === false) { static::debug(" parsing this mention failed, retrying next time"); - add_post_meta($post->ID, static::meta_key, $m, false); + update_post_meta($post_id, static::meta_received, $_m, $m); + //add_post_meta($post->ID, static::meta_received, $m, false); continue; } @@ -248,14 +387,16 @@ class WP_WEBMENTION_AGAIN { } // re-schedule ourselves immediately, we may not be finished with the mentions + // delete_post_meta($post->ID, static::meta_received, $m); return true; } /** * remote */ - public static function try_get_remote ( $source, $target ) { + public static function try_get_remote ( &$post_id, &$source, &$target ) { + $content = false; static::debug(" fetching {$source}"); $url = htmlspecialchars_decode($source); $q = wp_remote_get($source); @@ -304,16 +445,252 @@ class WP_WEBMENTION_AGAIN { } else { static::debug(" content is (probably) html, trying to parse it with MF2"); - $content = Mf2\parse($q['body'], $url); + try { + $content = Mf2\parse($q['body'], $url); + } + catch (Exception $e) { + static::debug(' parsing MF2 failed:' . $e->getMessage()); + return false; + } } - return $this->try_save_comment( $source, $target, $content ); + if (!empty($content)) + $c = static::try_parse_comment( $post_id, $source, $target, $content ); + + return false; } - public function try_save_comment ( $source, $target, $content ) { - return true; + /** + * + */ + public static function try_parse_comment ( &$post_id, &$source, &$target, &$raw ) { + $hentry = false; + + $content = static::mf2_unarray($raw); + + if (empty($content) || !is_array($content) || !isset($content['items']) || empty($content['items'])) { + static::debug(' nothing to parse :('); + return false; + } + + $item = false; + $p_authors = array(); + // un-arrayed + if (isset($content['items']['properties']) && isset($content['items']['type'])) { + $item = $content['items']; + if (!isset($item['type']) || ($item['type'] != 'h-entry')) { + static::debug(' no parseable h-entry found, saving as standard mention comment'); + return true; + } + } + elseif (is_array($content['items']) && !empty($content['items']['type'])) { + foreach ($content['items'] as $i) { + if ($i['type'] == 'h-entry') { + $item = $i; + } + elseif ($i['type'] == 'h-card') { + $p_authors[] = $i; + } + } + } + + if (!$item || empty($item)) { + static::debug(' no parseable h-entry found, saving as standard mention comment'); + return true; + } + + // process author + $author_name = $author_url = $avatar = $a = false; + + if (isset($item['properties']['author'])) { + $a = $item['properties']['author']; + } + elseif (!empty($p_authors)) { + $a = array_pop($p_authors); + } + + if ($a && isset($a['properties'])) { + $a = $a['properties']; + + if (isset($a['name']) && !empty($a['name'])) + $author_name = $a['name']; + + $try_photos = array ('photo', 'avatar'); + $p = false; + foreach ($try_photos as $photo) { + if (isset($a[$photo]) && !empty($a[$photo])) { + $p = $a[$photo]; + if (!empty($p)) { + $avatar = $p; + break; + } + } + } + + if (isset($a['url']) && !empty($a['url'])) { + if (is_array($a['url'])) + $author_url = array_pop($a['url']); + else + $author_url = $a['url']; + } + + $author = $a; + } + + // process type + $type = 'comment'; + + foreach ( static::mftypes() as $k => $mapped) { + if (is_array($item['properties']) && isset($item['properties'][$mapped])) + $type = $k; + } + + //process content + $c = ''; + if (isset($item['properties']['content']) && isset($item['properties']['content']['html'])) + $c = $item['properties']['content']['html']; + if (isset($item['properties']['content']) && isset($item['properties']['content']['value'])) + $c = wp_filter_kses($item['properties']['content']['value']); + + // REACJI + $emoji = EmojiRecognizer::isSingleEmoji($c); + + if ($emoji) { + static::debug('wheeeee, reacji!'); + $type = trim($c); + static::register_reacji( $type ); + } + + // process date + if (isset($item['properties']['modified'])) + $date = strtotime($item['properties']['modified']); + elseif (isset($item['properties']['published'])) + $date = strtotime($item['properties']['published']); + else + $date = time(); + + $name = empty($author_name) ? $source : $author_name; + $url = empty($author_url) ? $source : $author_url; + + $c = array ( + 'comment_author' => $name, + 'comment_author_url' => $url, + 'comment_author_email' => '', + 'comment_post_ID' => $post_id, + 'comment_type' => $type, + 'comment_date' => date("Y-m-d H:i:s", $date ), + 'comment_date_gmt' => date("Y-m-d H:i:s", $date ), + 'comment_agent' => __CLASS__, + 'comment_approved' => 0, + 'comment_content' => $c, + ); + + return static::insert_comment ($post_id, $source, $target, $c, $raw, $avatar ); } + /** + * Comment inserter + * + * @param string &$post_id post ID + * @param array &$comment array formatted to match a WP Comment requirement + * @param mixed &$raw Raw format of the comment, like JSON response from the provider + * @param string &$avatar Avatar string to be stored as comment meta + * + */ + public static function insert_comment ( &$post_id, &$source, &$target, &$comment, &$raw, &$avatar = '' ) { + + $comment_id = false; + + // safety first + $comment['comment_author_email'] = filter_var ( $comment['comment_author_email'], FILTER_SANITIZE_EMAIL ); + $comment['comment_author_url'] = filter_var ( $comment['comment_author_url'], FILTER_SANITIZE_URL ); + $comment['comment_author'] = filter_var ( $comment['comment_author'], FILTER_SANITIZE_STRING); + + //test if we already have this imported + $testargs = array( + 'author_url' => $comment['comment_author_url'], + 'post_id' => $post_id, + ); + + // so if the type is comment and you add type = 'comment', WP will not return the comments + // such logical! + if ( $comment['comment_type'] != 'comment') + $testargs['type'] = $comment['comment_type']; + + // in case it's a fav or a like, the date field is not always present + // but there should be only one of those, so the lack of a date field indicates + // that we should not look for a date when checking the existence of the + // comment + if ( isset( $comment['comment_date']) && !empty($comment['comment_date']) ) { + // in case you're aware of a nicer way of doing this, please tell me + // or commit a change... + + $tmp = explode ( " ", $comment['comment_date'] ); + $d = explode( "-", $tmp[0]); + $t = explode (':',$tmp[1]); + + $testargs['date_query'] = array( + 'year' => $d[0], + 'monthnum' => $d[1], + 'day' => $d[2], + 'hour' => $t[0], + 'minute' => $t[1], + 'second' => $t[2], + ); + + //$testargs['date_query'] = $comment['comment_date']; + + //test if we already have this imported + static::debug("checking comment existence (with date) for post #{$post_id}"); + } + else { + // we do need a date + $comment['comment_date'] = date("Y-m-d H:i:s"); + $comment['comment_date_gmt'] = date("Y-m-d H:i:s"); + + static::debug("checking comment existence (no date) for post #{$post_id}"); + } + + $existing = get_comments($testargs); + + // no matching comment yet, insert it + if (!empty($existing)) { + static::debug ('comment already exists'); + return true; + } + + // disable flood control, just in case + remove_filter('check_comment_flood', 'check_comment_flood_db', 10, 3); + $comment = apply_filters( 'preprocess_comment', $comment ); + + if ( $comment_id = wp_new_comment($comment) ) { + + // add avatar for later use if present + if (!empty($avatar)) { + update_comment_meta( $comment_id, 'avatar', $avatar ); + } + + // full raw response for the vote, just in case + update_comment_meta( $comment_id, 'webmention_source_mf2', $raw ); + + // info + $r = "new comment inserted for {$post_id} as #{$comment_id}"; + + // notify author + wp_notify_postauthor( $comment_id ); + } + else { + $r = "something went wrong when trying to insert comment for post #{$post_id}"; + } + + // re-add flood control + add_filter('check_comment_flood', 'check_comment_flood_db', 10, 3); + + static::debug($r); + return $comment_id; + } + + /** * */ @@ -326,11 +703,11 @@ class WP_WEBMENTION_AGAIN { 'target' => $target ); - static::debug ("queueing {static::meta_key} meta for #{$post_id}; source: {$source}, target: {$target}"); - $r = add_post_meta($post_id, static::meta_key, $val, false); + static::debug ("queueing {static::meta_received} meta for #{$post_id}; source: {$source}, target: {$target}"); + $r = add_post_meta($post_id, static::meta_received, $val, false); if ($r == false ) - static::debug ("adding {static::meta_key} meta for #{$post_id} failed"); + static::debug ("adding {static::meta_received} meta for #{$post_id} failed"); return $r; } @@ -373,6 +750,59 @@ class WP_WEBMENTION_AGAIN { error_log( __CLASS__ . ": " . $message ); } + /** + * + */ + public static function mfmap ( $mf2 ) { + $map = static::mftypes(); + + foreach ($map as $k => $mapped) { + if (is_array( $mf2) && isset($mf2[$mapped])) + return $k; + } + + // fallback + return 'comment'; + } + + public static function mftypes () { + $map = array ( + // http://indiewebcamp.com/reply + 'reply' => 'in-reply-to', + // http://indiewebcamp.com/repost + 'repost' => 'repost-of', + // http://indiewebcamp.com/like + 'like' => 'like-of', + // http://indiewebcamp.com/favorite + //'' => array ('favorite','',), + // http://indiewebcamp.com/bookmark + 'bookmark' => 'bookmark-of', + // http://indiewebcamp.com/rsvp + 'rsvp' => 'rsvp', + // http://indiewebcamp.com/tag + 'tag' => 'tag-of', + ); + + return $map; + } + + /** + * + */ + public static function mf2_unarray ( $mf2 ) { + + if (is_array($mf2) && count($mf2) == 1) { + $mf2 = array_pop($mf2); + } + + if (is_array($mf2)) { + foreach($mf2 as $key => $value) { + $mf2[$key] = static::mf2_unarray($value); + } + } + + return $mf2; + } /** * */ @@ -397,6 +827,35 @@ class WP_WEBMENTION_AGAIN { return false; } + + /** + * of there is a comment meta 'avatar' field, use that as avatar for the commenter + * + * @param string $avatar the current avatar image string + * @param mixed $id_or_email this could be anything that triggered the avatar all + * @param string $size size for the image to display + * @param string $default optional fallback + * @param string $alt alt text for the avatar image + */ + public static function get_avatar($avatar, $id_or_email, $size, $default = '', $alt = '') { + if (!is_object($id_or_email) || !isset($id_or_email->comment_type)) + return $avatar; + + // check if comment has an avatar + $c_avatar = get_comment_meta($id_or_email->comment_ID, 'avatar', true); + + if (!$c_avatar) + return $avatar; + + if (false === $alt) + $safe_alt = ''; + else + $safe_alt = esc_attr($alt); + + + return sprintf( '%s', $safe_alt, $c_avatar, $size, $size ); + } + } $WP_WEBMENTION_AGAIN = new WP_WEBMENTION_AGAIN();