From 27ce87b301421a618aea5a9ef91e5e0e4d19dce4 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 13 Oct 2025 10:45:16 +0200 Subject: [PATCH 01/35] Add support for ActivityPub object post type Introduces a new 'ap_object' post type and related taxonomies for handling ActivityPub objects, including registration, CRUD operations, and taxonomy management. Updates create and update handlers to process non-interaction objects using the new Objects collection. Enhances debug functionality to support the new post type and taxonomies. --- includes/class-post-types.php | 74 +++++++++++ includes/collection/class-inbox.php | 5 + includes/collection/class-objects.php | 169 ++++++++++++++++++++++++++ includes/collection/class-outbox.php | 5 + includes/debug.php | 29 ++++- includes/handler/class-create.php | 83 ++++++++++--- includes/handler/class-update.php | 31 ++++- 7 files changed, 371 insertions(+), 25 deletions(-) create mode 100644 includes/collection/class-objects.php diff --git a/includes/class-post-types.php b/includes/class-post-types.php index b9994ec12..d9e07c236 100644 --- a/includes/class-post-types.php +++ b/includes/class-post-types.php @@ -11,6 +11,7 @@ use Activitypub\Collection\Extra_Fields; use Activitypub\Collection\Followers; use Activitypub\Collection\Inbox; +use Activitypub\Collection\Objects; use Activitypub\Collection\Outbox; use Activitypub\Collection\Remote_Actors; @@ -25,6 +26,7 @@ public static function init() { \add_action( 'init', array( self::class, 'register_remote_actors_post_type' ), 11 ); \add_action( 'init', array( self::class, 'register_inbox_post_type' ), 11 ); \add_action( 'init', array( self::class, 'register_outbox_post_type' ), 11 ); + \add_action( 'init', array( self::class, 'register_object_post_type' ), 11 ); \add_action( 'init', array( self::class, 'register_extra_fields_post_types' ), 11 ); \add_action( 'init', array( self::class, 'register_activitypub_post_meta' ), 11 ); @@ -345,6 +347,78 @@ public static function register_outbox_post_type() { ); } + /** + * Register the Object post type. + */ + public static function register_object_post_type() { + \register_post_type( + Objects::POST_TYPE, + array( + 'labels' => array( + 'name' => \_x( 'Posts', 'post_type plural name', 'activitypub' ), + 'singular_name' => \_x( 'Post', 'post_type single name', 'activitypub' ), + ), + 'capabilities' => array( + 'create_posts' => false, + ), + 'map_meta_cap' => true, + 'public' => false, + 'show_in_rest' => true, + 'rewrite' => false, + 'query_var' => false, + 'supports' => array( 'title', 'editor', 'author', 'custom-fields', 'excerpt', 'comments' ), + 'delete_with_user' => true, + 'can_export' => true, + 'exclude_from_search' => true, + 'taxonomies' => array( 'ap_tag' ), + ) + ); + + \register_taxonomy( + 'ap_tag', + array( Objects::POST_TYPE ), + array( + 'labels' => array( + 'name' => \_x( 'Tags', 'taxonomy general name', 'activitypub' ), + 'singular_name' => \_x( 'Tag', 'taxonomy singular name', 'activitypub' ), + 'search_items' => \__( 'Search Tags', 'activitypub' ), + 'all_items' => \__( 'All Tags', 'activitypub' ), + 'edit_item' => \__( 'Edit Tag', 'activitypub' ), + 'update_item' => \__( 'Update Tag', 'activitypub' ), + 'add_new_item' => \__( 'Add New Tag', 'activitypub' ), + 'new_item_name' => \__( 'New Tag Name', 'activitypub' ), + 'menu_name' => \__( 'Tags', 'activitypub' ), + ), + 'public' => false, + 'query_var' => true, + 'show_in_rest' => true, + 'rewrite' => array( 'slug' => 'ap_tag' ), + ) + ); + + \register_taxonomy( + 'ap_object_type', + array( Objects::POST_TYPE ), + array( + 'labels' => array( + 'name' => \_x( 'Object Types', 'taxonomy general name', 'activitypub' ), + 'singular_name' => \_x( 'Object Type', 'taxonomy singular name', 'activitypub' ), + 'search_items' => \__( 'Search Object Types', 'activitypub' ), + 'all_items' => \__( 'All Object Types', 'activitypub' ), + 'edit_item' => \__( 'Edit Object Type', 'activitypub' ), + 'update_item' => \__( 'Update Object Type', 'activitypub' ), + 'add_new_item' => \__( 'Add New Object Type', 'activitypub' ), + 'new_item_name' => \__( 'New Object Type Name', 'activitypub' ), + 'menu_name' => \__( 'Object Types', 'activitypub' ), + ), + 'public' => false, + 'query_var' => true, + 'show_in_rest' => true, + 'rewrite' => array( 'slug' => 'ap_object_type' ), + ) + ); + } + /** * Register the Extra Fields post types. */ diff --git a/includes/collection/class-inbox.php b/includes/collection/class-inbox.php index fadf9a53d..4242f4ee2 100644 --- a/includes/collection/class-inbox.php +++ b/includes/collection/class-inbox.php @@ -20,6 +20,11 @@ * @link https://www.w3.org/TR/activitypub/#inbox */ class Inbox { + /** + * The post type for the objects. + * + * @var string + */ const POST_TYPE = 'ap_inbox'; /** diff --git a/includes/collection/class-objects.php b/includes/collection/class-objects.php new file mode 100644 index 000000000..977126412 --- /dev/null +++ b/includes/collection/class-objects.php @@ -0,0 +1,169 @@ +ID ); + + self::add_taxonomies( $post_id, $activity_object ); + + return \get_post( $post_id ); + } + + /** + * Get an object from the collection. + * + * @param int $id The object ID. + * + * @return \WP_Post|array|null The object post or WP_Error on failure. + */ + public static function get( $id ) { + return \get_post( $id ); + } + + /** + * Get an object by its GUID. + * + * @param string $guid The object GUID. + * + * @return \WP_Post|\WP_Error The object post or WP_Error on failure. + */ + public static function get_by_guid( $guid ) { + global $wpdb; + // phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.DirectDatabaseQuery.NoCaching + $post_id = $wpdb->get_var( + $wpdb->prepare( + "SELECT ID FROM $wpdb->posts WHERE guid=%s AND post_type=%s", + \esc_url( $guid ), + self::POST_TYPE + ) + ); + + if ( ! $post_id ) { + return new \WP_Error( + 'activitypub_object_not_found', + \__( 'Object not found', 'activitypub' ), + array( 'status' => 404 ) + ); + } + + return \get_post( $post_id ); + } + + /** + * Update an object in the collection. + * + * @param array $activity The activity object data. + * + * @return \WP_Post|\WP_Error The updated object post or WP_Error on failure. + */ + public static function update( $activity ) { + $post = self::get_by_guid( $activity['object']['id'] ); + if ( \is_wp_error( $post ) ) { + return $post; + } + + $post_array = self::activity_to_post( $activity['object'] ); + $post_array['ID'] = $post->ID; + $post_id = \wp_update_post( $post_array, true ); + + if ( \is_wp_error( $post_id ) ) { + return $post_id; + } + + self::add_taxonomies( $post_id, $activity['object'] ); + + return \get_post( $post_id ); + } + + /** + * Convert JSON input to a Base_Object. + * + * @param array $activity The activity array. + * + * @return \WP_Post|\WP_Error An Object built from the JSON string or WP_Error when it's not a JSON string. + */ + private static function activity_to_post( $activity ) { + if ( ! is_array( $activity ) ) { + return new \WP_Error( 'invalid_activity', __( 'Invalid activity format', 'activitypub' ) ); + } + + $post = array( + 'post_title' => isset( $activity['name'] ) ? \wp_strip_all_tags( $activity['name'] ) : '', + 'post_content' => isset( $activity['content'] ) ? \wp_kses_post( $activity['content'] ) : '', + 'post_excerpt' => isset( $activity['summary'] ) ? \wp_strip_all_tags( $activity['summary'] ) : '', + 'post_status' => 'publish', + 'post_type' => self::POST_TYPE, + 'guid' => isset( $activity['id'] ) ? \esc_url_raw( $activity['id'] ) : '', + ); + + return $post; + } + + /** + * Add taxonomies to the object post. + * + * @param int $post_id The post ID. + * @param array $activity_object The activity object data. + * + * @return void + */ + private static function add_taxonomies( $post_id, $activity_object ) { + // Save Object Type as Taxonomy item. + \wp_set_post_terms( $post_id, array( $activity_object['type'] ), 'ap_object_type' ); + + $tags = array(); + + // Save the Hashtags as Taxonomy items. + if ( ! empty( $activity_object['tag'] ) && \is_array( $activity_object['tag'] ) ) { + foreach ( $activity_object['tag'] as $tag ) { + if ( isset( $tag['type'] ) && 'Hashtag' === $tag['type'] && isset( $tag['name'] ) ) { + $tags[] = \wp_strip_all_tags( ltrim( $tag['name'], '#' ) ); + } + } + } + + \wp_set_post_terms( $post_id, $tags, 'ap_tag' ); + } +} diff --git a/includes/collection/class-outbox.php b/includes/collection/class-outbox.php index 9e3011891..83fa00b4f 100644 --- a/includes/collection/class-outbox.php +++ b/includes/collection/class-outbox.php @@ -20,6 +20,11 @@ * @link https://www.w3.org/TR/activitypub/#outbox */ class Outbox { + /** + * The post type for the objects. + * + * @var string + */ const POST_TYPE = 'ap_outbox'; /** diff --git a/includes/debug.php b/includes/debug.php index e629ccd65..04bad8e3a 100644 --- a/includes/debug.php +++ b/includes/debug.php @@ -8,6 +8,7 @@ namespace Activitypub; use Activitypub\Collection\Inbox; +use Activitypub\Collection\Objects; use Activitypub\Collection\Outbox; /** @@ -32,8 +33,8 @@ function allow_localhost( $parsed_args ) { * * @return array The arguments for the post type. */ -function debug_outbox_post_type( $args, $post_type ) { - if ( ! \in_array( $post_type, array( Outbox::POST_TYPE, Inbox::POST_TYPE ), true ) ) { +function debug_post_type( $args, $post_type ) { + if ( ! \in_array( $post_type, array( Outbox::POST_TYPE, Inbox::POST_TYPE, Objects::POST_TYPE ), true ) ) { return $args; } @@ -43,11 +44,33 @@ function debug_outbox_post_type( $args, $post_type ) { $args['menu_icon'] = 'dashicons-upload'; } elseif ( Inbox::POST_TYPE === $post_type ) { $args['menu_icon'] = 'dashicons-download'; + } elseif ( Objects::POST_TYPE === $post_type ) { + $args['menu_icon'] = 'dashicons-media-document'; + } + + return $args; +} +\add_filter( 'register_post_type_args', '\Activitypub\debug_post_type', 10, 2 ); + +/** + * Debug the object type taxonomy. + * + * @param array $args The arguments for the taxonomy. + * @param string $taxonomy The taxonomy. + * + * @return array The arguments for the taxonomy. + */ +function debug_taxonomy( $args, $taxonomy ) { + if ( ! in_array( $taxonomy, array( 'ap_object_type', 'ap_tag' ), true ) ) { + return $args; } + $args['show_ui'] = true; + $args['show_in_menu'] = true; + return $args; } -\add_filter( 'register_post_type_args', '\Activitypub\debug_outbox_post_type', 10, 2 ); +\add_filter( 'register_taxonomy_args', '\Activitypub\debug_taxonomy', 10, 2 ); /** * Debug the outbox post type column. diff --git a/includes/handler/class-create.php b/includes/handler/class-create.php index 579562201..d0df11dee 100644 --- a/includes/handler/class-create.php +++ b/includes/handler/class-create.php @@ -8,6 +8,7 @@ namespace Activitypub\Handler; use Activitypub\Collection\Interactions; +use Activitypub\Collection\Objects; use function Activitypub\get_activity_visibility; use function Activitypub\is_activity_reply; @@ -34,14 +35,38 @@ public static function init() { * @param \Activitypub\Activity\Activity $activity_object Optional. The activity object. Default null. */ public static function handle_create( $activity, $user_id, $activity_object = null ) { - // Check if Activity is public or not. - if ( - ACTIVITYPUB_CONTENT_VISIBILITY_PRIVATE === get_activity_visibility( $activity ) || - ! is_activity_reply( $activity ) - ) { - return; + // Check for private and/or direct messages. + if ( ACTIVITYPUB_CONTENT_VISIBILITY_PRIVATE === get_activity_visibility( $activity ) ) { + $result = false; + } elseif ( is_activity_reply( $activity ) ) { // Check for replies. + $result = self::create_interaction( $activity, $user_id, $activity_object ); + } else { // Handle non-interaction objects. + $result = self::create_object( $activity, $user_id, $activity_object ); } + $success = ( false !== $result && ! \is_wp_error( $result ) ); + + /** + * Fires after an ActivityPub Create activity has been handled. + * + * @param array $activity The ActivityPub activity data. + * @param int $user_id The local user ID. + * @param bool $success True on success, false otherwise. + * @param array|string|int|\WP_Error|false $result The WP_Comment object of the created comment, or null if creation failed. + */ + \do_action( 'activitypub_handled_create', $activity, $user_id, $success, $result ); + } + + /** + * Handle interactions like replies. + * + * @param array $activity The activity-object. + * @param int $user_id The id of the local blog-user. + * @param \Activitypub\Activity\Activity $activity_object Optional. The activity object. Default null. + * + * @return \WP_Comment|\WP_Error|false The created comment, WP_Error on failure, false if not processed. + */ + public static function create_interaction( $activity, $user_id, $activity_object = null ) { $check_dupe = object_id_to_comment( $activity['object']['id'] ); // If comment exists, call update action. @@ -61,23 +86,41 @@ public static function handle_create( $activity, $user_id, $activity_object = nu return; } - $success = false; - $result = Interactions::add_comment( $activity ); + $result = Interactions::add_comment( $activity ); - if ( $result && ! \is_wp_error( $result ) ) { - $success = true; - $result = \get_comment( $result ); + if ( ! $result || \is_wp_error( $result ) ) { + return $result; } - /** - * Fires after an ActivityPub Create activity has been handled. - * - * @param array $activity The ActivityPub activity data. - * @param int $user_id The local user ID. - * @param bool $success True on success, false otherwise. - * @param array|string|int|\WP_Error|false $result The WP_Comment object of the created comment, or null if creation failed. - */ - \do_action( 'activitypub_handled_create', $activity, $user_id, $success, $result ); + return \get_comment( $result ); + } + + /** + * Handle non-interaction objects like posts. + * + * @param array $activity The activity-object. + * @param int $user_id The id of the local blog-user. + * @param \Activitypub\Activity\Activity $activity_object Optional. The activity object. Default null. + * + * @return \WP_Post|\WP_Error The post on success or WP_Error on failure. + */ + public static function create_object( $activity, $user_id, $activity_object = null ) { + $check_dupe = Objects::get_by_guid( $activity['object']['id'] ); + + // If comment exists, call update action. + if ( ! \is_wp_error( $check_dupe ) ) { + /** + * Fires when a Create activity is received for an existing comment. + * + * @param array $activity The activity-object. + * @param int $user_id The id of the local blog-user. + * @param \Activitypub\Activity\Activity $activity_object The activity object. + */ + \do_action( 'activitypub_inbox_update', $activity, $user_id, $activity_object ); + return; + } + + return Objects::add( $activity ); } /** diff --git a/includes/handler/class-update.php b/includes/handler/class-update.php index 7ff72ac2a..abb0c20a9 100644 --- a/includes/handler/class-update.php +++ b/includes/handler/class-update.php @@ -8,10 +8,11 @@ namespace Activitypub\Handler; use Activitypub\Collection\Interactions; +use Activitypub\Collection\Objects; use Activitypub\Collection\Remote_Actors; use function Activitypub\get_remote_metadata_by_actor; - +use function Activitypub\is_activity_reply; /** * Handle Update requests. */ @@ -58,7 +59,12 @@ public static function handle_update( $activity, $user_id ) { case 'Video': case 'Event': case 'Document': - self::update_interaction( $activity, $user_id ); + // Check for private and/or direct messages. + if ( is_activity_reply( $activity ) ) { + self::update_interaction( $activity, $user_id ); + } else { + self::update_object( $activity, $user_id ); + } break; /* @@ -99,6 +105,27 @@ public static function update_interaction( $activity, $user_id ) { \do_action( 'activitypub_handled_update', $activity, $user_id, $success, $result ); } + /** + * Update an Object. + * + * @param array $activity The Activity object. + * @param int $user_id The user ID. Always null for Update activities. + */ + public static function update_object( $activity, $user_id ) { + $result = Objects::update( $activity ); + $success = ( false !== $result && ! \is_wp_error( $result ) ); + + /** + * Fires after an ActivityPub Update activity has been handled. + * + * @param array $activity The ActivityPub activity data. + * @param int $user_id The local user ID. + * @param bool $success True on success, false otherwise. + * @param array|string|int|\WP_Error|false $result The updated comment, or null if update failed. + */ + \do_action( 'activitypub_handled_update', $activity, $user_id, $success, $result ); + } + /** * Update an Actor. * From fd3661e485fc7f802bfce1ed47887266c69f80a1 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 13 Oct 2025 11:21:38 +0200 Subject: [PATCH 02/35] Add ActivityPub content sanitizer and update usage Introduced Sanitize::content() to process and format content for ActivityPub, including block support and HTML sanitization. Updated Objects class to use the new sanitizer for post content. --- includes/class-sanitize.php | 39 +++++++++++++++++++++++++++ includes/collection/class-objects.php | 4 ++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/includes/class-sanitize.php b/includes/class-sanitize.php index 1f006b66e..32a3835a3 100644 --- a/includes/class-sanitize.php +++ b/includes/class-sanitize.php @@ -182,4 +182,43 @@ public static function webfinger( $value ) { return $value; } + + /** + * Sanitize content for ActivityPub. + * + * @param string $content The content to convert. + * + * @return string The converted content. + */ + public static function content( $content ) { + $content = \make_clickable( $content ); + $content = \wpautop( $content ); + $content = \wp_kses_post( $content ); + + var_dump( $content ); + + if ( ! site_supports_blocks() ) { + return $content; + } + + $content = \preg_split( '/(
|
|<\/p>|' . PHP_EOL . ')/i', $content ); + $content = array_map( 'trim', $content ); + $content = array_map( + function ( $el ) { + $el = preg_replace( '/^

/i', '', $el ); + $el = preg_replace( '/<\/p>$/i', '', $el ); + + return $el; + }, + $content + ); + + $content = array_filter( $content ); + + if ( empty( $content ) ) { + return ''; + } + + return '' . PHP_EOL . '

' . implode( '

' . PHP_EOL . '' . PHP_EOL . PHP_EOL . '' . PHP_EOL . '

', $content ) . '

' . PHP_EOL . ''; + } } diff --git a/includes/collection/class-objects.php b/includes/collection/class-objects.php index 977126412..762b8914b 100644 --- a/includes/collection/class-objects.php +++ b/includes/collection/class-objects.php @@ -7,6 +7,8 @@ namespace Activitypub\Collection; +use Activitypub\Sanitize; + use function Activitypub\object_to_uri; /** @@ -131,7 +133,7 @@ private static function activity_to_post( $activity ) { $post = array( 'post_title' => isset( $activity['name'] ) ? \wp_strip_all_tags( $activity['name'] ) : '', - 'post_content' => isset( $activity['content'] ) ? \wp_kses_post( $activity['content'] ) : '', + 'post_content' => isset( $activity['content'] ) ? Sanitize::content( $activity['content'] ) : '', 'post_excerpt' => isset( $activity['summary'] ) ? \wp_strip_all_tags( $activity['summary'] ) : '', 'post_status' => 'publish', 'post_type' => self::POST_TYPE, From 570742044a332203982f60a2f551f59eb40a5c76 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 13 Oct 2025 12:06:07 +0200 Subject: [PATCH 03/35] Add HTML to blocks conversion and refactor sanitization Introduces a new Blocks::html_to_blocks() method to convert HTML content into block format using DOMDocument parsing and block mapping. Refactors Sanitize to use this new method, replacing the previous regex-based paragraph splitting logic for improved accuracy and maintainability. --- includes/class-blocks.php | 118 ++++++++++++++++++++++++++++++++++++ includes/class-sanitize.php | 22 +------ 2 files changed, 119 insertions(+), 21 deletions(-) diff --git a/includes/class-blocks.php b/includes/class-blocks.php index 4c1a2430e..7415c0484 100644 --- a/includes/class-blocks.php +++ b/includes/class-blocks.php @@ -438,4 +438,122 @@ public static function revert_embed_links( $block_content, $block ) { } return '

' . $block['attrs']['url'] . '

'; } + + /** + * Convert HTML content to blocks. + * + * @param string $content The HTML content. + * + * @return string The content converted to blocks. + */ + public static function html_to_blocks( $content ) { + if ( empty( $content ) ) { + return ''; + } + + // Load the content into a DOMDocument. + $dom = new \DOMDocument(); + \libxml_use_internal_errors( true ); + $dom->loadHTML( '' . $content ); + \libxml_clear_errors(); + $body = $dom->getElementsByTagName( 'body' )->item( 0 ); + + if ( ! $body ) { + return $content; + } + + $_content = ''; + + // Pre-compute block type mapping for better performance. + static $block_map = array( + 'ul' => 'list', + 'ol' => 'list', + 'img' => 'image', + 'blockquote' => 'quote', + 'h1' => 'heading', + 'h2' => 'heading', + 'h3' => 'heading', + 'h4' => 'heading', + 'h5' => 'heading', + 'h6' => 'heading', + 'p' => 'paragraph', + 'a' => 'paragraph', + 'abbr' => 'paragraph', + 'b' => 'paragraph', + 'code' => 'paragraph', + 'em' => 'paragraph', + 'i' => 'paragraph', + 'strong' => 'paragraph', + 'sub' => 'paragraph', + 'sup' => 'paragraph', + 'span' => 'paragraph', + 'u' => 'paragraph', + 'figure' => 'image', + 'hr' => 'separator', + ); + + // phpcs:disable WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase + foreach ( $body->childNodes as $node ) { + $node_name = strtolower( $node->nodeName ); + $block = $block_map[ $node_name ] ?? 'html'; + + // Skip unsupported elements. + if ( in_array( $node_name, array( 'br', 'cite', 'source' ), true ) ) { + continue; + } + + // Get the HTML content for this specific node instead of entire content. + $node_html = $dom->saveHTML( $node ); + + // Get block attributes based on node type. + $attributes = self::get_node_attributes( $node, $block ); + + $_content .= \get_comment_delimited_block_content( $block, $attributes, $node_html ); + } + // phpcs:enable WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase + + return $_content; + } + + /** + * Get block attributes for a DOM node based on block type. + * + * @param \DOMNode $node The DOM node. + * @param string $block_type The block type. + * + * @return array The block attributes. + */ + private static function get_node_attributes( $node, $block_type ) { + $attributes = array(); + + // phpcs:disable WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase + switch ( $block_type ) { + case 'heading': + $level = (int) substr( $node->nodeName, 1 ); // Extract number from h1, h2, etc. + if ( $level > 1 ) { + $attributes['level'] = $level; + } + break; + + case 'list': + if ( 'ol' === strtolower( $node->nodeName ) ) { + $attributes['ordered'] = true; + } + break; + + case 'image': + if ( $node instanceof \DOMElement ) { + if ( $node->hasAttribute( 'src' ) ) { + $attributes['url'] = $node->getAttribute( 'src' ); + } + if ( $node->hasAttribute( 'alt' ) ) { + $attributes['alt'] = $node->getAttribute( 'alt' ); + } + } + break; + } + // phpcs:enable WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase + + return $attributes; + } } diff --git a/includes/class-sanitize.php b/includes/class-sanitize.php index 32a3835a3..212febbd3 100644 --- a/includes/class-sanitize.php +++ b/includes/class-sanitize.php @@ -195,30 +195,10 @@ public static function content( $content ) { $content = \wpautop( $content ); $content = \wp_kses_post( $content ); - var_dump( $content ); - if ( ! site_supports_blocks() ) { return $content; } - $content = \preg_split( '/(
|
|<\/p>|' . PHP_EOL . ')/i', $content ); - $content = array_map( 'trim', $content ); - $content = array_map( - function ( $el ) { - $el = preg_replace( '/^

/i', '', $el ); - $el = preg_replace( '/<\/p>$/i', '', $el ); - - return $el; - }, - $content - ); - - $content = array_filter( $content ); - - if ( empty( $content ) ) { - return ''; - } - - return '' . PHP_EOL . '

' . implode( '

' . PHP_EOL . '' . PHP_EOL . PHP_EOL . '' . PHP_EOL . '

', $content ) . '

' . PHP_EOL . ''; + return Blocks::html_to_blocks( $content ); } } From 630aec6cb5f1dc9ff0fb0acf58b7f95e72bde8ea Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 13 Oct 2025 12:16:16 +0200 Subject: [PATCH 04/35] Remove get_node_attributes method and inline list attribute The get_node_attributes private method was removed and the logic for setting the 'ordered' attribute on 'ol' nodes is now handled inline. This simplifies the code by reducing indirection and focusing only on the required attribute for ordered lists. --- includes/class-blocks.php | 52 +++++---------------------------------- 1 file changed, 6 insertions(+), 46 deletions(-) diff --git a/includes/class-blocks.php b/includes/class-blocks.php index 7415c0484..de24e4fc9 100644 --- a/includes/class-blocks.php +++ b/includes/class-blocks.php @@ -494,8 +494,9 @@ public static function html_to_blocks( $content ) { // phpcs:disable WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase foreach ( $body->childNodes as $node ) { - $node_name = strtolower( $node->nodeName ); - $block = $block_map[ $node_name ] ?? 'html'; + $node_name = strtolower( $node->nodeName ); + $block = $block_map[ $node_name ] ?? 'html'; + $attributes = array(); // Skip unsupported elements. if ( in_array( $node_name, array( 'br', 'cite', 'source' ), true ) ) { @@ -505,8 +506,9 @@ public static function html_to_blocks( $content ) { // Get the HTML content for this specific node instead of entire content. $node_html = $dom->saveHTML( $node ); - // Get block attributes based on node type. - $attributes = self::get_node_attributes( $node, $block ); + if ( 'ol' === strtolower( $node->nodeName ) ) { + $attributes['ordered'] = true; + } $_content .= \get_comment_delimited_block_content( $block, $attributes, $node_html ); } @@ -514,46 +516,4 @@ public static function html_to_blocks( $content ) { return $_content; } - - /** - * Get block attributes for a DOM node based on block type. - * - * @param \DOMNode $node The DOM node. - * @param string $block_type The block type. - * - * @return array The block attributes. - */ - private static function get_node_attributes( $node, $block_type ) { - $attributes = array(); - - // phpcs:disable WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase - switch ( $block_type ) { - case 'heading': - $level = (int) substr( $node->nodeName, 1 ); // Extract number from h1, h2, etc. - if ( $level > 1 ) { - $attributes['level'] = $level; - } - break; - - case 'list': - if ( 'ol' === strtolower( $node->nodeName ) ) { - $attributes['ordered'] = true; - } - break; - - case 'image': - if ( $node instanceof \DOMElement ) { - if ( $node->hasAttribute( 'src' ) ) { - $attributes['url'] = $node->getAttribute( 'src' ); - } - if ( $node->hasAttribute( 'alt' ) ) { - $attributes['alt'] = $node->getAttribute( 'alt' ); - } - } - break; - } - // phpcs:enable WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase - - return $attributes; - } } From ca3305e56f1fb2038415acf43458beebb2c3fdc7 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 13 Oct 2025 12:17:29 +0200 Subject: [PATCH 05/35] Rename html_to_blocks to convert_from_html in Blocks Refactored the Blocks class by renaming the html_to_blocks method to convert_from_html for improved clarity. Updated all references to use the new method name. --- includes/class-blocks.php | 2 +- includes/class-sanitize.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/includes/class-blocks.php b/includes/class-blocks.php index de24e4fc9..598310a32 100644 --- a/includes/class-blocks.php +++ b/includes/class-blocks.php @@ -446,7 +446,7 @@ public static function revert_embed_links( $block_content, $block ) { * * @return string The content converted to blocks. */ - public static function html_to_blocks( $content ) { + public static function convert_from_html( $content ) { if ( empty( $content ) ) { return ''; } diff --git a/includes/class-sanitize.php b/includes/class-sanitize.php index 212febbd3..5ab10ba73 100644 --- a/includes/class-sanitize.php +++ b/includes/class-sanitize.php @@ -199,6 +199,6 @@ public static function content( $content ) { return $content; } - return Blocks::html_to_blocks( $content ); + return Blocks::convert_from_html( $content ); } } From 5ab729b36cb6005138f3b6848ab97229050272ba Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 13 Oct 2025 12:18:01 +0200 Subject: [PATCH 06/35] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- includes/handler/class-update.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/includes/handler/class-update.php b/includes/handler/class-update.php index abb0c20a9..71afb2fab 100644 --- a/includes/handler/class-update.php +++ b/includes/handler/class-update.php @@ -121,7 +121,7 @@ public static function update_object( $activity, $user_id ) { * @param array $activity The ActivityPub activity data. * @param int $user_id The local user ID. * @param bool $success True on success, false otherwise. - * @param array|string|int|\WP_Error|false $result The updated comment, or null if update failed. + * @param array|string|int|\WP_Error|false $result The updated object, or null if update failed. */ \do_action( 'activitypub_handled_update', $activity, $user_id, $success, $result ); } From e302e9368892fa96b83f48eac3b6ab0832466906 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 13 Oct 2025 12:18:12 +0200 Subject: [PATCH 07/35] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- includes/handler/class-create.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/includes/handler/class-create.php b/includes/handler/class-create.php index d0df11dee..f427d7f72 100644 --- a/includes/handler/class-create.php +++ b/includes/handler/class-create.php @@ -110,7 +110,7 @@ public static function create_object( $activity, $user_id, $activity_object = nu // If comment exists, call update action. if ( ! \is_wp_error( $check_dupe ) ) { /** - * Fires when a Create activity is received for an existing comment. + * Fires when a Create activity is received for an existing object. * * @param array $activity The activity-object. * @param int $user_id The id of the local blog-user. From 955921cafbd010317b6067e552a046b6bad1db2a Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 13 Oct 2025 12:18:26 +0200 Subject: [PATCH 08/35] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- includes/collection/class-objects.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/includes/collection/class-objects.php b/includes/collection/class-objects.php index 762b8914b..ca84dc282 100644 --- a/includes/collection/class-objects.php +++ b/includes/collection/class-objects.php @@ -29,7 +29,7 @@ class Objects { * * @param array $activity The activity object data. * - * @return WP_Post|\WP_Error The object post or WP_Error on failure. + * @return \WP_Post|\WP_Error The object post or WP_Error on failure. */ public static function add( $activity ) { $activity_object = $activity['object']; From d0fa8e18bcc804ef046602ede301046c553d1d74 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 13 Oct 2025 14:41:51 +0200 Subject: [PATCH 09/35] Add tests for content sanitization and block conversion Introduces new PHPUnit tests for content sanitization in Sanitize::content, covering scenarios such as block support, malicious content, URLs, empty content, and safe HTML preservation. Also adds a test for Blocks::convert_from_html to verify HTML-to-block conversion when blocks are supported. --- .../tests/includes/class-test-blocks.php | 20 ++++++ .../tests/includes/class-test-sanitize.php | 72 +++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/tests/phpunit/tests/includes/class-test-blocks.php b/tests/phpunit/tests/includes/class-test-blocks.php index 783b1f0c9..01345251d 100644 --- a/tests/phpunit/tests/includes/class-test-blocks.php +++ b/tests/phpunit/tests/includes/class-test-blocks.php @@ -309,4 +309,24 @@ public function filter_pleroma_object( $response, $url ) { return $response; } + + /** + * Test content sanitization with HTML to blocks conversion. + * + * @covers ::convert_from_html + */ + public function test_convert_from_html() { + // Mock site_supports_blocks to return true. + \add_filter( 'wp_is_block_theme', '__return_true' ); + + $content = '

Test Heading

Test paragraph

Test image'; + $result = Blocks::convert_from_html( $content ); + + // Should convert to blocks when blocks are supported. + $this->assertIsString( $result ); + $this->assertStringContainsString( 'Test Heading', $result ); + $this->assertStringContainsString( 'Test paragraph', $result ); + + \remove_filter( 'wp_is_block_theme', '__return_true' ); + } } diff --git a/tests/phpunit/tests/includes/class-test-sanitize.php b/tests/phpunit/tests/includes/class-test-sanitize.php index f8c9860ae..aff8cff19 100644 --- a/tests/phpunit/tests/includes/class-test-sanitize.php +++ b/tests/phpunit/tests/includes/class-test-sanitize.php @@ -182,4 +182,76 @@ public function test_blog_identifier_with_existing_user() { \wp_delete_user( $user_id ); } + + /** + * Test content sanitization without blocks support. + * + * @covers ::content + */ + public function test_content_without_blocks() { + // Mock site_supports_blocks to return false. + add_filter( 'activitypub_site_supports_blocks', '__return_false' ); + + $content = '

Test Heading

Test paragraph

'; + $result = Sanitize::content( $content ); + + // Should not convert to blocks when blocks are not supported. + $this->assertStringNotContainsString( '