Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,14 @@ public function next_block_attribute() {
return isset( $this->block_attribute_paths[ $this->block_attribute_index ] );
}

/**
 * Gets the path of the currently matched block attribute.
 *
 * @return mixed The entry of $block_attribute_paths found at the current
 *               $block_attribute_index, or false when the list of paths
 *               is null or holds no entry at that index.
 */
public function get_block_attribute_path() {
	$paths = $this->block_attribute_paths;
	$index = $this->block_attribute_index;

	// Only hand back a path when the list exists and has an entry at the cursor.
	if ( null !== $paths && isset( $paths[ $index ] ) ) {
		return $paths[ $index ];
	}

	return false;
}

/**
* Gets the key of the currently matched block attribute.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,22 +184,60 @@ private function next_url_attribute() {
private function next_url_block_attribute() {
while ( $this->next_block_attribute() ) {
$url_maybe = $this->get_block_attribute_value();
/*
* Do not use the base URL for block attributes, to avoid false positives.
* When a base URL is present, any word is a valid URL relative to the
* base URL.
* When a base URL is missing, the string must start with a protocol to
* be considered a URL.
if ( ! is_string( $url_maybe ) ) {
// @TODO: support arrays, objects, and other non-string data structures.
continue;
}

if ( count( $this->get_block_attribute_path() ) > 1 ) {
// @TODO: support nested block attributes, even if only for core extenders.
continue;
}

/**
* Decide whether the current block attribute holds a URL.
*
* Known URL attributes can be assumed to hold a URL and be
* parsed with the base URL. For example, a "/about-us" value
* in a wp:navigation-link block's `url` attribute is a
* relative URL to the `/about-us` page.
*
* Other attributes may or may not contain URLs, but we cannot assume
* they do. A value `/about-us` could be a relative URL or a class name.
* In those cases, we'll let go of relative URLs and only detect
* absolute URLs to avoid treating every string as a URL. This requires
* parsing without a base URL.
*/
if ( is_string( $url_maybe ) ) {
$is_known_url_block_attribute = (
isset( self::URL_BLOCK_ATTRIBUTES[ $this->get_block_name() ] ) &&
in_array( $this->get_block_attribute_key(), self::URL_BLOCK_ATTRIBUTES[ $this->get_block_name() ], true )
);

$is_known_url_block_attribute = apply_filters(
'wp_data_liberation_is_known_url_block_attribute',
$is_known_url_block_attribute,
array(
'block_name' => $this->get_block_name(),
'attribute_key' => $this->get_block_attribute_key(),
)
);

$parsed_url = false;
if ( $is_known_url_block_attribute ) {
// Known URL attribute – let's parse with the base URL.
$parsed_url = WPURL::parse( $url_maybe, $this->base_url_string );
} else {
// Other attribute – let's parse without a base URL.
$parsed_url = WPURL::parse( $url_maybe );
if ( false !== $parsed_url ) {
$this->raw_url = $url_maybe;
$this->parsed_url = $parsed_url;
}

return true;
}
if ( false === $parsed_url ) {
continue;
}

$this->raw_url = $url_maybe;
$this->parsed_url = $parsed_url;
return true;
}

return false;
Expand Down Expand Up @@ -362,6 +400,16 @@ public function get_inspected_attribute_name() {
return $this->inspecting_html_attributes[ count( $this->inspecting_html_attributes ) - 1 ];
}

/**
 * Block attributes that are known to hold URLs, keyed by block name.
 *
 * Keys are block names in the form compared against get_block_name()
 * (e.g. "wp:image"); values list the attribute keys, as compared against
 * get_block_attribute_key(), whose string values are parsed as URLs
 * relative to the base URL. Attributes not listed here are only
 * recognized as URLs when they parse without a base URL.
 *
 * Only list attributes whose value is always a URL. For example, the
 * button block's `linkTarget` holds a link target such as `_blank`, not a
 * URL; listing it would resolve that value against the base URL and
 * report it as a relative URL.
 */
public const URL_BLOCK_ATTRIBUTES = array(
	'wp:image'           => array( 'url' ),
	'wp:file'            => array( 'href' ),
	'wp:video'           => array( 'url', 'src' ),
	'wp:audio'           => array( 'url', 'src' ),
	'wp:cover'           => array( 'url' ),
	'wp:media-text'      => array( 'url' ),
	'wp:button'          => array( 'url' ),
	'wp:navigation-link' => array( 'url' ),
);

/**
* A list of HTML attributes meant to contain URLs, as defined in the HTML specification.
Expand Down
59 changes: 42 additions & 17 deletions components/DataLiberation/Tests/BlockMarkupUrlProcessorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,72 +16,96 @@ public function test_next_url_in_current_token_returns_false_when_no_url_is_foun
*
* @dataProvider provider_test_finds_next_url
*/
public function test_next_url_finds_the_url( $expected_result, $markup, $base_url = 'https://wordpress.org' ) {
public function test_next_url_finds_the_url( $expected_raw_url, $expected_absolute_url, $markup, $base_url = 'https://wordpress.org' ) {
$p = new BlockMarkupUrlProcessor( $markup, $base_url );
$this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' );
$this->assertEquals( $expected_result, $p->get_raw_url(), 'Found a URL in the markup, but it wasn\'t the expected one.' );
$this->assertEquals( $expected_raw_url, $p->get_raw_url(), 'Found a URL in the markup, but it wasn\'t the expected one.' );
$this->assertEquals( $expected_absolute_url, $p->get_parsed_url()->toString(), 'Found a URL in the markup, but it wasn\'t the expected one.' );
}

public static function provider_test_finds_next_url() {
return array(
'In the <a> tag' => array(
'https://wordpress.org',
'https://wordpress.org/',
'<a href="https://wordpress.org">',
),
'In the second block attribute, when it contains just the URL' => array(
'https://mysite.com/wp-content/image.png',
'<!-- wp:image {"class": "wp-bold", "src": "https://mysite.com/wp-content/image.png"} -->',
'In the wp:image url attribute when it is the first block attribute and contains a relative URL' => array(
'/wp-content/image.png',
'https://wordpress.org/wp-content/image.png',
'<!-- wp:image {"url": "/wp-content/image.png"} -->',
),
'In the first block attribute, when it contains just the URL' => array(
'In the wp:image url attribute when it is the second block attribute and contains just the URL' => array(
'https://mysite.com/wp-content/image.png',
'<!-- wp:image {"src": "https://mysite.com/wp-content/image.png"} -->',
),
'In a block attribute, in a nested object, when it contains just the URL' => array(
'https://mysite.com/wp-content/image.png',
'<!-- wp:image {"class": "wp-bold", "meta": { "src": "https://mysite.com/wp-content/image.png" } } -->',
),
'In a block attribute, in an array, when it contains just the URL' => array(
'https://mysite.com/wp-content/image.png',
'<!-- wp:image {"class": "wp-bold", "srcs": [ "https://mysite.com/wp-content/image.png" ] } -->',
'<!-- wp:image {"class": "wp-bold", "url": "https://mysite.com/wp-content/image.png"} -->',
),
'In a text node, when it contains a well-formed absolute URL' => array(
'https://wordpress.org',
'https://wordpress.org/',
'Have you seen https://wordpress.org? ',
),
'In a text node after a tag' => array(
'wordpress.org',
'https://wordpress.org/',
'<p>Have you seen wordpress.org',
),
'In a text node, when it contains a protocol-relative absolute URL' => array(
'//wordpress.org',
'https://wordpress.org/',
'Have you seen //wordpress.org? ',
),
'In a text node, when it contains a domain-only absolute URL' => array(
'wordpress.org',
'https://wordpress.org/',
'Have you seen wordpress.org? ',
),
'In a text node, when it contains a domain-only absolute URL with path' => array(
'wordpress.org/plugins',
'https://wordpress.org/plugins',
'Have you seen wordpress.org/plugins? ',
),
'Matches an empty string in <a href=""> as a valid relative URL when given a base URL' => array(
'',
'https://wordpress.org/',
'<a href=""></a>',
'https://wordpress.org',
'https://wordpress.org/',
),
'Skips over an empty string in <a href=""> when not given a base URL' => array(
'https://developer.w.org',
'https://developer.w.org/',
'<a href=""></a><a href="https://developer.w.org"></a>',
null,
),
'Skips over a class name in the <a> tag' => array(
'https://developer.w.org',
'https://developer.w.org/',
'<a class="http://example.com" href="https://developer.w.org"></a>',
null,
),
);
}

/**
 * Ensures next_url() returns false for markup that should not yield any URL.
 *
 * @dataProvider provider_test_finds_next_negative_url
 *
 * @param string      $markup   Block markup to scan.
 * @param string|null $base_url Base URL handed to the processor.
 */
public function test_next_url_finds_the_negative_url( $markup, $base_url = 'https://wordpress.org' ) {
	$p = new BlockMarkupUrlProcessor( $markup, $base_url );
	// The failure message must describe this case: a URL was reported although none should be.
	$this->assertFalse( $p->next_url(), 'Found a URL in the markup, but none was expected.' );
}

/**
 * Data provider for test_next_url_finds_the_negative_url().
 *
 * @return array Named datasets; each one holds a single markup string in
 *               which next_url() is expected to find no URL.
 */
public static function provider_test_finds_next_negative_url() {
	$datasets = array();

	// The URL sits inside an object nested under a block attribute.
	$datasets['In a block attribute, in a nested object, when it contains just the URL'] = array(
		'<!-- wp:image {"class": "wp-bold", "meta": { "src": "https://mysite.com/wp-content/image.png" } } -->',
	);

	// The URL sits inside an array-valued block attribute.
	$datasets['In a block attribute, in an array, when it contains just the URL'] = array(
		'<!-- wp:image {"class": "wp-bold", "srcs": [ "https://mysite.com/wp-content/image.png" ] } -->',
	);

	return $datasets;
}

/**
* @dataProvider provider_test_parse_url_with_base_url
*/
Expand Down Expand Up @@ -180,7 +204,7 @@ public static function provider_test_set_url_examples() {
public function test_set_url_complex_test_case() {
$p = new BlockMarkupUrlProcessor(
<<<HTML
<!-- wp:image {"src": "https://mysite.com/wp-content/image.png", "meta": {"src": "https://mysite.com/wp-content/image.png"}} -->
<!-- wp:image {"url": "https://mysite.com/wp-content/image.png", "meta": {"src": "https://mysite.com/wp-content/image.png"}} -->
<img src="https://mysite.com/wp-content/image.png">
<!-- /wp:image -->

Expand All @@ -204,9 +228,10 @@ public function test_set_url_complex_test_case() {
$p->set_url( 'https://site-export.internal', WPURL::parse( 'https://site-export.internal' ) );
}

// meta.src is a nested property and not supported yet
$this->assertEquals(
<<<HTML
<!-- wp:image {"src":"https:\/\/site-export.internal","meta":{"src":"https:\/\/site-export.internal"}} -->
<!-- wp:image {"url":"https:\/\/site-export.internal","meta":{"src":"https:\/\/mysite.com\/wp-content\/image.png"}} -->
<img src="https://site-export.internal">
<!-- /wp:image -->

Expand Down
Loading