Skip to content

Commit 1f9dac0

Browse files
authored
Detect relative URLs in known block attributes (#188)
Gives `BlockMarkupUrlProcessor` knowledge about which block attributes are designed to contain a relative URL. Known URL attributes can be assumed to hold a URL and be parsed with the base URL. For example, a `/about-us` value in a `wp:navigation-link` block’s `url` attribute is a relative URL to the `/about-us` page. Other attributes may or may not contain URLs, and we cannot assume they do. A value like `/about-us` could be a relative URL or a class name. In those cases we’ll ignore relative URLs and only detect absolute URLs to avoid treating every string as a URL; this requires parsing without a base URL. Related to WordPress/wordpress-playground#1780 ## Implementation details This PR ships a list of all block attributes that are meant to hold a URL. It's similar to how the HTML spec declares a list of all [HTML attributes meant to hold a URL](https://html.spec.whatwg.org/multipage/indices.html#attributes-1). It also ships a filter that extenders can use to tell the URL rewriter whether a given block attribute should be treated as holding a relative URL: ```php $is_relative_url_block_attribute = apply_filters( 'url_processor_is_relative_url_block_attribute', $is_relative_url_block_attribute, array( 'block_name' => $this->get_block_name(), 'attribute_name' => $this->get_block_attribute_key(), ) ); ``` A hypothetical plugin shipping a `wp:custom-image` block could nudge the URL parser to treat its `src` attribute as a relative URL by registering the following filter: ```php <?php function recognize_custom_image_block_attribute_as_relative_url ( $is_known, $context ) { if ( 'wp:custom-image' === $context['block_name'] && 'src' === $context['attribute_name'] ) { return true; } return $is_known; } add_filter( 'url_processor_is_relative_url_block_attribute', 'recognize_custom_image_block_attribute_as_relative_url', 10, 2 ); ``` ### Other changes This PR renames three constants for clarity: * `URL_ATTRIBUTES` -> `HTML_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM` * `URL_ATTRIBUTES_WITH_SUBSYNTAX` -> 
`HTML_ATTRIBUTES_WITH_SUBSYNTAX_TO_ACCEPT_RELATIVE_URLS_FROM` * `URL_CONTAINING_TAGS_WITH_SUBSYNTAX` -> `HTML_TAGS_WITH_SUBSYNTAX_TO_ACCEPT_RELATIVE_URLS_FROM` ## Testing CI – see the new tests shipped with this PR.
1 parent 9196728 commit 1f9dac0

File tree

3 files changed

+140
-34
lines changed

3 files changed

+140
-34
lines changed

components/DataLiberation/BlockMarkup/class-blockmarkupprocessor.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,14 @@ public function next_block_attribute() {
527527
return isset( $this->block_attribute_paths[ $this->block_attribute_index ] );
528528
}
529529

530+
protected function get_block_attribute_path() {
531+
if ( null === $this->block_attribute_paths || ! isset( $this->block_attribute_paths[ $this->block_attribute_index ] ) ) {
532+
return false;
533+
}
534+
535+
return $this->block_attribute_paths[ $this->block_attribute_index ];
536+
}
537+
530538
/**
531539
* Gets the key of the currently matched block attribute.
532540
*

components/DataLiberation/BlockMarkup/class-blockmarkupurlprocessor.php

Lines changed: 90 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ private function next_url_in_text_node() {
132132
private function next_url_attribute() {
133133
$tag = $this->get_tag();
134134

135-
if ( ! array_key_exists( $tag, self::URL_ATTRIBUTES ) ) {
135+
if ( ! array_key_exists( $tag, self::HTML_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM ) ) {
136136
return false;
137137
}
138138

@@ -142,7 +142,7 @@ private function next_url_attribute() {
142142
* for the current token. The last element is the attribute we'll
143143
* inspect in the while() loop below.
144144
*/
145-
$this->inspecting_html_attributes = self::URL_ATTRIBUTES[ $tag ];
145+
$this->inspecting_html_attributes = self::HTML_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM[ $tag ];
146146
} else {
147147
/**
148148
* Forget the attribute we've inspected on the previous call to
@@ -184,22 +184,75 @@ private function next_url_attribute() {
184184
private function next_url_block_attribute() {
185185
while ( $this->next_block_attribute() ) {
186186
$url_maybe = $this->get_block_attribute_value();
187-
/*
188-
* Do not use base URL for block attributes. to avoid false positives.
189-
* When a base URL is present, any word is a valid URL relative to the
190-
* base URL.
191-
* When a base URL is missing, the string must start with a protocol to
192-
* be considered a URL.
187+
if ( ! is_string( $url_maybe ) ||
188+
count( $this->get_block_attribute_path() ) > 1
189+
) {
190+
// @TODO: support arrays, objects, and other non-string data structures.
191+
continue;
192+
}
193+
194+
/**
195+
* Decide whether the current block attribute holds a URL.
196+
*
197+
* Known URL attributes can be assumed to hold a URL and be
198+
* parsed with the base URL. For example, a "/about-us" value
199+
* in a wp:navigation-link block's `url` attribute is a
200+
* relative URL to the `/about-us` page.
201+
*
202+
* Other attributes may or may not contain URLs, but we cannot assume
203+
* they do. A value `/about-us` could be a relative URL or a class name.
204+
* In those cases, we'll let go of relative URLs and only detect
205+
* absolute URLs to avoid treating every string as a URL. This requires
206+
* parsing without a base URL.
207+
*/
208+
$is_relative_url_block_attribute = (
209+
isset( self::BLOCK_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM[ $this->get_block_name() ] ) &&
210+
in_array( $this->get_block_attribute_key(), self::BLOCK_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM[ $this->get_block_name() ], true )
211+
);
212+
213+
/**
214+
* Filters whether a block attribute is known to contain a relative URL.
215+
*
216+
* This filter allows extending the list of block attributes that are
217+
* recognized as containing URLs. When a block attribute is marked as
218+
* a known URL attribute, it will be parsed with the base URL, allowing
219+
* relative URLs to be properly resolved.
220+
*
221+
* @since 6.8.0
222+
*
223+
* @param bool $is_relative_url_block_attribute Whether the block attribute is known to contain a relative URL.
224+
* @param array $context {
225+
* Context information about the block attribute.
226+
*
227+
* @type string $block_name The name of the block (e.g., 'wp:image', 'wp:button').
228+
* @type string $attribute_name The name of the attribute (e.g., 'url', 'href').
229+
* }
193230
*/
194-
if ( is_string( $url_maybe ) ) {
231+
$is_relative_url_block_attribute = apply_filters(
232+
'url_processor_is_relative_url_block_attribute',
233+
$is_relative_url_block_attribute,
234+
array(
235+
'block_name' => $this->get_block_name(),
236+
'attribute_name' => $this->get_block_attribute_key(),
237+
)
238+
);
239+
240+
$parsed_url = false;
241+
if ( $is_relative_url_block_attribute ) {
242+
// Known relative URL attribute – let's parse with the base URL.
243+
$parsed_url = WPURL::parse( $url_maybe, $this->base_url_string );
244+
} else {
245+
// Other attributes – let's parse without a base URL (and only detect absolute URLs).
195246
$parsed_url = WPURL::parse( $url_maybe );
196-
if ( false !== $parsed_url ) {
197-
$this->raw_url = $url_maybe;
198-
$this->parsed_url = $parsed_url;
247+
}
199248

200-
return true;
201-
}
249+
if ( false === $parsed_url ) {
250+
continue;
202251
}
252+
253+
$this->raw_url = $url_maybe;
254+
$this->parsed_url = $parsed_url;
255+
return true;
203256
}
204257

205258
return false;
@@ -362,6 +415,26 @@ public function get_inspected_attribute_name() {
362415
return $this->inspecting_html_attributes[ count( $this->inspecting_html_attributes ) - 1 ];
363416
}
364417

418+
/**
419+
* A list of block attributes that are known to contain URLs.
420+
*
421+
* It covers WordPress core blocks as of WordPress version 6.9. It can be
422+
* extended by plugins and themes via the "url_processor_is_relative_url_block_attribute"
423+
* filter.
424+
*
425+
* @var array
426+
*/
427+
public const BLOCK_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM = array(
428+
'wp:button' => array( 'url', 'linkTarget' ),
429+
'wp:cover' => array( 'url' ),
430+
'wp:embed' => array( 'url' ),
431+
'wp:gallery' => array( 'url', 'fullUrl' ),
432+
'wp:image' => array( 'url', 'src', 'href' ),
433+
'wp:media-text' => array( 'mediaUrl', 'href' ),
434+
'wp:navigation-link' => array( 'url' ),
435+
'wp:navigation-submenu' => array( 'url' ),
436+
'wp:rss' => array( 'feedURL' ),
437+
);
365438

366439
/**
367440
* A list of HTML attributes meant to contain URLs, as defined in the HTML specification.
@@ -370,7 +443,7 @@ public function get_inspected_attribute_name() {
370443
* See https://html.spec.whatwg.org/multipage/indices.html#attributes-1.
371444
* See https://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value.
372445
*/
373-
public const URL_ATTRIBUTES = array(
446+
public const HTML_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM = array(
374447
'A' => array( 'href' ),
375448
'APPLET' => array( 'codebase', 'archive' ),
376449
'AREA' => array( 'href' ),
@@ -405,7 +478,7 @@ public function get_inspected_attribute_name() {
405478
* @TODO: Either explicitly support these attributes, or explicitly drop support for
406479
* handling their subsyntax. A generic URL matcher might be good enough.
407480
*/
408-
public const URL_ATTRIBUTES_WITH_SUBSYNTAX = array(
481+
public const HTML_ATTRIBUTES_WITH_SUBSYNTAX_TO_ACCEPT_RELATIVE_URLS_FROM = array(
409482
'*' => array( 'style' ), // background(), background-image().
410483
'APPLET' => array( 'archive' ),
411484
'IMG' => array( 'srcset' ),
@@ -425,7 +498,7 @@ public function get_inspected_attribute_name() {
425498
* @TODO: Either explicitly support these tags, or explicitly drop support for
426499
* handling their subsyntax. A generic URL matcher might be good enough.
427500
*/
428-
public const URL_CONTAINING_TAGS_WITH_SUBSYNTAX = array(
501+
public const HTML_TAGS_WITH_SUBSYNTAX_TO_ACCEPT_RELATIVE_URLS_FROM = array(
429502
'STYLE',
430503
'SCRIPT',
431504
);

components/DataLiberation/Tests/BlockMarkupUrlProcessorTest.php

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,72 +16,96 @@ public function test_next_url_in_current_token_returns_false_when_no_url_is_foun
1616
*
1717
* @dataProvider provider_test_finds_next_url
1818
*/
19-
public function test_next_url_finds_the_url( $expected_result, $markup, $base_url = 'https://wordpress.org' ) {
19+
public function test_next_url_finds_the_url( $expected_raw_url, $expected_absolute_url, $markup, $base_url = 'https://wordpress.org' ) {
2020
$p = new BlockMarkupUrlProcessor( $markup, $base_url );
2121
$this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' );
22-
$this->assertEquals( $expected_result, $p->get_raw_url(), 'Found a URL in the markup, but it wasn\'t the expected one.' );
22+
$this->assertEquals( $expected_raw_url, $p->get_raw_url(), 'Found a URL in the markup, but it wasn\'t the expected one.' );
23+
$this->assertEquals( $expected_absolute_url, $p->get_parsed_url()->toString(), 'Found a URL in the markup, but it wasn\'t the expected one.' );
2324
}
2425

2526
public static function provider_test_finds_next_url() {
2627
return array(
2728
'In the <a> tag' => array(
2829
'https://wordpress.org',
30+
'https://wordpress.org/',
2931
'<a href="https://wordpress.org">',
3032
),
31-
'In the second block attribute, when it contains just the URL' => array(
32-
'https://mysite.com/wp-content/image.png',
33-
'<!-- wp:image {"class": "wp-bold", "src": "https://mysite.com/wp-content/image.png"} -->',
33+
'In the wp:image url attribute when it is the first block attribute and contains a relative URL' => array(
34+
'/wp-content/image.png',
35+
'https://wordpress.org/wp-content/image.png',
36+
'<!-- wp:image {"url": "/wp-content/image.png"} -->',
3437
),
35-
'In the first block attribute, when it contains just the URL' => array(
38+
'In the wp:image url attribute when it is the second block attribute and contains just the URL' => array(
3639
'https://mysite.com/wp-content/image.png',
37-
'<!-- wp:image {"src": "https://mysite.com/wp-content/image.png"} -->',
38-
),
39-
'In a block attribute, in a nested object, when it contains just the URL' => array(
4040
'https://mysite.com/wp-content/image.png',
41-
'<!-- wp:image {"class": "wp-bold", "meta": { "src": "https://mysite.com/wp-content/image.png" } } -->',
42-
),
43-
'In a block attribute, in an array, when it contains just the URL' => array(
44-
'https://mysite.com/wp-content/image.png',
45-
'<!-- wp:image {"class": "wp-bold", "srcs": [ "https://mysite.com/wp-content/image.png" ] } -->',
41+
'<!-- wp:image {"class": "wp-bold", "url": "https://mysite.com/wp-content/image.png"} -->',
4642
),
4743
'In a text node, when it contains a well-formed absolute URL' => array(
4844
'https://wordpress.org',
45+
'https://wordpress.org/',
4946
'Have you seen https://wordpress.org? ',
5047
),
5148
'In a text node after a tag' => array(
5249
'wordpress.org',
50+
'https://wordpress.org/',
5351
'<p>Have you seen wordpress.org',
5452
),
5553
'In a text node, when it contains a protocol-relative absolute URL' => array(
5654
'//wordpress.org',
55+
'https://wordpress.org/',
5756
'Have you seen //wordpress.org? ',
5857
),
5958
'In a text node, when it contains a domain-only absolute URL' => array(
6059
'wordpress.org',
60+
'https://wordpress.org/',
6161
'Have you seen wordpress.org? ',
6262
),
6363
'In a text node, when it contains a domain-only absolute URL with path' => array(
6464
'wordpress.org/plugins',
65+
'https://wordpress.org/plugins',
6566
'Have you seen wordpress.org/plugins? ',
6667
),
6768
'Matches an empty string in <a href=""> as a valid relative URL when given a base URL' => array(
6869
'',
70+
'https://wordpress.org/',
6971
'<a href=""></a>',
70-
'https://wordpress.org',
72+
'https://wordpress.org/',
7173
),
7274
'Skips over an empty string in <a href=""> when not given a base URL' => array(
7375
'https://developer.w.org',
76+
'https://developer.w.org/',
7477
'<a href=""></a><a href="https://developer.w.org"></a>',
7578
null,
7679
),
7780
'Skips over a class name in the <a> tag' => array(
7881
'https://developer.w.org',
82+
'https://developer.w.org/',
7983
'<a class="http://example.com" href="https://developer.w.org"></a>',
8084
null,
8185
),
8286
);
8387
}
8488

89+
/**
90+
*
91+
* @dataProvider provider_test_finds_next_negative_url
92+
*/
93+
public function test_next_url_finds_the_negative_url( $markup, $base_url = 'https://wordpress.org' ) {
94+
$p = new BlockMarkupUrlProcessor( $markup, $base_url );
95+
$this->assertFalse( $p->next_url(), 'Found a URL in the markup, but it wasn\'t the expected one.' );
96+
}
97+
98+
public static function provider_test_finds_next_negative_url() {
99+
return array(
100+
'In a block attribute, in a nested object, when it contains just the URL' => array(
101+
'<!-- wp:image {"class": "wp-bold", "meta": { "src": "https://mysite.com/wp-content/image.png" } } -->',
102+
),
103+
'In a block attribute, in an array, when it contains just the URL' => array(
104+
'<!-- wp:image {"class": "wp-bold", "srcs": [ "https://mysite.com/wp-content/image.png" ] } -->',
105+
),
106+
);
107+
}
108+
85109
/**
86110
* @dataProvider provider_test_parse_url_with_base_url
87111
*/
@@ -180,7 +204,7 @@ public static function provider_test_set_url_examples() {
180204
public function test_set_url_complex_test_case() {
181205
$p = new BlockMarkupUrlProcessor(
182206
<<<HTML
183-
<!-- wp:image {"src": "https://mysite.com/wp-content/image.png", "meta": {"src": "https://mysite.com/wp-content/image.png"}} -->
207+
<!-- wp:image {"url": "https://mysite.com/wp-content/image.png", "meta": {"src": "https://mysite.com/wp-content/image.png"}} -->
184208
<img src="https://mysite.com/wp-content/image.png">
185209
<!-- /wp:image -->
186210
@@ -204,9 +228,10 @@ public function test_set_url_complex_test_case() {
204228
$p->set_url( 'https://site-export.internal', WPURL::parse( 'https://site-export.internal' ) );
205229
}
206230

231+
// meta.src is a nested property and not supported yet
207232
$this->assertEquals(
208233
<<<HTML
209-
<!-- wp:image {"src":"https:\/\/site-export.internal","meta":{"src":"https:\/\/site-export.internal"}} -->
234+
<!-- wp:image {"url":"https:\/\/site-export.internal","meta":{"src":"https:\/\/mysite.com\/wp-content\/image.png"}} -->
210235
<img src="https://site-export.internal">
211236
<!-- /wp:image -->
212237

0 commit comments

Comments
 (0)