Skip to content

Commit

Permalink
Update URLUtil test to adapt to a change in the public suffix list
Browse files (browse the repository at this point in the history)
  • Loading branch information
sebastian-nagel committed Nov 28, 2024
1 parent e8469bc commit 1ae0a11
Showing 1 changed file with 14 additions and 4 deletions.
18 changes: 14 additions & 4 deletions src/test/org/apache/nutch/util/TestURLUtil.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -77,8 +77,13 @@ public void testGetDomainName() throws Exception {
Assert.assertEquals("example.2000.hu", URLUtil.getDomainName(url));

// test non-ascii
url = new URL("http://www.example.商業.tw");
Assert.assertEquals("example.商業.tw", URLUtil.getDomainName(url));
url = new URL("http://www.example.flå.no");
Assert.assertEquals("example.flå.no", URLUtil.getDomainName(url));
url = new URL("http://www.example.栃木.jp");
Assert.assertEquals("example.栃木.jp", URLUtil.getDomainName(url));
// broken by https://github.com/publicsuffix/list/commit/408a7b0bdec993884865baaa2f0d14cc9a060885
// url = new URL("http://www.example.商業.tw");
// Assert.assertEquals("example.商業.tw", URLUtil.getDomainName(url));

// test URL without host/authority
url = new URL("file:/path/index.html");
Expand Down Expand Up @@ -141,8 +146,13 @@ public void testGetDomainSuffix() throws Exception {
Assert.assertEquals("2000.hu", URLUtil.getDomainSuffix(url));

// test non-ascii
url = new URL("http://www.example.商業.tw");
Assert.assertEquals("xn--czrw28b.tw", URLUtil.getDomainSuffix(url));
url = new URL("http://www.example.flå.no");
Assert.assertEquals("xn--fl-zia.no", URLUtil.getDomainSuffix(url));
url = new URL("http://www.example.栃木.jp");
Assert.assertEquals("xn--4pvxs.jp", URLUtil.getDomainSuffix(url));
// broken by https://github.com/publicsuffix/list/commit/408a7b0bdec993884865baaa2f0d14cc9a060885
// url = new URL("http://www.example.商業.tw");
// Assert.assertEquals("xn--czrw28b.tw", URLUtil.getDomainSuffix(url));
}

@Test
Expand Down

0 comments on commit 1ae0a11

Please sign in to comment.