Skip to content

Commit 367897e

Browse files
committed
Fixed the implementation to handle "*" sub-domains.
Updated the public_suffix_list.dat to the latest. Added an `mk` script to easily rebuild the library and tools. Allow the public_suffix_list.dat to be in the tests/... sub-folder. Updated the tests to pass with the latest version. Added a test to verify specific URLs (in tld_test.c). Fixed file description at the top of two tests. Added the .gitignore file. Replaced many tabs with spaces so diffs display as expected. Updated the copyright year.
1 parent 7ded816 commit 367897e

26 files changed

+3325
-1169
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
*.sw?
2+
gmon.out
3+
tmp

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ The documentation can also be generated from the source using Doxygen.
3939

4040
# Notes:
4141

42+
* Version 1.6.0 properly handles sub-domains defined with an asterisk.
43+
4244
* Version 1.5.9 has been updated to the latest version of the world TLDs.
4345
It also includes a fix to the email code so spaces are forbidden.
4446

debian/changelog

+15
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,18 @@
1+
libtld (1.6.0.0~xenial) xenial; urgency=high
2+
3+
* Fixed the implementation to properly support "*" sub-domains.
4+
* Updated the public_suffix_list.dat to the latest.
5+
* Added an `mk` script to easily rebuild the library and tools.
6+
* Allow the public_suffix_list.dat to be in the tests/... sub-folder.
7+
* Updated the tests to pass with the latest version.
8+
* Added a test to verify specific URLs (in tld_test.c).
9+
* Fixed file description at the top of two tests.
10+
* Added the .gitignore file.
11+
* Replaced many tabs with spaces so diffs display as expected.
12+
* Updated the copyright year.
13+
14+
-- Alexis Wilke <[email protected]> Fri, 07 May 2021 17:09:15 -0700
15+
116
libtld (1.5.14.0~xenial) xenial; urgency=high
217

318
* Fixed the compile of QString() with QChar() that does not offer a `char`.

debian/copyright

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@ Source: https://snapwebsites.org/project/libtld
66
License: MIT
77
Disclaimer: This package is part of the Snap! C++ system. It is not a native Debian package.
88
Copyright:
9-
Copyright (c) 2011-2019 Made to Order Software Corp. All Rights Reserved
9+
Copyright (c) 2011-2021 Made to Order Software Corp. All Rights Reserved
1010

1111
Files: tests/public_suffix_list.dat
12-
Copyright: Copyright (c) 2007-2017 Mozilla Foundation
12+
Copyright: Copyright (c) 2007-2021 Mozilla Foundation
1313
License: Mozilla Public License, v. 2.0
1414
Mozilla Public License
1515
Version 2.0
@@ -369,5 +369,5 @@ License: Mozilla Public License, v. 2.0
369369
as defined by the Mozilla Public License, v. 2.0.
370370

371371
Files: CMakeLists.txt debian/* dev/* doc/* include/* package/* php/* src/* tests/*
372-
Copyright: Copyright (c) 2011-2019 Made to Order Software Corp. All Rights Reserved
372+
Copyright: Copyright (c) 2011-2021 Made to Order Software Corp. All Rights Reserved
373373
License: MIT

dev/libtld-only-CMakeLists.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@
2828
#
2929

3030
set(LIBTLD_VERSION_MAJOR 1)
31-
set(LIBTLD_VERSION_MINOR 5)
32-
set(LIBTLD_VERSION_PATCH 11)
31+
set(LIBTLD_VERSION_MINOR 6)
32+
set(LIBTLD_VERSION_PATCH 0)
3333

3434
cmake_minimum_required(VERSION 2.8.4)
3535
project(tld)

mk

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#!/bin/sh
2+
#
3+
# Sample script to run make without having to retype the long path each time
4+
# This will work if you built the environment using our ~/bin/build-snap script
5+
6+
PROJECT=libtld
7+
PROCESSORS="`nproc`"
8+
BUILDDIR="../../../BUILD/contrib/${PROJECT}"
9+
10+
# "Brief Version" -- for the documentation
11+
VERSION=`dpkg-parsechangelog --show-field Version | sed -e 's/~.*//' -e 's/\(^[0-9]\+\.[0-9]\+\).*/\1/'`
12+
13+
14+
case $1 in
15+
"-l")
16+
make -C ${BUILDDIR} 2>&1 | less -SR
17+
;;
18+
19+
"-d")
20+
rm -rf ${BUILDDIR}/doc/${PROJECT}-doc-${VERSION}.tar.gz
21+
make -C ${BUILDDIR}
22+
;;
23+
24+
"-i")
25+
make -j${PROCESSORS} -C ${BUILDDIR} install
26+
;;
27+
28+
"-t")
29+
(
30+
if make -j${PROCESSORS} -C ${BUILDDIR}
31+
then
32+
shift
33+
${BUILDDIR}/tests/unittest --progress $*
34+
fi
35+
) 2>&1 | less -SR
36+
;;
37+
38+
"-r")
39+
make -j${PROCESSORS} -C ${BUILDDIR}
40+
;;
41+
42+
"")
43+
make -j${PROCESSORS} -C ${BUILDDIR}
44+
;;
45+
46+
*)
47+
echo "error: unknown command line option \"$1\""
48+
;;
49+
50+
esac

src/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# File: src/CMakeLists.txt
33
# Object: Definitions to create the build environment with cmake
44
#
5-
# Copyright: Copyright (c) 2011-2019 Made to Order Software Corp. All Rights Reserved
5+
# Copyright: Copyright (c) 2011-2021 Made to Order Software Corp. All Rights Reserved
66
#
77
# https://snapwebsites.org/project/libtld
88

src/tld.c

+53-30
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* TLD library -- TLD, domain name, and sub-domain extraction
2-
* Copyright (c) 2011-2019 Made to Order Software Corp. All Rights Reserved
2+
* Copyright (c) 2011-2021 Made to Order Software Corp. All Rights Reserved
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the
@@ -327,12 +327,13 @@
327327
*/
328328
static int cmp(const char *a, const char *b, int n)
329329
{
330-
/* if `a == "*"` then it always a match! */
330+
/* if `a == "*"` then we have a bug in the table
331331
if(a[0] == '*'
332332
&& a[1] == '\0')
333333
{
334334
return 0;
335335
}
336+
*/
336337

337338
/* n represents the maximum number of characters to check in b */
338339
while(n > 0 && *a != '\0')
@@ -395,32 +396,43 @@ static int cmp(const char *a, const char *b, int n)
395396
*/
396397
int search(int i, int j, const char *domain, int n)
397398
{
398-
int p, r;
399+
int auto_match = -1, p, r;
399400
const struct tld_description *tld;
400401

401-
while(i < j)
402+
if(i < j)
402403
{
403-
p = (j - i) / 2 + i;
404-
tld = tld_descriptions + p;
405-
r = cmp(tld->f_tld, domain, n);
406-
if(r < 0)
404+
/* the "*" breaks the binary search, we have to handle it specially */
405+
tld = tld_descriptions + i;
406+
if(tld->f_tld[0] == '*' && tld->f_tld[1] == '\0')
407407
{
408-
/* eliminate the first half */
409-
i = p + 1;
408+
auto_match = i;
409+
++i;
410410
}
411-
else if(r > 0)
412-
{
413-
/* eliminate the second half */
414-
j = p;
415-
}
416-
else
411+
412+
while(i < j)
417413
{
418-
/* match */
419-
return p;
414+
p = (j - i) / 2 + i;
415+
tld = tld_descriptions + p;
416+
r = cmp(tld->f_tld, domain, n);
417+
if(r < 0)
418+
{
419+
/* eliminate the first half */
420+
i = p + 1;
421+
}
422+
else if(r > 0)
423+
{
424+
/* eliminate the second half */
425+
j = p;
426+
}
427+
else
428+
{
429+
/* match */
430+
return p;
431+
}
420432
}
421433
}
422434

423-
return -1;
435+
return auto_match;
424436
}
425437

426438

@@ -556,7 +568,7 @@ enum tld_result tld(const char *uri, struct tld_info *info)
556568
{
557569
const char *end = uri;
558570
const char **level_ptr;
559-
int level = 0, start_level, i, r, p;
571+
int level = 0, start_level, i, r, p, offset;
560572
enum tld_result result;
561573

562574
/* set defaults in the info structure */
@@ -598,7 +610,7 @@ enum tld_result tld(const char *uri, struct tld_info *info)
598610
}
599611
++end;
600612
}
601-
/* if level is not at least 1 then there are no period */
613+
/* if level is not at least 1 then there are no periods */
602614
if(level == 0)
603615
{
604616
/* no TLD */
@@ -633,6 +645,7 @@ enum tld_result tld(const char *uri, struct tld_info *info)
633645
p = r;
634646
--level;
635647
}
648+
offset = (int) (level_ptr[level] - uri);
636649

637650
/* if there are exceptions we may need to search those now if level is 0 */
638651
if(level == 0)
@@ -644,29 +657,39 @@ enum tld_result tld(const char *uri, struct tld_info *info)
644657
if(r != -1)
645658
{
646659
p = r;
660+
offset = 0;
647661
}
648662
}
649663

650664
info->f_status = tld_descriptions[p].f_status;
651-
result = info->f_status == TLD_STATUS_VALID
652-
? TLD_RESULT_SUCCESS
653-
: TLD_RESULT_INVALID;
654-
655-
/* did we hit an exception? */
656-
if(tld_descriptions[p].f_status == TLD_STATUS_EXCEPTION)
665+
switch(info->f_status)
657666
{
658-
/* return the actual TLD and not the exception */
667+
case TLD_STATUS_VALID:
668+
result = TLD_RESULT_SUCCESS;
669+
break;
670+
671+
case TLD_STATUS_EXCEPTION:
672+
/* return the actual TLD and not the exception
673+
* i.e. "nacion.ar" is valid and the TLD is just ".ar"
674+
* even though top level ".ar" is forbidden by default
675+
*/
659676
p = tld_descriptions[p].f_exception_apply_to;
660677
level = start_level - tld_descriptions[p].f_exception_level;
678+
offset = (int) (level_ptr[level] - uri);
661679
info->f_status = TLD_STATUS_VALID;
662680
result = TLD_RESULT_SUCCESS;
681+
break;
682+
683+
default:
684+
result = TLD_RESULT_INVALID;
685+
break;
686+
663687
}
664688

665-
/* return a valid result */
666689
info->f_category = tld_descriptions[p].f_category;
667690
info->f_country = tld_descriptions[p].f_country;
668691
info->f_tld = level_ptr[level];
669-
info->f_offset = (int) (level_ptr[level] - uri);
692+
info->f_offset = offset;
670693

671694
free(level_ptr);
672695

src/tld_data.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* TLD library -- TLD, domain name, and sub-domain extraction
2-
* Copyright (c) 2011-2019 Made to Order Software Corp. All Rights Reserved
2+
* Copyright (c) 2011-2021 Made to Order Software Corp. All Rights Reserved
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the
@@ -117,6 +117,9 @@ struct tld_description
117117
* The string is read-only and cannot ever be modified.
118118
* However, we do not return this string to the caller. Instead
119119
* we return a pointer inside the caller's string.
120+
*
121+
* The string may be "*" which means that any name is valid but
122+
* a name is required for a valid match at this level.
120123
*/
121124
const char * f_tld;
122125

0 commit comments

Comments
 (0)