Skip to content

Commit

Permalink
Implement new heuristics for IPFS URL validation (#13)
Browse files Browse the repository at this point in the history
* Minor change to tests
* Fix typos
* Add more testing data
* Implement new heuristics for IPFS URL validation
* Update package version
  • Loading branch information
Barabazs authored Apr 13, 2022
1 parent f628f6a commit 86b87b4
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 58 deletions.
25 changes: 25 additions & 0 deletions is_ipfs/is_ipfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import cid
from multibase import decode
from multibase import get_codec


class Validator:
Expand Down Expand Up @@ -70,6 +71,30 @@ def _is_integral_ipfs_url(

if pattern == self.subdomainGatewayPattern:
_hash = _hash.lower()
try:
if get_codec(_hash).encoding not in ["base32", "base36"]:
return False
except:
return False
elif pattern == self.pathGatewayPattern:
if not str(_hash).startswith("Qm"):
try:
if get_codec(_hash).encoding not in [
"base2",
"base16",
"base32",
"base32hex",
"base36",
"base36upper",
"base58flickr",
"base58btc",
"base64url",
"base32",
"base36",
]:
return False
except:
pass

return Validator(_hash)._is_CID()

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setuptools.setup(
name="py-is_ipfs",
version="0.0.4",
version="0.0.5",
description="Python library to identify valid IPFS resources",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
12 changes: 6 additions & 6 deletions tests/integration/test_is_ipfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@ def test_all(self):

with self.subTest("Test valid CID entries from fixtures"):
for key, value in testing_data.valid_entries["cid"].items():
for entries in value:
self.assertTrue(Validator(entries).is_ipfs())
for entry in value:
self.assertTrue(Validator(entry).is_ipfs())

with self.subTest("Test invalid CID entries from fixtures"):
for key, value in testing_data.invalid_entries["cid"].items():
for entries in value:
self.assertFalse(Validator(entries).is_ipfs())
for entry in value:
self.assertFalse(Validator(entry).is_ipfs())

with self.subTest("Test valid IPFS URL entries from fixtures"):
for key, value in testing_data.valid_entries["url"].items():
for entries in value:
self.assertTrue(Validator(entries).is_ipfs())
for entry in value:
self.assertTrue(Validator(entry).is_ipfs())

with self.subTest("Test invalid IPFS URL entries from fixtures"):
for entry in testing_data.invalid_entries["url"]["ipfs"]:
Expand Down
106 changes: 56 additions & 50 deletions tests/testing_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,72 +4,78 @@
valid_entries = {
"cid": {
"v0": [
"QmYjtig7VJQ6XsnUjqqJvj7QaMcCAwtrgNdahSiFofrE7o",
"QmYHNYAaYK5hm3ZhZFx5W9H6xydKDGimjdgJMrMSdnctEm",
"QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR",
"QmPZ9gcCEpqKTo6aq61g2nXGUhM4iCL3ewB6LDXZCtioEB", # base58btc
"QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR", # base58btc
],
"v1": [
"bafybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va",
"bafybeie2reiz2q6rbcuwpy2etyztjnceolu4rdi7rp3th2lsky4r5ckeey",
"0000000010111000000010010001000001100001111000100011100110011111011001000101011111111110100000110110011111001111010011111111101010000111111111100011010111100110100101110110010000101101001100001011100000000000001001011101101110000100101100110100111000011000111011110100101000011100100011010", # base2
"72700221014170434637310537764066371723775207774327464566205514134000113556045464703073645034432", # base8
"f01701220c3c4733ec8affd06cf9e9ff50ffc6bcd2ec85a6170004bb709669c31de94391a", # base16
"bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi", # base32
"v05o14863ohpjti5fvk3cv7kvuk7voqud5r45kobg015re2b6jgott51p38", # base32hex
"k2jmtxw8rjh1z69c6not3wtdxb0u3urbzhyll1t9jg6ox26dhi5sfi1m", # base36
"K2JMTXW8RJH1Z69C6NOT3WTDXB0U3URBZHYLL1T9JG6OX26DHI5SFI1M", # base36upper
"ZCJ7vHB6jBiaEvZ1B9N4m6KQ9kWC5bEAXKuzEMRNJzzgHrfXm", # base58flickr
"zdj7Wic6KcJAfWz1c9o4M6kq9Lwd5BfbxkVafnrojaaGiSFxM", # base58btc
"mAXASIMPEcz7Ir/0Gz56f9Q/8a80uyFphcABLtwlmnDHelDka", # base64
"uAXASIMPEcz7Ir_0Gz56f9Q_8a80uyFphcABLtwlmnDHelDka", # base64url
"bafybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va", # base32
"zdj7WWeQ43G6JJvLWQWZpyHuAMq6uYWRjkBXFad11vE2LHhQ7", # base58btc
],
"encoded": [
cid.from_string("QmYjtig7VJQ6XsnUjqqJvj7QaMcCAwtrgNdahSiFofrE7o"),
cid.from_string("QmYHNYAaYK5hm3ZhZFx5W9H6xydKDGimjdgJMrMSdnctEm"),
cid.from_string("QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"),
cid.from_string("QmPZ9gcCEpqKTo6aq61g2nXGUhM4iCL3ewB6LDXZCtioEB"),
cid.from_string(
"bafybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va"
),
cid.from_string(
"bafybeie2reiz2q6rbcuwpy2etyztjnceolu4rdi7rp3th2lsky4r5ckeey"
),
cid.from_string("zdj7WWeQ43G6JJvLWQWZpyHuAMq6uYWRjkBXFad11vE2LHhQ7"),
"zdj7WWeQ43G6JJvLWQWZpyHuAMq6uYWRjkBXFad11vE2LHhQ7",
encode("base58btc", "QmYjtig7VJQ6XsnUjqqJvj7QaMcCAwtrgNdahSiFofrE7o"),
encode("base58btc", "QmYHNYAaYK5hm3ZhZFx5W9H6xydKDGimjdgJMrMSdnctEm"),
encode("base2", "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"),
encode("base8", "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"),
encode("base10", "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"),
encode("base16", "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"),
encode("base32", "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"),
encode("base32hex", "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"),
encode("base32z", "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"),
encode("base36", "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"),
encode("base36upper", "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"),
encode("base58btc", "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"),
encode("base58btc", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base2", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base58flickr", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base2", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base8", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base10", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base16", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base16", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base16", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base32hex", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base32", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base32z", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base36", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base36upper", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base58flickr", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base58btc", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base64", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
encode("base64url", "QmNQuBJ8tg4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
"f01701220c3c4733ec8affd06cf9e9ff50ffc6bcd2ec85a6170004bb709669c31de94391a",
encode("base58flickr", "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"),
encode("base64", "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"),
encode("base64url", "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"),
],
},
"url": {
"ipfs": [
"http://ipfs.io/ipfs/QmYHNYAaYK5hm3ZhZFx5W9H6xydKDGimjdgJMrMSdnctEm?arg=val#hash",
"http://ipfs.alexandria.media/ipfs/QmeWz9YZEeNFXQhHg4PnR5ZiNr5isttgi5n1tc1eD5EfGU/content/index.html?arg=val#hash",
"http://ipfs.io/ipfs/QmYHNYAaYK5hm3ZhZFx5W9H6xydKDGimjdgJMrMSdnctEm",
"https://gateway.pinata.cloud/ipfs/Qmb4sw3sqA7AZsaRZ7vtMwxCduk1ExJL5gVDPcpnP8kxFK/",
"https://gateway.pinata.cloud/ipfs/bafybeif5dwlk2sdx5yge4azff2ovsnar63cu37ncw4n24vnqixwamwhxui/",
"https://bafybeif5dwlk2sdx5yge4azff2ovsnar63cu37ncw4n24vnqixwamwhxui.ipfs.dweb.link/",
"http://ipfs.io/ipfs/QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR?arg=val#hash", # base58btc
"http://ipfs.alexandria.media/ipfs/QmeWz9YZEeNFXQhHg4PnR5ZiNr5isttgi5n1tc1eD5EfGU/content/index.html?arg=val#hash", # base58btc
"http://ipfs.io/ipfs/QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR", # base58btc
"https://gateway.pinata.cloud/ipfs/Qmb4sw3sqA7AZsaRZ7vtMwxCduk1ExJL5gVDPcpnP8kxFK/", # base58btc
"https://gateway.pinata.cloud/ipfs/bafybeif5dwlk2sdx5yge4azff2ovsnar63cu37ncw4n24vnqixwamwhxui/", # base32
"https://bafybeif5dwlk2sdx5yge4azff2ovsnar63cu37ncw4n24vnqixwamwhxui.ipfs.dweb.link/", # base32
"https://ipfs.io/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi/", # base32
"https://ipfs.io/ipfs/f01701220c3c4733ec8affd06cf9e9ff50ffc6bcd2ec85a6170004bb709669c31de94391a", # base16
"https://ipfs.io/ipfs/0000000010111000000010010001000001100001111000100011100110011111011001000101011111111110100000110110011111001111010011111111101010000111111111100011010111100110100101110110010000101101001100001011100000000000001001011101101110000100101100110100111000011000111011110100101000011100100011010", # base2
"https://ipfs.io/ipfs/ZCJ7vHB6jBiaEvZ1B9N4m6KQ9kWC5bEAXKuzEMRNJzzgHrfXm", # base58flickr
"https://ipfs.io/ipfs/zdj7Wic6KcJAfWz1c9o4M6kq9Lwd5BfbxkVafnrojaaGiSFxM", # base58btc
"https://ipfs.io/ipfs/QmR7GSQM93Cx5eAg6a6yRzNde1FQv7uL6X1o4k7zrJa3LX/ipfs.draft3.pdf", # base58btc
],
},
"subdomain": {
"ipfs": [
"http://bafybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va.ipfs.dweb.link",
"http://bafybeidvtwx54qr44kidymvhfzefzxhgkieigwth6oswk75zhlzjdmunoy.ipfs.dweb.link/linkify-demo.html",
"http://bafybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va.ipfs.localhost:8080",
"http://bafybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va.ipfs.dweb.link", # base32
"http://bafybeidvtwx54qr44kidymvhfzefzxhgkieigwth6oswk75zhlzjdmunoy.ipfs.dweb.link/linkify-demo.html", # base32
"http://bafybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va.ipfs.localhost:8080", # base32
"https://bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi.ipfs.dweb.link/", # base32
"https://k2jmtxw8rjh1z69c6not3wtdxb0u3urbzhyll1t9jg6ox26dhi5sfi1m.ipfs.dweb.link/", # base36
],
},
}

invalid_entries = {
"cid": {
"all": [
str("QmYjtig7VJQ6XsnUjqqJvj7QaMcCAwtrgNdahSiFofrE7o").swapcase(),
str("QmYjtig7VJQ6XsnUjqqJvj7QaMcCAwtrgNdahSiFofrE7o").lower(),
str("QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR").swapcase(),
str("QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR").lower(),
"afybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va"
"bafybeie2reiz2q6rbcuwpy2etyztjnceolu4rdi7rp3th2lsky4r5ckee"
"QmYjtig7VJQ6XsnUjqqJvj7QaMcCAwtrgNdahSiFofrE70",
Expand All @@ -88,26 +94,26 @@
"ipfs": [
"http://ipfs.io/ipns/github.com/",
"https://Qmb4sw3sqA7AZsaRZ7vtMwxCduk1ExJL5gVDPcpnP8kxFK.ipfs.dweb.link/",
"http://ipfs.io/ipns/QmYHNYAaYK5hm3ZhZFx5W9H6xydKDGimjdgJMrMSdnctEm",
"http://ipfs.io/ipns/QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR",
"https://github.com/ipfs/js-ipfs/blob/master/README.md",
"https://google.com",
"http://ipfs.io/ipns/github.com/",
"https://github.com/ipfs/js-ipfs/blob/master/README.md",
"http://ipfs.io/ipns/github.com/",
"https://github.com/ipfs/js-ipfs/blob/master/README.md",
],
},
"subdomain": {
"ipfs": [
"http://bafybeiabc2xofh6tdi6vutusorpumwcikw3hf3st4ecjugo6j52f6xwc6q.ipns.dweb.link",
"http://not-a-cid.ipfs.dweb.link",
"http://QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR.ipfs.dweb.link",
"http://QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR.ipfs.dweb.link", # base58btc
"http://bafybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va.dweb.link",
"http://QmcNioXSC1bfJj1dcFErhUfyjFzoX2HodkRccsFFVJJvg8.ipns.dweb.link",
"http://QmcNioXSC1bfJj1dcFErhUfyjFzoX2HodkRccsFFVJJvg8.ipns.dweb.link", # base58btc
"http://bafybeiabc2xofh6tdi6vutusorpumwcikw3hf3st4ecjugo6j52f6xwc6q.dweb.link",
"http://invalid-hostname-.ipns.dweb.link",
"http://www.bafybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va.ipfs.dweb.link",
"http://not-a-cid-or-valid-hostname-.ipns.dweb.link",
"https://zdj7Wic6KcJAfWz1c9o4M6kq9Lwd5BfbxkVafnrojaaGiSFxM.ipfs.dweb.link", # base58btc
"https://f01701220c3c4733ec8affd06cf9e9ff50ffc6bcd2ec85a6170004bb709669c31de94391a.ipfs.dweb.link/", # base16
"https://ipfs.io/ipfs/7002700221014170434637310537764066371723775207774327464566205514134000113556045464703073645034432", # base8
"https://ipfs.io/ipfs/92793123896416649578508430956173875066425468388805468715479907750778834469731416946970", # base10
],
},
}
1 change: 0 additions & 1 deletion tests/unit/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ class TestCase(unittest.TestCase):
def test_ipfs_url(self):
with self.subTest("Test valid IPFS URL entries from fixtures"):
for entry in testing_data.valid_entries["url"]["ipfs"]:
print(entry)
self.assertTrue(Validator(entry)._is_ipfs_url())

with self.subTest("Test invalid IPFS URL entries from fixtures"):
Expand Down

0 comments on commit 86b87b4

Please sign in to comment.