Skip to content

Commit

Permalink
Merge pull request #185 from orhtej2/fix_curl_tests
Browse files Browse the repository at this point in the history
Use proper URI parsing for asset tests.
  • Loading branch information
alexAubin authored Nov 28, 2024
2 parents 5410830 + ba3b98b commit 80df0a1
Showing 1 changed file with 18 additions and 25 deletions.
43 changes: 18 additions & 25 deletions lib/curl_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import tempfile
import pycurl
from bs4 import BeautifulSoup
from urllib.parse import urlencode, urljoin
from urllib.parse import urlencode, urlparse
from io import BytesIO

DOMAIN = os.environ["DOMAIN"]
Expand Down Expand Up @@ -163,9 +163,6 @@ def test(
content = content.get_text().strip() if content else ""
content = re.sub(r"[\t\n\s]{3,}", "\n\n", content)

base_tag = html.find("base")
base = base_tag.get("href", "") if base_tag else ""

errors = []
if expect_effective_url is None and "/yunohost/sso" in effective_url:
errors.append(
Expand Down Expand Up @@ -194,45 +191,41 @@ def test(
assets_to_check = []
stylesheets = html.find_all("link", rel="stylesheet", href=True)
stylesheets = [
s
s["href"]
for s in stylesheets
if "ynh_portal" not in s["href"]
and "ynhtheme" not in s["href"]
and "ynh_overlay" not in s["href"]
]
if stylesheets:
assets_to_check.append(stylesheets[0]["href"])
for sheet in stylesheets:
parsed = urlparse(sheet)
if parsed.netloc != "" and parsed.netloc != domain:
continue
assets_to_check.append(parsed._replace(netloc=domain)._replace(scheme="https").geturl())
break

js = html.find_all("script", src=True)
js = [
s
s["src"]
for s in js
if "ynh_portal" not in s["src"]
and "ynhtheme" not in s["src"]
and "ynh_overlay" not in s["src"]
]
if js:
assets_to_check.append(js[0]["src"])
for js in js:
parsed = urlparse(js)
if parsed.netloc != "" and parsed.netloc != domain:
continue
assets_to_check.append(parsed._replace(netloc=domain)._replace(scheme="https").geturl())
break

if not assets_to_check:
print(
"\033[1m\033[93mWARN\033[0m auto_test_assets set to true, but no js/css asset found in this page"
)
for asset in assets_to_check:
# FIXME: this is pretty clumsy and should probably be replaced with proper URL parsing to separate domains, etc.
if asset.startswith(f"//"):
asset = f"https:{asset}"
if asset.startswith(f"https://") or asset.startswith(f"http://"):
if asset.startswith(f"https://{domain}"):
asset = asset.replace(f"https://{domain}", "")
else:
print(
f"\033[1m\033[93mWARN\033[0m Found asset '{asset}' which seems to be hosted on a third party, external website ... Not super great for privacy etc... ?"
)
continue
elif asset.startswith(f"{domain}/"):
asset = asset.replace(f"{domain}/", "")
if not asset.startswith("/"):
asset = urljoin(base + "/", asset)
resolved_asset_url = urljoin(f"https://{domain}", asset)
for resolved_asset_url in assets_to_check:
asset_code, _, effective_asset_url = curl(
resolved_asset_url, use_cookies=cookies
)
Expand Down

0 comments on commit 80df0a1

Please sign in to comment.