Skip to content

Commit 02d4361

Browse files
authored
fix(media): enable TLS certificate verification in LinkPipeline (#4427) (#4471)
Remove ssl=False from _fetch_and_parse; cert verification is now enabled by default. Add an opt-in allow_self_signed metadata flag that skips certificate verification (ssl=False) for known-safe internal URLs.
1 parent ca0f06f commit 02d4361

File tree

2 files changed

+57
-8
lines changed

2 files changed

+57
-8
lines changed

autobot-backend/media/link/pipeline.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,16 @@ async def _process_link(self, media_input: MediaInput) -> Dict[str, Any]:
8888
async def _fetch_and_parse(self, url: str, metadata: Dict) -> Dict[str, Any]:
8989
"""Fetch URL and parse the HTML response."""
9090
headers = {"User-Agent": _USER_AGENT}
91+
# ssl=None uses the default aiohttp SSL context (cert verification enabled).
92+
# Callers may pass metadata={"allow_self_signed": True} to opt in to skipping
93+
# cert verification for known-safe internal URLs.
94+
ssl_context = False if metadata.get("allow_self_signed") else None
9195
try:
9296
async with aiohttp.ClientSession(
9397
headers=headers, timeout=_DEFAULT_TIMEOUT
9498
) as session:
9599
async with session.get(
96-
url, allow_redirects=True, ssl=False
100+
url, allow_redirects=True, ssl=ssl_context
97101
) as response:
98102
final_url = str(response.url)
99103
content_type = response.headers.get("Content-Type", "")

autobot-backend/media/link/pipeline_test.py

Lines changed: 52 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -152,32 +152,77 @@ async def _run():
152152
class TestLinkPipelineHttp:
153153
"""Tests for HTTP fetch path."""
154154

155-
@pytest.mark.asyncio
156-
async def test_fetch_success(self):
157-
pipe = LinkPipeline()
158-
155+
def _make_mock_session(self, url, status=200):
156+
"""Helper: build a mock aiohttp ClientSession for fetch tests."""
159157
mock_response = AsyncMock()
160-
mock_response.url = "https://example.com"
158+
mock_response.url = url
161159
mock_response.headers = {"Content-Type": "text/html"}
162160
mock_response.text = AsyncMock(return_value=SAMPLE_HTML)
163-
mock_response.status = 200
161+
mock_response.status = status
164162
mock_response.__aenter__ = AsyncMock(return_value=mock_response)
165163
mock_response.__aexit__ = AsyncMock(return_value=False)
166164

167165
mock_session = AsyncMock()
168166
mock_session.get = MagicMock(return_value=mock_response)
169167
mock_session.__aenter__ = AsyncMock(return_value=mock_session)
170168
mock_session.__aexit__ = AsyncMock(return_value=False)
169+
return mock_session
170+
171+
@pytest.mark.asyncio
172+
async def test_fetch_success(self):
173+
pipe = LinkPipeline()
174+
mock_session = self._make_mock_session("https://example.com")
175+
_parsed = {"type": "link_fetch", "confidence": 0.9, "url": "https://example.com"}
171176

172177
with patch("media.link.pipeline._AIOHTTP_AVAILABLE", True), patch(
173178
"media.link.pipeline._BS4_AVAILABLE", True
174179
), patch(
175180
"media.link.pipeline.aiohttp.ClientSession", return_value=mock_session
176-
):
181+
), patch.object(pipe, "_parse_html", return_value=_parsed):
177182
result = await pipe._fetch_and_parse("https://example.com", {})
178183

179184
assert result["type"] == "link_fetch"
180185
assert result["confidence"] > 0
186+
# Default path must verify TLS certs (ssl=None, not ssl=False)
187+
mock_session.get.assert_called_once_with(
188+
"https://example.com", allow_redirects=True, ssl=None
189+
)
190+
191+
@pytest.mark.asyncio
192+
async def test_fetch_default_verifies_tls(self):
193+
"""ssl=None (cert verification) is used when allow_self_signed is absent."""
194+
pipe = LinkPipeline()
195+
mock_session = self._make_mock_session("https://example.com")
196+
_parsed = {"type": "link_fetch", "confidence": 0.9}
197+
198+
with patch("media.link.pipeline._AIOHTTP_AVAILABLE", True), patch(
199+
"media.link.pipeline._BS4_AVAILABLE", True
200+
), patch(
201+
"media.link.pipeline.aiohttp.ClientSession", return_value=mock_session
202+
), patch.object(pipe, "_parse_html", return_value=_parsed):
203+
await pipe._fetch_and_parse("https://example.com", {})
204+
205+
_call_kwargs = mock_session.get.call_args.kwargs
206+
assert _call_kwargs.get("ssl") is None, "Default fetch must NOT disable cert verification"
207+
208+
@pytest.mark.asyncio
209+
async def test_fetch_allow_self_signed_disables_tls(self):
210+
"""ssl=False is used only when metadata allow_self_signed=True is explicitly set."""
211+
pipe = LinkPipeline()
212+
mock_session = self._make_mock_session("https://internal.example.com")
213+
_parsed = {"type": "link_fetch", "confidence": 0.9}
214+
215+
with patch("media.link.pipeline._AIOHTTP_AVAILABLE", True), patch(
216+
"media.link.pipeline._BS4_AVAILABLE", True
217+
), patch(
218+
"media.link.pipeline.aiohttp.ClientSession", return_value=mock_session
219+
), patch.object(pipe, "_parse_html", return_value=_parsed):
220+
await pipe._fetch_and_parse(
221+
"https://internal.example.com", {"allow_self_signed": True}
222+
)
223+
224+
_call_kwargs = mock_session.get.call_args.kwargs
225+
assert _call_kwargs.get("ssl") is False, "allow_self_signed=True must set ssl=False"
181226

182227
@pytest.mark.asyncio
183228
async def test_fetch_http_error(self):

0 commit comments

Comments
 (0)