Skip to content

Commit a26258c

Browse files
committed
support host-specific global proxy settings:
- requires crawler 0.6.2 or higher
- if 'matchHosts' is set, use the mapping from hosts -> named proxies
- build the proxy mapping JSON in the proxies helm chart, bump chart to 0.2.0
- check if the 'has-proxy-match-hosts' configmap is defined, and if so, map proxy secrets to a volume
- enable proxy volume mapping if either the main proxy id or 'has-proxy-match-hosts' is defined
1 parent e1f057e commit a26258c

File tree

8 files changed

+71
-24
lines changed

8 files changed

+71
-24
lines changed

backend/btrixcloud/operator/crawls.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@ async def sync_crawls(self, data: MCSyncData):
326326
if pull_policy:
327327
params["crawler_image_pull_policy"] = pull_policy
328328

329+
proxy = None
329330
if crawl.proxy_id and not crawl.is_qa:
330331
proxy = self.crawl_config_ops.get_crawler_proxy(crawl.proxy_id)
331332
if proxy:
@@ -334,6 +335,10 @@ async def sync_crawls(self, data: MCSyncData):
334335
params["proxy_ssh_private_key"] = proxy.has_private_key
335336
params["proxy_ssh_host_public_key"] = proxy.has_host_public_key
336337

338+
params["add_proxies"] = proxy or (
339+
not crawl.is_qa and data.related[CMAP].get("has-proxy-match-hosts")
340+
)
341+
337342
params["storage_filename"] = spec["storage_filename"]
338343
params["restart_time"] = spec.get("restartTime")
339344

@@ -737,6 +742,11 @@ def get_related(self, data: MCBaseRequest):
737742
"resource": "crawljobs",
738743
"labelSelector": {"matchLabels": {"btrix.org": oid, "role": role}},
739744
},
745+
{
746+
"apiVersion": "v1",
747+
"resource": "configmaps",
748+
"labelSelector": {"matchLabels": {"role": "has-proxy-match-hosts"}},
749+
},
740750
]
741751

742752
if self.k8s.enable_auto_resize:

chart/Chart.lock

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@ dependencies:
1010
version: 4.11.11
1111
- name: btrix-proxies
1212
repository: file://./proxies/
13-
version: 0.1.0
14-
digest: sha256:2fd9472f857e9e3eacdcc616a3cffac5bb2951411cc2d34aea84253092225ecf
15-
generated: "2024-08-15T11:19:17.884682494+02:00"
13+
version: 0.2.0
14+
digest: sha256:7c0ea8ce57470fe27977bb1d6b88dda6da836f829484de55f9d41ee81351b272
15+
generated: "2025-05-11T12:23:32.959101-07:00"

chart/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,6 @@ dependencies:
1919
version: 4.11.11
2020
repository: "oci://ghcr.io/metacontroller"
2121
- name: btrix-proxies
22-
version: 0.1.0
22+
version: 0.2.0
2323
condition: btrix-proxies.enabled
2424
repository: file://./proxies/

chart/app-templates/crawler.yaml

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ spec:
7676
{% endif %}
7777
- name: tmpdir
7878
emptyDir: {}
79-
{% if proxy_id %}
79+
{% if add_proxies %}
8080
- name: proxies
8181
secret:
8282
secretName: proxies
@@ -144,17 +144,21 @@ spec:
144144
- --profile
145145
- "@{{ profile_filename }}"
146146
{% endif %}
147-
{% if proxy_id %}
147+
{% if add_proxies %}
148+
{% if proxy_url %}
148149
- --proxyServer
149150
- "{{ proxy_url }}"
150-
{% if proxy_ssh_private_key %}
151+
{% endif %}
152+
{% if proxy_id and proxy_ssh_private_key %}
151153
- --sshProxyPrivateKeyFile
152-
- /tmp/ssh-proxy/private-key
154+
- /tmp/proxies/{{ proxy_id }}-private-key
153155
{% endif %}
154-
{% if proxy_ssh_host_public_key %}
156+
{% if proxy_id and proxy_ssh_host_public_key %}
155157
- --sshProxyKnownHostsFile
156-
- /tmp/ssh-proxy/known-hosts
158+
- /tmp/proxies/{{ proxy_id }}-known-hosts
157159
{% endif %}
160+
- --proxyServerConfig
161+
- /tmp/proxies/host-proxies.json
158162
{% endif %}
159163
volumeMounts:
160164
- name: crawl-config
@@ -166,19 +170,10 @@ spec:
166170
mountPath: /tmp/qa/
167171
readOnly: True
168172
{% endif %}
169-
{% if proxy_id %}
170-
{% if proxy_ssh_private_key %}
171-
- name: proxies
172-
mountPath: /tmp/ssh-proxy/private-key
173-
subPath: {{ proxy_id }}-private-key
174-
readOnly: true
175-
{% endif %}
176-
{% if proxy_ssh_host_public_key %}
173+
{% if add_proxies %}
177174
- name: proxies
178-
mountPath: /tmp/ssh-proxy/known-hosts
179-
subPath: {{ proxy_id }}-known-hosts
175+
mountPath: /tmp/proxies/
180176
readOnly: true
181-
{% endif %}
182177
- name: force-user-and-group-name
183178
mountPath: /etc/passwd
184179
subPath: passwd
@@ -187,7 +182,7 @@ spec:
187182
mountPath: /etc/group
188183
subPath: group
189184
readOnly: true
190-
{% endif %}
185+
{% endif %}
191186
- name: crawl-data
192187
mountPath: /crawls
193188

-706 Bytes
Binary file not shown.
1018 Bytes
Binary file not shown.

chart/proxies/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ icon: https://webrecorder.net/assets/icon.png
77
# This is the chart version. This version number should be incremented each time you make changes
88
# to the chart and its templates, including the app version.
99
# Versions are expected to follow Semantic Versioning (https://semver.org/)
10-
version: 0.1.0
10+
version: 0.2.0
1111

1212
# This is the version number of the application being deployed. This version number should be
1313
# incremented each time you make changes to the application. Versions are not expected to
1414
# follow Semantic Versioning. They should reflect the version the application is using.
15-
appVersion: 0.1.0
15+
appVersion: 0.2.0

chart/proxies/templates/proxies.yaml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,60 @@ metadata:
77
namespace: {{ .Values.crawler_namespace | default "crawlers" }}
88
type: Opaque
99
stringData:
10+
11+
{{ $proxyDict := dict }}
12+
{{ $hasMatchHosts := false }}
13+
1014
{{- range .Values.proxies }}
1115

16+
{{ $proxyEntry := dict "url" .url }}
17+
1218
{{- if .ssh_private_key }}
1319
{{ .id }}-private-key: |
1420
{{ .ssh_private_key | indent 4 }}
21+
{{- $_ := set $proxyEntry "privateKeyFile" (printf "/tmp/proxies/%s-private-key" .id) }}
1522
{{- end }}
1623

1724
{{- if .ssh_host_public_key }}
1825
{{ .id }}-known-hosts: |
1926
{{ .ssh_host_public_key | indent 4 }}
27+
{{- $_ := set $proxyEntry "publicHostsFile" (printf "/tmp/proxies/%s-known-hosts" .id) }}
28+
{{- end }}
29+
30+
{{- $_ := set $proxyDict .id $proxyEntry }}
31+
2032
{{- end }}
2133

34+
{{- if .Values.matchHosts }}
35+
{{- $proxies := dict }}
36+
37+
{{- range $hostrx, $name := .Values.matchHosts }}
38+
{{- $proxyEntry := get $proxyDict $name }}
39+
{{- if not $proxyEntry }}
40+
{{- fail (cat "Invalid proxy: 'matchHosts' referencing unknown proxy:" $name) }}
41+
{{- end }}
42+
{{- $_ := set $proxies $name $proxyEntry }}
43+
{{- $hasMatchHosts = true }}
44+
{{- end }}
45+
46+
{{- if $hasMatchHosts }}
47+
data:
48+
host-proxies.json: {{ dict "matchHosts" .Values.matchHosts "proxies" $proxies | toJson | b64enc | quote }}
2249
{{- end }}
50+
51+
{{- end }}
52+
53+
{{- if $hasMatchHosts }}
54+
---
55+
apiVersion: v1
56+
kind: ConfigMap
57+
metadata:
58+
name: has-proxy-match-hosts
59+
namespace: {{ .Values.crawler_namespace | default "crawlers" }}
60+
labels:
61+
role: has-proxy-match-hosts
62+
{{- end }}
63+
2364
---
2465
apiVersion: v1
2566
kind: Secret
@@ -31,4 +72,5 @@ type: Opaque
3172
data:
3273
crawler_proxies_last_update: {{ now | unixEpoch | toString | b64enc | quote }}
3374
crawler_proxies.json: {{ .Values.proxies | toJson | b64enc | quote }}
75+
3476
{{- end }}

0 commit comments

Comments
 (0)