Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion scripts/_proxy-utils.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,15 @@ function resolveProxyString() {
/**
* Returns proxy as "user:pass@host:port" string for use with HTTP CONNECT tunneling.
* Does NOT replace gate.decodo.com → us.decodo.com; CONNECT endpoint is gate.decodo.com.
* When PROXY_URL uses https:// (TLS proxy), returns "https://user:pass@host:port".
* httpsProxyFetchJson strips that prefix and always reaches the proxy via tls.connect.
* Returns empty string if no proxy configured.
Comment on lines +85 to 87
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Stale JSDoc — references https.request instead of tls.connect

The comment says httpsProxyFetchJson will "detect and use https.request instead of http.request", but the new implementation unconditionally uses tls.connect for the proxy leg regardless of the https:// prefix. The prefix is now purely a normalisation marker that is immediately stripped inside httpsProxyFetchJson, not a dispatch signal. The JSDoc should be updated to reflect that the actual connection always goes through tls.connect.

*/
function resolveProxyStringConnect() {
  // A falsy config means no proxy is configured; callers receive an empty string.
  const cfg = resolveProxyConfigWithFallback();
  if (!cfg) return '';

  // Credentials are optional: emit "auth@host:port" only when cfg.auth is set.
  const base = cfg.auth ? `${cfg.auth}@${cfg.host}:${cfg.port}` : `${cfg.host}:${cfg.port}`;

  // The former unconditional early return made this branch unreachable, so the
  // https:// marker for TLS proxies (cfg.tls) was never emitted.
  return cfg.tls ? `https://${base}` : base;
}

module.exports = { parseProxyConfig, resolveProxyConfig, resolveProxyConfigWithFallback, resolveProxyString, resolveProxyStringConnect };
63 changes: 40 additions & 23 deletions scripts/_seed-utils.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -342,43 +342,60 @@ export function curlFetch(url, proxyAuth, headers = {}) {
return raw.slice(0, nl);
}

// Pure Node.js HTTPS-through-HTTP-proxy (CONNECT tunnel).
// Replaces curlFetch for seeder scripts running in containers without curl.
// proxyAuth format: "user:pass@host:port"
// Pure Node.js HTTPS-through-TLS-proxy (CONNECT tunnel).
// Always connects to the proxy over TLS (tls.connect), then manually sends the HTTP
// CONNECT request over the TLS socket. Accepts both bare proxy strings
// ("user:pass@host:port") and http:// or https:// prefixed ones; the proxy leg always uses TLS.
// proxyAuth format: "user:pass@host:port" OR "https://user:pass@host:port"
async function httpsProxyFetchJson(url, proxyAuth) {
const targetUrl = new URL(url);
const atIdx = proxyAuth.lastIndexOf('@');
const credentials = atIdx >= 0 ? proxyAuth.slice(0, atIdx) : '';
const hostPort = atIdx >= 0 ? proxyAuth.slice(atIdx + 1) : proxyAuth;

// Normalise proxyAuth: strip https:// prefix if present, parse user:pass@host:port.
let proxyAuthStr = proxyAuth;
if (proxyAuth.startsWith('https://') || proxyAuth.startsWith('http://')) {
const u = new URL(proxyAuth);
proxyAuthStr = (u.username ? `${decodeURIComponent(u.username)}:${decodeURIComponent(u.password)}@` : '') + `${u.hostname}:${u.port}`;
}

const atIdx = proxyAuthStr.lastIndexOf('@');
const credentials = atIdx >= 0 ? proxyAuthStr.slice(0, atIdx) : '';
const hostPort = atIdx >= 0 ? proxyAuthStr.slice(atIdx + 1) : proxyAuthStr;
const colonIdx = hostPort.lastIndexOf(':');
const proxyHost = hostPort.slice(0, colonIdx);
const proxyPort = parseInt(hostPort.slice(colonIdx + 1), 10);

const connectHeaders = {};
if (credentials) {
connectHeaders['Proxy-Authorization'] = `Basic ${Buffer.from(credentials).toString('base64')}`;
}
// Step 1: TLS connect to proxy (always TLS — Decodo gate.decodo.com requires it).
const proxySock = await new Promise((resolve, reject) => {
const s = tls.connect({ host: proxyHost, port: proxyPort, servername: proxyHost, ALPNProtocols: ['http/1.1'] }, () => resolve(s));
s.on('error', reject);
});

// Step 2: Send HTTP CONNECT over the TLS socket manually (avoids Node.js http.request
// auto-setting Host to the proxy hostname, which Decodo rejects with SOCKS5 bytes).
const authHeader = credentials ? `\r\nProxy-Authorization: Basic ${Buffer.from(credentials).toString('base64')}` : '';
proxySock.write(`CONNECT ${targetUrl.hostname}:443 HTTP/1.1\r\nHost: ${targetUrl.hostname}:443${authHeader}\r\n\r\n`);

const { socket } = await new Promise((resolve, reject) => {
http.request({
host: proxyHost, port: proxyPort,
method: 'CONNECT',
path: `${targetUrl.hostname}:443`,
headers: connectHeaders,
}).on('connect', (res, socket) => {
if (res.statusCode !== 200) {
socket.destroy();
return reject(Object.assign(new Error(`Proxy CONNECT: ${res.statusCode}`), { status: res.statusCode }));
// Step 3: Read CONNECT response (first data chunk contains the status line).
await new Promise((resolve, reject) => {
proxySock.once('data', (chunk) => {
const resp = chunk.toString('ascii');
if (!resp.startsWith('HTTP/1.1 200') && !resp.startsWith('HTTP/1.0 200')) {
proxySock.destroy();
return reject(Object.assign(new Error(`Proxy CONNECT: ${resp.split('\r\n')[0]}`), { status: parseInt(resp.split(' ')[1]) || 0 }));
}
resolve({ socket });
}).on('error', reject).end();
proxySock.pause();
resolve();
});
proxySock.on('error', reject);
});
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 CONNECT response parsed from single data chunk

proxySock.once('data', …) captures only the first TCP segment. In practice this is fine — Decodo sends a short HTTP/1.1 200 Connection established\r\n\r\n that fits in one segment, and the server cannot send TLS data until we emit the ClientHello (which happens after step 3). However, if the proxy ever sends response headers across more than one chunk (e.g. a Proxy-Agent header arrives in a second segment) the unread tail will be handed to the TLS layer as garbage, silently breaking the handshake.

A minimal guard is to buffer until \r\n\r\n is seen before resolving:

await new Promise((resolve, reject) => {
  let buf = '';
  const onData = (chunk) => {
    buf += chunk.toString('ascii');
    if (!buf.includes('\r\n\r\n')) return;
    proxySock.off('data', onData);
    const statusLine = buf.split('\r\n')[0];
    if (!statusLine.startsWith('HTTP/1.1 200') && !statusLine.startsWith('HTTP/1.0 200')) {
      proxySock.destroy();
      return reject(Object.assign(new Error(`Proxy CONNECT: ${statusLine}`), { status: parseInt(statusLine.split(' ')[1]) || 0 }));
    }
    proxySock.pause();
    resolve();
  };
  proxySock.on('data', onData);
  proxySock.on('error', reject);
});


const tlsSock = tls.connect({ socket, servername: targetUrl.hostname, ALPNProtocols: ['http/1.1'] });
// Step 4: TLS over the proxy tunnel (TLS-in-TLS) to reach the target server.
const tlsSock = tls.connect({ socket: proxySock, servername: targetUrl.hostname, ALPNProtocols: ['http/1.1'] });
await new Promise((resolve, reject) => {
tlsSock.on('secureConnect', resolve);
tlsSock.on('error', reject);
});
proxySock.resume();
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Redundant proxySock.resume() after TLS upgrade

Node.js's TLSSocket constructor internally calls socket.resume() on the wrapped parent socket so that TLS data can flow during the handshake. By the time secureConnect fires the socket is already resumed; this explicit call is a no-op. It can be removed to avoid any confusion about why the underlying socket is being manipulated after the TLS layer has taken over.


return new Promise((resolve, reject) => {
const timer = setTimeout(() => { tlsSock.destroy(); reject(new Error('FRED proxy fetch timeout')); }, 20000);
Expand Down
Loading