Merge pull request #175 from BuilderIO/revert-170-proxy
Revert "feat: proxy support"
steve8708 authored Aug 9, 2024
2 parents 73dfaef + 586ea5a commit 2ca876e
Showing 4 changed files with 3 additions and 17 deletions.
CHANGELOG.md: 2 additions & 1 deletion
@@ -1,8 +1,9 @@
 # [1.5.0](https://github.com/BuilderIO/gpt-crawler/compare/v1.4.0...v1.5.0) (2024-07-05)
 
+
 ### Features
 
-- git clone depth limit in docker ([87767db](https://github.com/BuilderIO/gpt-crawler/commit/87767dbda99b3259d44ec2c02dceb3a59bb2ca3c))
+* git clone depth limit in docker ([87767db](https://github.com/BuilderIO/gpt-crawler/commit/87767dbda99b3259d44ec2c02dceb3a59bb2ca3c))
 
 # [1.4.0](https://github.com/BuilderIO/gpt-crawler/compare/v1.3.0...v1.4.0) (2024-01-15)

config.ts: 0 additions & 1 deletion
@@ -6,5 +6,4 @@ export const defaultConfig: Config = {
   maxPagesToCrawl: 50,
   outputFileName: "output.json",
   maxTokens: 2000000,
-  // proxyUrls: ["http://username:password@proxyserver:port"], // socks5://username:password@proxyserver:port
 };
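
Note: with this revert, `proxyUrls` is removed from the schema entirely, so the commented-out hint above is dropped rather than left dangling. For context, a minimal sketch of a pre-revert config with proxying enabled (the `url`/`match` values and proxy credentials are illustrative placeholders, not taken from this commit):

```ts
import { Config } from "./src/config.js";

// Sketch of a pre-revert config.ts; all values are placeholders.
export const defaultConfig: Config = {
  url: "https://www.builder.io/c/docs/developers",
  match: "https://www.builder.io/c/docs/**",
  maxPagesToCrawl: 50,
  outputFileName: "output.json",
  maxTokens: 2000000,
  proxyUrls: ["http://username:password@proxyserver:8000"],
};
```

After the revert, a config carrying `proxyUrls` no longer typechecks against `Config`, which is why the hint is deleted here as well.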
src/config.ts: 0 additions & 4 deletions
@@ -85,10 +85,6 @@ export const configSchema = z.object({
    * @example 5000
    */
   maxTokens: z.number().int().positive().optional(),
-  /** Optional proxy server
-   * @example ['http://username:password@proxyserver:port', 'socks5://username:password@proxyserver:port']
-   */
-  proxyUrls: z.array(z.string()).optional(),
 });
 
 export type Config = z.infer<typeof configSchema>;
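
The field being dropped was validated as `z.array(z.string()).optional()`. A self-contained sketch of what that schema accepted (the `proxySchema` name is ours, for illustration only):

```ts
import { z } from "zod";

// Stand-in for the reverted field: an optional array of strings.
const proxySchema = z.object({
  proxyUrls: z.array(z.string()).optional(),
});

// Both pass: the field may be omitted entirely.
proxySchema.parse({ proxyUrls: ["socks5://username:password@proxyserver:1080"] });
proxySchema.parse({});

// A bare string (not an array) is rejected.
console.log(proxySchema.safeParse({ proxyUrls: "http://proxyserver:8000" }).success); // false
```

Since `z.string()` accepts any string, a malformed proxy URL would have passed schema validation and only failed later at crawl time; a stricter declaration could have used `z.string().url()`.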
src/core.ts: 1 addition & 11 deletions
@@ -1,10 +1,5 @@
 // For more information, see https://crawlee.dev/
-import {
-  Configuration,
-  PlaywrightCrawler,
-  ProxyConfiguration,
-  downloadListOfUrls,
-} from "crawlee";
+import { Configuration, PlaywrightCrawler, downloadListOfUrls } from "crawlee";
 import { readFile, writeFile } from "fs/promises";
 import { glob } from "glob";
 import { Config, configSchema } from "./config.js";
@@ -59,13 +54,8 @@ export async function crawl(config: Config) {
   if (process.env.NO_CRAWL !== "true") {
     // PlaywrightCrawler crawls the web using a headless
     // browser controlled by the Playwright library.
-    const proxyConfiguration = new ProxyConfiguration({
-      proxyUrls: config.proxyUrls,
-    });
-
     crawler = new PlaywrightCrawler(
       {
-        proxyConfiguration,
         // Use the requestHandler to process each of the crawled pages.
         async requestHandler({ request, page, enqueueLinks, log, pushData }) {
           const title = await page.title();
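The removed lines are a direct use of crawlee's `ProxyConfiguration`. For reference, a minimal self-contained sketch of the wiring this revert takes out (proxy URL and start URL are placeholders):

```ts
import { PlaywrightCrawler, ProxyConfiguration } from "crawlee";

// Crawlee rotates requests across the supplied proxy URLs.
const proxyConfiguration = new ProxyConfiguration({
  proxyUrls: ["http://username:password@proxyserver:8000"],
});

const crawler = new PlaywrightCrawler({
  proxyConfiguration,
  // Handle each crawled page in the headless browser.
  async requestHandler({ request, page, log }) {
    log.info(`Crawled ${request.loadedUrl}: ${await page.title()}`);
  },
});

await crawler.run(["https://crawlee.dev"]);
```

Reverting this leaves `PlaywrightCrawler` on its default direct-connection behavior.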
