Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/dev-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ concurrency:

on:
push:
branches: ['upload-ui-ux'] # put your current branch to create a build. Core team only.
branches: ['3999-chromium-flags'] # put your current branch to create a build. Core team only.
paths-ignore:
- '**.md'
- 'cloud-deployments/*'
Expand Down
3 changes: 3 additions & 0 deletions collector/processLink/convert/generic.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const {
const { writeToServerDocuments } = require("../../utils/files");
const { tokenizeString } = require("../../utils/tokenizer");
const { default: slugify } = require("slugify");
const RuntimeSettings = require("../../utils/runtimeSettings");

/**
* Scrape a generic URL and return the content in the specified format
Expand Down Expand Up @@ -106,10 +107,12 @@ function validatedHeaders(headers = {}) {
async function getPageContent({ link, captureAs = "text", headers = {} }) {
try {
let pageContents = [];
const runtimeSettings = new RuntimeSettings();
const loader = new PuppeteerWebBaseLoader(link, {
launchOptions: {
headless: "new",
ignoreHTTPSErrors: true,
args: runtimeSettings.get("browserLaunchArgs"),
},
gotoOptions: {
waitUntil: "networkidle2",
Expand Down
10 changes: 10 additions & 0 deletions collector/utils/runtimeSettings/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@ class RuntimeSettings {
// Value must be explicitly "true" or "false" as a string
validate: (value) => String(value) === "true",
},
browserLaunchArgs: {
default: [],
validate: (value) => {
let args = [];
if (Array.isArray(value)) args = value.map((arg) => String(arg.trim()));
if (typeof value === "string")
args = value.split(",").map((arg) => arg.trim());
return args;
},
},
};

constructor() {
Expand Down
7 changes: 6 additions & 1 deletion docker/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -363,4 +363,9 @@ GID='1000'
# Specify the target languages for when using OCR to parse images and PDFs.
# This is a comma separated list of language codes as a string. Unsupported languages will be ignored.
# Default is English. See https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html for a list of valid language codes.
# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol
# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol

# Runtime flags for the built-in Puppeteer Chromium instance.
# This is only required on Linux machines that run AnythingLLM via Docker
# and where you do not want to use the --cap-add=SYS_ADMIN docker argument.
# ANYTHINGLLM_CHROMIUM_ARGS="--no-sandbox,--disable-setuid-sandbox"
5 changes: 5 additions & 0 deletions server/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -362,3 +362,8 @@ TTS_PROVIDER="native"
# This is a comma separated list of language codes as a string. Unsupported languages will be ignored.
# Default is English. See https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html for a list of valid language codes.
# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol

# Runtime flags for the built-in Puppeteer Chromium instance.
# This is only required on Linux machines that run AnythingLLM via Docker
# and where you do not want to use the --cap-add=SYS_ADMIN docker argument.
# ANYTHINGLLM_CHROMIUM_ARGS="--no-sandbox,--disable-setuid-sandbox"
1 change: 1 addition & 0 deletions server/utils/collectorApi/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class CollectorApi {
},
runtimeSettings: {
allowAnyIp: process.env.COLLECTOR_ALLOW_ANY_IP ?? "false",
browserLaunchArgs: process.env.ANYTHINGLLM_CHROMIUM_ARGS ?? [],
},
};
}
Expand Down
3 changes: 3 additions & 0 deletions server/utils/helpers/updateENV.js
Original file line number Diff line number Diff line change
Expand Up @@ -1167,6 +1167,9 @@ function dumpENV() {

// Allow disabling of streaming for generic openai
"GENERIC_OPENAI_STREAMING_DISABLED",

// Specify Chromium args for collector
"ANYTHINGLLM_CHROMIUM_ARGS",
];

// Simple sanitization of each value to prevent ENV injection via newline or quote escaping.
Expand Down