Skip to content

Commit 95557ee

Browse files
Allow user to specify args for chromium process so they don't need SYS_ADMIN on container. (#4397)
* allow user to specify args for chromium process so they don't need SYS_ADMIN perms * use arg flag content * update console outputs
1 parent 7864e1a commit 95557ee

File tree

7 files changed

+29
-2
lines changed

7 files changed

+29
-2
lines changed

.github/workflows/dev-build.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ concurrency:
66

77
on:
88
push:
9-
branches: ['upload-ui-ux'] # put your current branch to create a build. Core team only.
9+
branches: ['3999-chromium-flags'] # put your current branch to create a build. Core team only.
1010
paths-ignore:
1111
- '**.md'
1212
- 'cloud-deployments/*'

collector/processLink/convert/generic.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ const {
55
const { writeToServerDocuments } = require("../../utils/files");
66
const { tokenizeString } = require("../../utils/tokenizer");
77
const { default: slugify } = require("slugify");
8+
const RuntimeSettings = require("../../utils/runtimeSettings");
89

910
/**
1011
* Scrape a generic URL and return the content in the specified format
@@ -106,10 +107,12 @@ function validatedHeaders(headers = {}) {
106107
async function getPageContent({ link, captureAs = "text", headers = {} }) {
107108
try {
108109
let pageContents = [];
110+
const runtimeSettings = new RuntimeSettings();
109111
const loader = new PuppeteerWebBaseLoader(link, {
110112
launchOptions: {
111113
headless: "new",
112114
ignoreHTTPSErrors: true,
115+
args: runtimeSettings.get("browserLaunchArgs"),
113116
},
114117
gotoOptions: {
115118
waitUntil: "networkidle2",

collector/utils/runtimeSettings/index.js

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,16 @@ class RuntimeSettings {
2727
// Value must be explicitly "true" or "false" as a string
2828
validate: (value) => String(value) === "true",
2929
},
30+
browserLaunchArgs: {
31+
default: [],
32+
validate: (value) => {
33+
let args = [];
34+
if (Array.isArray(value)) args = value.map((arg) => String(arg.trim()));
35+
if (typeof value === "string")
36+
args = value.split(",").map((arg) => arg.trim());
37+
return args;
38+
},
39+
},
3040
};
3141

3242
constructor() {

docker/.env.example

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,4 +363,9 @@ GID='1000'
363363
# Specify the target languages for when using OCR to parse images and PDFs.
364364
# This is a comma separated list of language codes as a string. Unsupported languages will be ignored.
365365
# Default is English. See https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html for a list of valid language codes.
366-
# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol
366+
# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol
367+
368+
# Runtime flags for the built-in Puppeteer Chromium instance
369+
# This is only required on Linux machines running AnythingLLM via Docker
370+
# when you do not want to use the --cap-add=SYS_ADMIN docker argument
371+
# ANYTHINGLLM_CHROMIUM_ARGS="--no-sandbox,--disable-setuid-sandbox"

server/.env.example

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,3 +362,8 @@ TTS_PROVIDER="native"
362362
# This is a comma separated list of language codes as a string. Unsupported languages will be ignored.
363363
# Default is English. See https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html for a list of valid language codes.
364364
# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol
365+
366+
# Runtime flags for the built-in Puppeteer Chromium instance
367+
# This is only required on Linux machines running AnythingLLM via Docker
368+
# when you do not want to use the --cap-add=SYS_ADMIN docker argument
369+
# ANYTHINGLLM_CHROMIUM_ARGS="--no-sandbox,--disable-setuid-sandbox"

server/utils/collectorApi/index.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class CollectorApi {
3838
},
3939
runtimeSettings: {
4040
allowAnyIp: process.env.COLLECTOR_ALLOW_ANY_IP ?? "false",
41+
browserLaunchArgs: process.env.ANYTHINGLLM_CHROMIUM_ARGS ?? [],
4142
},
4243
};
4344
}

server/utils/helpers/updateENV.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1167,6 +1167,9 @@ function dumpENV() {
11671167

11681168
// Allow disabling of streaming for generic openai
11691169
"GENERIC_OPENAI_STREAMING_DISABLED",
1170+
1171+
// Specify Chromium args for collector
1172+
"ANYTHINGLLM_CHROMIUM_ARGS",
11701173
];
11711174

11721175
// Simple sanitization of each value to prevent ENV injection via newline or quote escaping.

0 commit comments

Comments
 (0)