Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 2 additions & 9 deletions README-zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,18 +122,11 @@ cp .env.example .env.local
docker compose up --build
```

如需使用 MinerU 文档解析服务,取消 `docker-compose.yml` 中 `mineru` 服务的注释即可。

### 可选:MinerU(增强文档解析)

如需更好地解析复杂表格和公式:

```bash
docker pull opendatalab/mineru:latest
docker run -d --name mineru -p 8080:8080 opendatalab/mineru:latest
```
[MinerU](https://github.com/opendatalab/MinerU) 提供更强的表格、公式和 OCR 解析能力。你可以使用 [MinerU 官方 API](https://mineru.net/) 或[自行部署](https://opendatalab.github.io/MinerU/quick_start/docker_deployment/)。

在 `.env.local` 中设置 `PDF_MINERU_BASE_URL=http://localhost:8080`
在 `.env.local` 中设置 `PDF_MINERU_BASE_URL`(如需认证则同时设置 `PDF_MINERU_API_KEY`)

---

Expand Down
11 changes: 2 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,18 +122,11 @@ cp .env.example .env.local
docker compose up --build
```

To use the optional MinerU document parser, uncomment the `mineru` service in `docker-compose.yml`.

### Optional: MinerU (Advanced Document Parsing)

For enhanced parsing of complex tables and formulas:

```bash
docker pull opendatalab/mineru:latest
docker run -d --name mineru -p 8080:8080 opendatalab/mineru:latest
```
[MinerU](https://github.com/opendatalab/MinerU) provides enhanced parsing for complex tables, formulas, and OCR. You can use the [MinerU official API](https://mineru.net/) or [self-host your own instance](https://opendatalab.github.io/MinerU/quick_start/docker_deployment/).

Set `PDF_MINERU_BASE_URL=http://localhost:8080` in `.env.local`.
Set `PDF_MINERU_BASE_URL` (and `PDF_MINERU_API_KEY` if needed) in `.env.local`.

---

Expand Down
57 changes: 57 additions & 0 deletions app/api/verify-pdf-provider/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import { NextRequest } from 'next/server';
import { createLogger } from '@/lib/logger';
import { apiError, apiSuccess } from '@/lib/server/api-response';
import { resolvePDFApiKey, resolvePDFBaseUrl } from '@/lib/server/provider-config';

// Module-level logger for this route, created with the label 'Verify PDF Provider'.
const log = createLogger('Verify PDF Provider');

/**
 * Turn a caught connection error into a short, user-facing message.
 *
 * Node's built-in `fetch` reports network failures as `TypeError: fetch failed`
 * and attaches the underlying socket/DNS error (including its `code`) as
 * `cause`. Looking only at the top-level message therefore misses
 * ECONNREFUSED / ENOTFOUND, so the whole `cause` chain is inspected.
 *
 * @param error - Whatever was caught; non-Error values yield the generic message.
 * @returns A message suitable for the API error payload.
 */
function describeConnectionError(error: unknown): string {
  if (!(error instanceof Error)) {
    return 'Connection failed';
  }

  const fragments: string[] = [];
  let current: unknown = error;
  // Bounded walk so a self-referencing `cause` chain cannot loop forever.
  for (let depth = 0; depth < 5 && current instanceof Error; depth += 1) {
    const { code, cause } = current as Error & { code?: unknown; cause?: unknown };
    fragments.push(current.name, current.message);
    if (typeof code === 'string') {
      fragments.push(code);
    }
    current = cause;
  }
  const details = fragments.join(' ');

  if (details.includes('ECONNREFUSED')) {
    return 'Cannot connect to server, please check the Base URL';
  }
  if (details.includes('ENOTFOUND')) {
    return 'Server not found, please check the Base URL';
  }
  // Covers AbortSignal.timeout (name "TimeoutError") as well as socket-level timeouts.
  if (/timeout|ETIMEDOUT/i.test(details)) {
    return 'Connection timed out';
  }
  return error.message;
}

/**
 * POST /api/verify-pdf-provider
 *
 * Checks that a PDF provider's base URL is reachable. The request body is JSON
 * with `providerId` (required) and optional `apiKey` / `baseUrl`, which are
 * passed through `resolvePDFApiKey` / `resolvePDFBaseUrl` before use.
 *
 * Any HTTP response from the target, whatever its status, counts as success;
 * only a failure to get a response at all is reported as an error.
 *
 * @param req - Incoming request carrying the JSON payload described above.
 * @returns `apiSuccess` with the upstream status, or `apiError` (400 for
 *   missing fields, 500 when the connection attempt fails).
 */
export async function POST(req: NextRequest) {
  try {
    const { providerId, apiKey, baseUrl } = await req.json();

    if (!providerId) {
      return apiError('MISSING_REQUIRED_FIELD', 400, 'Provider ID is required');
    }

    const resolvedBaseUrl = resolvePDFBaseUrl(providerId, baseUrl);
    if (!resolvedBaseUrl) {
      return apiError('MISSING_REQUIRED_FIELD', 400, 'Base URL is required');
    }

    const resolvedApiKey = resolvePDFApiKey(providerId, apiKey);

    const headers: Record<string, string> = {};
    if (resolvedApiKey) {
      headers['Authorization'] = `Bearer ${resolvedApiKey}`;
    }

    // NOTE(review): this issues a server-side request to a caller-supplied URL
    // (SSRF surface). Confirm the route is only reachable by trusted users, or
    // restrict the allowed hosts.
    const response = await fetch(resolvedBaseUrl, {
      headers,
      signal: AbortSignal.timeout(10000),
    });

    // Only the status is needed; discard the body so the underlying
    // connection is released instead of waiting for garbage collection.
    await response.body?.cancel().catch(() => undefined);

    // MinerU's FastAPI root returns 404 (no root route), but the server is reachable.
    // Any HTTP response (including 404) means the server is up.
    return apiSuccess({
      message: 'Connection successful',
      status: response.status,
    });
  } catch (error) {
    log.error('PDF provider test error:', error);
    return apiError('INTERNAL_ERROR', 500, describeConnectionError(error));
  }
}
123 changes: 103 additions & 20 deletions components/settings/pdf-settings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ import { useState } from 'react';
import { Label } from '@/components/ui/label';
import { Input } from '@/components/ui/input';
import { Badge } from '@/components/ui/badge';
import { Button } from '@/components/ui/button';
import { useI18n } from '@/lib/hooks/use-i18n';
import { useSettingsStore } from '@/lib/store/settings';
import { PDF_PROVIDERS } from '@/lib/pdf/constants';
import type { PDFProviderId } from '@/lib/pdf/types';
import { CheckCircle2, Eye, EyeOff } from 'lucide-react';
import { CheckCircle2, Eye, EyeOff, Loader2, Zap, XCircle } from 'lucide-react';
import { cn } from '@/lib/utils';

/**
* Get display label for feature
Expand All @@ -32,20 +34,61 @@ interface PDFSettingsProps {
export function PDFSettings({ selectedProviderId }: PDFSettingsProps) {
const { t } = useI18n();
const [showApiKey, setShowApiKey] = useState(false);
const [testStatus, setTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>('idle');
const [testMessage, setTestMessage] = useState('');

const pdfProvidersConfig = useSettingsStore((state) => state.pdfProvidersConfig);
const setPDFProviderConfig = useSettingsStore((state) => state.setPDFProviderConfig);

const pdfProvider = PDF_PROVIDERS[selectedProviderId];
const isServerConfigured = !!pdfProvidersConfig[selectedProviderId]?.isServerConfigured;
const providerConfig = pdfProvidersConfig[selectedProviderId];
const hasBaseUrl = !!providerConfig?.baseUrl;
const needsRemoteConfig = selectedProviderId === 'mineru';

// Reset showApiKey when provider changes (derived state pattern)
// Reset state when provider changes
const [prevSelectedProviderId, setPrevSelectedProviderId] = useState(selectedProviderId);
if (selectedProviderId !== prevSelectedProviderId) {
setPrevSelectedProviderId(selectedProviderId);
setShowApiKey(false);
setTestStatus('idle');
setTestMessage('');
}

/**
 * Ask the server-side verify endpoint to probe the configured base URL and
 * mirror the outcome into the test status / message state. Does nothing when
 * no base URL is configured for the selected provider.
 */
const handleTestConnection = async () => {
  const targetUrl = providerConfig?.baseUrl;
  if (!targetUrl) {
    return;
  }

  setTestStatus('testing');
  setTestMessage('');

  const payload = {
    providerId: selectedProviderId,
    apiKey: providerConfig?.apiKey || '',
    baseUrl: targetUrl,
  };

  try {
    const res = await fetch('/api/verify-pdf-provider', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });
    const result = await res.json();

    // Failure reported by the endpoint: surface its error text.
    if (!result.success) {
      setTestStatus('error');
      setTestMessage(`${t('settings.connectionFailed')}: ${result.error}`);
      return;
    }

    setTestStatus('success');
    setTestMessage(t('settings.connectionSuccess'));
  } catch (err) {
    // Request never completed or the response was not valid JSON.
    setTestStatus('error');
    const reason = err instanceof Error ? err.message : String(err);
    setTestMessage(`${t('settings.connectionFailed')}: ${reason}`);
  }
};

return (
<div className="space-y-6 max-w-3xl">
{/* Server-configured notice */}
Expand All @@ -55,19 +98,54 @@ export function PDFSettings({ selectedProviderId }: PDFSettingsProps) {
</div>
)}

{/* API Key + Base URL Configuration */}
{(pdfProvider.requiresApiKey || isServerConfigured) && (
{/* Base URL + API Key Configuration (for remote providers like MinerU) */}
{(needsRemoteConfig || isServerConfigured) && (
<>
<div className="grid grid-cols-2 gap-4">
<div className="space-y-2">
<Label className="text-sm">{t('settings.pdfApiKey')}</Label>
<Label className="text-sm">{t('settings.pdfBaseUrl')}</Label>
<div className="flex gap-2">
<Input
placeholder="http://localhost:8080"
value={providerConfig?.baseUrl || ''}
onChange={(e) =>
setPDFProviderConfig(selectedProviderId, { baseUrl: e.target.value })
}
className="text-sm"
/>
<Button
variant="outline"
size="sm"
onClick={handleTestConnection}
disabled={testStatus === 'testing' || !hasBaseUrl}
className="gap-1.5 shrink-0"
>
{testStatus === 'testing' ? (
<Loader2 className="h-3.5 w-3.5 animate-spin" />
) : (
<>
<Zap className="h-3.5 w-3.5" />
{t('settings.testConnection')}
</>
)}
</Button>
</div>
</div>

<div className="space-y-2">
<Label className="text-sm">
{t('settings.pdfApiKey')}
<span className="text-muted-foreground ml-1 font-normal">
({t('settings.optional')})
</span>
</Label>
<div className="relative">
<Input
type={showApiKey ? 'text' : 'password'}
placeholder={
isServerConfigured ? t('settings.optionalOverride') : t('settings.enterApiKey')
}
value={pdfProvidersConfig[selectedProviderId]?.apiKey || ''}
value={providerConfig?.apiKey || ''}
onChange={(e) =>
setPDFProviderConfig(selectedProviderId, {
apiKey: e.target.value,
Expand All @@ -84,25 +162,30 @@ export function PDFSettings({ selectedProviderId }: PDFSettingsProps) {
</button>
</div>
</div>
</div>

<div className="space-y-2">
<Label className="text-sm">{t('settings.pdfBaseUrl')}</Label>
<Input
placeholder="http://localhost:8080"
value={pdfProvidersConfig[selectedProviderId]?.baseUrl || ''}
onChange={(e) =>
setPDFProviderConfig(selectedProviderId, {
baseUrl: e.target.value,
})
}
className="text-sm"
/>
{/* Test result message */}
{testMessage && (
<div
className={cn(
'rounded-lg p-3 text-sm',
testStatus === 'success' &&
'bg-green-50 text-green-700 border border-green-200 dark:bg-green-950/30 dark:text-green-300 dark:border-green-800',
testStatus === 'error' &&
'bg-red-50 text-red-700 border border-red-200 dark:bg-red-950/30 dark:text-red-300 dark:border-red-800',
)}
>
<div className="flex items-center gap-2">
{testStatus === 'success' && <CheckCircle2 className="h-4 w-4 shrink-0" />}
{testStatus === 'error' && <XCircle className="h-4 w-4 shrink-0" />}
<span className="break-all">{testMessage}</span>
</div>
</div>
</div>
)}

{/* Request URL Preview */}
{(() => {
const effectiveBaseUrl = pdfProvidersConfig[selectedProviderId]?.baseUrl || '';
const effectiveBaseUrl = providerConfig?.baseUrl || '';
if (!effectiveBaseUrl) return null;
const fullUrl = effectiveBaseUrl + '/file_parse';
return (
Expand Down
9 changes: 0 additions & 9 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,5 @@ services:
- openmaic-data:/app/data
restart: unless-stopped

# Optional: MinerU for advanced document parsing (tables, formulas, OCR)
# Uncomment to enable:
#
# mineru:
# image: opendatalab/mineru:latest
# ports:
# - "8080:8080"
# restart: unless-stopped

volumes:
openmaic-data:
2 changes: 1 addition & 1 deletion lib/pdf/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ export const PDF_PROVIDERS: Record<PDFProviderId, PDFProviderConfig> = {
mineru: {
id: 'mineru',
name: 'MinerU',
requiresApiKey: true,
requiresApiKey: false,
icon: '/logos/mineru.png',
features: ['text', 'images', 'tables', 'formulas', 'layout-analysis'],
},
Expand Down
5 changes: 4 additions & 1 deletion vercel.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
"buildCommand": "pnpm build",
"functions": {
"app/api/**/*.ts": {
"maxDuration": 300
"maxDuration": 300,
"bodyParser": {
"sizeLimit": "50mb"
}
}
}
}
Loading