@@ -12,66 +12,58 @@ export const GET = defineHandler({ auth: 'cronjob' }, async () => {
12
12
const reader = fromFlowReaders ( flow , index . config . reader ) ;
13
13
14
14
await executeInSafeDuration ( async ( ) => {
15
- const documentWithoutMeta = await getDb ( ) . selectFrom ( 'llamaindex_document_node' )
15
+ const documentWithoutMeta = await getDb ( )
16
+ . selectFrom ( 'llamaindex_document_node' )
16
17
. selectAll ( )
17
18
. where ( ( eb ) => {
18
- return eb (
19
- eb . ref ( 'metadata' , '->$' ) . key ( 'documentMetadata' as never ) ,
20
- 'is' ,
21
- eb . val ( null ) ,
22
- )
19
+ return eb . and ( [
20
+ eb ( eb . fn ( 'JSON_UNQUOTE' , [ eb . ref ( 'metadata' ) ] ) , '=' , eb . val ( 'null' ) ) ,
21
+ ] )
23
22
} )
24
23
. limit ( 100 )
25
24
. execute ( ) ;
26
25
27
26
console . log ( `Found ${ documentWithoutMeta . length } documents without metadata.` )
28
27
29
28
const documentIds = Array . from ( new Set ( documentWithoutMeta . map ( doc => doc . document_id ) ) ) ;
29
+ if ( documentIds . length == 0 ) {
30
+ return false ;
31
+ }
32
+
30
33
const documents = await getDb ( )
31
34
. selectFrom ( 'document' )
32
- . select ( 'id' )
33
- . select ( ' content_uri')
34
- . select ( 'source_uri' )
35
+ . select ( [
36
+ 'id' , 'source_uri' , ' content_uri'
37
+ ] )
35
38
. where ( 'id' , 'in' , documentIds )
36
39
. where ( 'mime' , '=' , 'text/html' )
37
40
. execute ( ) ;
38
41
39
- console . log ( `Found ${ documents . length } documents to process.` )
40
-
41
- if ( documents . length == 0 ) {
42
- return false ;
43
- }
42
+ console . log ( `Found ${ documents . length } documents to process.` ) ;
44
43
45
44
await Promise . all ( documents . map ( async document => {
46
- const docsWithMeta = await reader . loadData ( {
45
+ return reader . loadData ( {
47
46
mime : 'text/html' ,
48
47
content_uri : document . content_uri ,
49
48
source_uri : document . source_uri ,
49
+ } ) . then ( ( docsWithMeta ) => {
50
+ console . log ( `Processing document ${ document . id } .` )
51
+ for ( let docWithMeta of docsWithMeta ) {
52
+ getDb ( )
53
+ . updateTable ( 'llamaindex_document_node' )
54
+ . where ( 'document_id' , '=' , document . id )
55
+ . set ( ( { eb } ) => ( {
56
+ metadata : eb . fn ( 'JSON_MERGE_PATCH' , [
57
+ eb . ref ( 'metadata' ) ,
58
+ eb . val ( JSON . stringify ( docWithMeta . metadata ) ) ,
59
+ ] ) ,
60
+ } ) )
61
+ . execute ( )
62
+ . then ( null )
63
+ }
64
+ } ) . catch ( ( e ) => {
65
+ console . error ( `Failed to process document ${ document . id } .` , e ) ;
50
66
} ) ;
51
-
52
- console . log ( `Processing document ${ document . id } .` )
53
-
54
- for ( let docWithMeta of docsWithMeta ) {
55
- await getDb ( ) . updateTable ( 'llamaindex_document_node' )
56
- . where ( 'document_id' , '=' , document . id )
57
- . set ( ( { eb } ) => ( {
58
- metadata : eb . fn ( 'JSON_MERGE_PATCH' , [
59
- eb . ref ( 'metadata' ) ,
60
- eb . val ( JSON . stringify ( docWithMeta . metadata ) ) ,
61
- ] ) ,
62
- } ) )
63
- . execute ( ) ;
64
-
65
- await getDb ( ) . updateTable ( 'llamaindex_document_chunk_node_default' )
66
- . where ( 'document_id' , '=' , document . id )
67
- . set ( ( { eb } ) => ( {
68
- metadata : eb . fn ( 'JSON_MERGE_PATCH' , [
69
- eb . ref ( 'metadata' ) ,
70
- eb . val ( JSON . stringify ( docWithMeta . metadata ) ) ,
71
- ] ) ,
72
- } ) )
73
- . execute ( ) ;
74
- }
75
67
} ) ) ;
76
68
77
69
return true ;
0 commit comments