1
- import defaultConfig from "./config" ;
2
- import { DataConfig } from "./config.types" ;
3
- import * as P from 'papaparse' ;
4
- import type { AsyncDuckDB , AsyncDuckDBConnection } from "@duckdb/duckdb-wasm" ;
5
- import {
6
- getDuckDb ,
7
- runQuery ,
8
- loadParquet
9
- } from 'utils/duckdb'
1
+ import defaultConfig from "./config"
2
+ import { DataConfig } from "./config.types"
3
+ import { DuckDBDataProtocol , type AsyncDuckDB , type AsyncDuckDBConnection } from "@duckdb/duckdb-wasm"
4
+ import { getDuckDb , runQuery } from "utils/duckdb"
5
+ import * as d3 from "d3"
6
+ import tinycolor from "tinycolor2"
10
7
11
8
/**
 * Client-side data access layer backed by a single DuckDB-WASM instance.
 *
 * Files listed in `config` are registered with DuckDB by URL
 * (`registerData`) and can then be queried with SQL (`runQuery`,
 * `getQuantiles`, `getColorValues`). `complete` records which filenames have
 * been registered or ingested, and `completeCallback` is invoked once per
 * completed filename.
 */
export class DataService {
  /** Dataset descriptors this service works with. */
  config: DataConfig[]
  /** Row stores keyed by filename; `fetchData` creates the (empty) entry. */
  data: Record<string, Record<string, Record<string | number, number>>> = {}
  /** Filenames that have finished `registerData` or `ingestData`. */
  complete: Array<string> = []
  /** Not assigned by any method in this class; kept for compatibility. */
  eagerData: Array<string> = []
  /** Called with the filename each time a dataset completes. */
  completeCallback?: (s: string) => void
  /** Not assigned by any method in this class; kept for compatibility. */
  hasRunWasm: boolean = false
  /** Lifecycle of the DuckDB instance; driven by `initDb`. */
  dbStatus: "none" | "loading" | "loaded" | "error" = "none"
  db?: AsyncDuckDB
  /**
   * Origin prepended to filenames to build fetch URLs. Falls back to an empty
   * string when there is no `window` so that constructing the service (which
   * happens at module load for the shared instance) does not throw outside a
   * browser.
   */
  baseURL: string = typeof window !== "undefined" ? window.location.origin : ""
  conn?: AsyncDuckDBConnection

  /**
   * @param completeCallback invoked with the filename whenever a dataset completes
   * @param config dataset descriptors; defaults to the project-wide config
   */
  constructor(completeCallback?: (s: string) => void, config: DataConfig[] = defaultConfig) {
    this.config = config
    this.completeCallback = completeCallback
  }

  /** Double-quotes a SQL identifier, doubling any embedded double quotes. */
  private static quoteIdentifier(name: string | number): string {
    return `"${String(name).replace(/"/g, '""')}"`
  }

  /** Single-quotes a SQL string literal, doubling any embedded single quotes. */
  private static quoteLiteral(value: string): string {
    return `'${value.replace(/'/g, "''")}'`
  }

  /** Starts registration of every config entry flagged `eager`. */
  initData() {
    this.config
      .filter((c) => c.eager)
      .forEach((c) => {
        // Fire-and-forget: log instead of leaving an unhandled rejection.
        void this.registerData(c).catch((e: unknown) => {
          console.error(`Failed to register ${c.filename}`, e)
        })
      })
  }

  /**
   * Resolves once `dbStatus` is no longer "loading" (polling every 100 ms).
   * Returns immediately for any other status, so callers that need a usable
   * database must still check that the status is "loaded".
   */
  async waitForDb(): Promise<void> {
    if (this.dbStatus === "loaded") {
      return
    }
    while (this.dbStatus === "loading") {
      await new Promise((r) => setTimeout(r, 100))
    }
  }

  /**
   * Creates the DuckDB instance and connection exactly once.
   *
   * Concurrent callers wait for the in-flight initialisation. If it fails,
   * `dbStatus` becomes "error", the error is rethrown to every waiter, and a
   * later call starts a fresh attempt.
   *
   * @throws whatever `getDuckDb()` / `connect()` throws, or an `Error` when an
   *   initialisation this call was waiting on did not succeed
   */
  async initDb(): Promise<void> {
    if (this.dbStatus === "loaded") {
      return
    }
    if (this.dbStatus === "loading") {
      await this.waitForDb()
      if (this.dbStatus !== "loaded") {
        throw new Error("DuckDB failed to initialise")
      }
      return
    }
    this.dbStatus = "loading"
    try {
      this.db = await getDuckDb()
      this.conn = await this.db.connect()
      this.dbStatus = "loaded"
    } catch (e) {
      // Without this the status would stay "loading" and waiters would poll forever.
      this.dbStatus = "error"
      throw e
    }
  }

  /**
   * Registers every config entry that is not yet in `complete`.
   *
   * NOTE(review): the guard only passes when `complete` already has as many
   * entries as `config`, at which point nothing is normally left to register,
   * so this is effectively a no-op. Confirm the intended trigger (e.g. "all
   * eager datasets done") before changing it.
   */
  backgroundDataLoad() {
    if (this.complete.length === this.config.length) {
      this.config
        .filter((c) => !this.complete.includes(c.filename))
        .forEach((c) => {
          void this.registerData(c).catch((e: unknown) => {
            console.error(`Failed to register ${c.filename}`, e)
          })
        })
    }
  }

  /**
   * Registers `config.filename` with DuckDB as an HTTP file served from
   * `baseURL`, then reports completion. Does nothing if already complete.
   *
   * @throws if the database cannot be initialised or registration fails
   */
  async registerData(config: DataConfig): Promise<void> {
    if (this.complete.includes(config.filename)) {
      return
    }
    await this.initDb()
    if (!this.db) {
      throw new Error("DuckDB is not initialised")
    }
    await this.db.registerFileURL(
      config.filename,
      `${this.baseURL}/${config.filename}`,
      DuckDBDataProtocol.HTTP,
      false
    )
    if (this.completeCallback) {
      this.completeCallback(config.filename)
    }
    this.complete.push(config.filename)
  }

  /**
   * Returns the quoted SQL source for a file: the bare filename once it is in
   * `complete`, otherwise its full URL under `baseURL`.
   */
  getFromQueryString(filename: string): string {
    const source = this.complete.includes(filename) ? filename : `${this.baseURL}/${filename}`
    return DataService.quoteLiteral(source)
  }

  /**
   * Runs `query` on the shared connection. Query failures are logged and
   * reported as an empty array (best-effort); initialisation failures reject.
   */
  async runQuery(query: string) {
    await this.initDb()
    try {
      const conn = this.conn
      if (!conn) {
        throw new Error("DuckDB connection is not initialised")
      }
      return await runQuery({
        conn,
        query,
      })
    } catch (e) {
      console.error(e)
      return []
    }
  }

  /**
   * Computes the `n - 1` approximate quantile break points that split
   * `column` of `table` into `n` bins.
   *
   * @returns the break values, or `[]` when `n` yields no breaks or the query
   *   returns nothing
   */
  async getQuantiles(column: string | number, table: string, n: number): Promise<Array<number>> {
    // breakpoints to use for quantile breaks
    // eg. n=5 - 0.2, 0.4, 0.6, 0.8 - 4 breaks
    // eg. n=4 - 0.25, 0.5, 0.75 - 3 breaks
    const quantileFractions = Array.from({ length: n - 1 }, (_, i) => (i + 1) / n)
    if (quantileFractions.length === 0) {
      // An empty select list is invalid SQL; there are no breaks to compute.
      return []
    }
    const columnSql = DataService.quoteIdentifier(column)
    const selectList = quantileFractions
      .map((f, i) => `approx_quantile(${columnSql}, ${f}) as break${i}`)
      .join(", ")
    const query = `SELECT
      ${selectList}
      FROM ${this.getFromQueryString(table)};
    `
    const rows: unknown = await this.runQuery(query)
    if (!Array.isArray(rows) || rows.length === 0) {
      console.error(`No results for quantile query: ${query}`)
      return []
    }
    return Object.values(rows[0]) as Array<number>
  }

  /**
   * Assigns each row of `table` a quantile-binned RGB colour.
   *
   * @param idColumn column whose value keys the returned `colorMap`
   * @param colorScheme name of a d3 export that is indexable by bin count
   * @param reversed reverse the colour order
   * @param column column whose values are binned
   * @param table filename to query
   * @param n number of bins
   * @returns `colorMap` (id -> [r, g, b]), the quantile `breaks`, and the
   *   `colors` palette; all empty when the scheme/bin count is unknown
   */
  async getColorValues(
    idColumn: string,
    colorScheme: string,
    reversed: boolean,
    column: string | number,
    table: string,
    n: number
  ): Promise<{ colorMap: Record<string, number[]>; breaks: number[]; colors: number[][] }> {
    const scheme: unknown = Reflect.get(d3, colorScheme)
    const palette: unknown = Array.isArray(scheme) ? scheme[n] : undefined
    // A flat scheme would yield a single colour string here, which cannot be mapped.
    if (!Array.isArray(palette)) {
      console.error(`Color scheme ${colorScheme} with ${n} bins not found`)
      return {
        colorMap: {},
        breaks: [],
        colors: [],
      }
    }
    const rgbColors: number[][] = palette.map((c) => {
      const { r, g, b } = tinycolor(c).toRgb()
      return [r, g, b]
    })
    if (reversed) {
      rgbColors.reverse()
    }
    const quantiles = await this.getQuantiles(column, table, n)
    const colorMap: Record<string, number[]> = {}
    if (quantiles.length === 0) {
      // A CASE with no WHEN branch is invalid SQL; nothing can be binned.
      return { colorMap, breaks: quantiles, colors: rgbColors }
    }
    const columnSql = DataService.quoteIdentifier(column)
    const whenClauses = quantiles
      .map((q, i) => `WHEN ${columnSql} < ${q} THEN [${rgbColors[i].join(", ")}]`)
      .join("\n")
    const query = `
      SELECT ${columnSql}, ${DataService.quoteIdentifier(idColumn)},
        CASE
          ${whenClauses}
          ELSE [${rgbColors[rgbColors.length - 1].join(", ")}]
          END as color
        FROM ${this.getFromQueryString(table)};
    `
    const rows: unknown = await this.runQuery(query)
    if (Array.isArray(rows)) {
      for (const row of rows) {
        colorMap[row[idColumn]] = row.color.toJSON()
      }
    }
    return {
      colorMap,
      breaks: quantiles,
      colors: rgbColors,
    }
  }

  /**
   * Copies `data` rows into `dataStore`, keyed by the stringified value of the
   * `config.id` column, then reports `config.filename` as complete. Rows with
   * no id are logged and skipped; an id of `0` is accepted.
   */
  ingestData(data: Array<any>, config: DataConfig, dataStore: any) {
    console.log(config, data[0])
    for (let i = 0; i < data.length; i++) {
      const row = data[i]
      const rawId = row?.[config.id]
      // Plain truthiness would also discard a legitimate id of 0.
      if (!rawId && rawId !== 0) {
        console.error(`Row ${i} in ${config.filename} is missing a valid id`)
        continue
      }
      const id = `${rawId}`
      dataStore[id] = {
        ...row,
        id,
      }
    }
    console.log("All done!")
    if (this.completeCallback) {
      this.completeCallback(config.filename)
    }
    this.complete.push(config.filename)
  }

  /**
   * Ensures the database is initialised and that `data` has an entry for
   * `config.filename`. No rows are loaded here; an existing entry is left
   * untouched.
   */
  async fetchData(config: DataConfig): Promise<void> {
    if (this.complete.includes(config.filename)) {
      return
    }
    await this.initDb()
    if (this.data[config.filename]) {
      return
    }
    this.data[config.filename] = {}
  }

  /**
   * Replaces the completion callback and immediately replays it for every
   * filename that has already completed.
   */
  setCompleteCallback(cb: (s: string) => void) {
    this.completeCallback = cb
    this.complete.forEach((filename) => cb(filename))
  }
}
201
+
202
/** Shared module-level instance, constructed with the default config and no completion callback. */
export const ds = new DataService()
0 commit comments