Skip to content
This repository has been archived by the owner on Nov 6, 2023. It is now read-only.

Commit

Permalink
feat(bigquery): Add config option for specifying datasets to fetch (#37)
Browse files Browse the repository at this point in the history
* Add config option for specifying datasets to fetch

* Use Filter from the sdk

---------

Co-authored-by: Mateusz Kulas <[email protected]>
  • Loading branch information
m-qlas and Mateusz Kulas authored Sep 21, 2023
1 parent a0a9f9b commit 37319d1
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 12 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ If not running on Google Cloud Platform (GCP), this generally requires the envir
type: bigquery_storage
name: bigquery_storage
project: <any_project_name>
datasets_filter: # Optional, if not provided all datasets from the project will be fetched
include: [ <patterns_to_include> ] # List of dataset name patterns to include
exclude: [ <patterns_to_exclude> ] # List of dataset name patterns to exclude
```
### __BigTable__
Expand Down
10 changes: 10 additions & 0 deletions config_examples/bigquery.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Example collector configuration for the BigQuery storage adapter.
default_pulling_interval: 10
# NOTE(review): example/placeholder value only — never commit a real collector
# token to version control; inject it via environment or secret management.
token: "B28sfRmkntxf6YJdIwvN6urB7i7riiMbeEjJCGYY"
platform_host_url: "http://localhost:8080"
plugins:
  - type: bigquery_storage
    name: bigquery_storage_adapter
    project: project_name
    datasets_filter: # Optional, if not provided all datasets from the project will be fetched
      include: [ patterns_to_include ] # Dataset name patterns to include
      exclude: [ patterns_to_exclude ] # Dataset name patterns to exclude
25 changes: 13 additions & 12 deletions odd_collector_gcp/adapters/bigquery_storage/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,18 @@ def __fetch_datasets(self) -> list[BigQueryDataset]:
datasets_iterator = self.client.list_datasets(page_size=self.config.page_size)
for datasets_page in datasets_iterator.pages:
for dr in datasets_page:
tables_iterator = self.client.list_tables(
dr, page_size=self.config.page_size
)
dataset = BigQueryDataset(
data_object=self.client.get_dataset(dr.dataset_id),
tables=[
self.client.get_table(t)
for tables_page in tables_iterator.pages
for t in tables_page
],
)
datasets.append(dataset)
if self.config.datasets_filter.is_allowed(dr.dataset_id):
tables_iterator = self.client.list_tables(
dr, page_size=self.config.page_size
)
dataset = BigQueryDataset(
data_object=self.client.get_dataset(dr.dataset_id),
tables=[
self.client.get_table(t)
for tables_page in tables_iterator.pages
for t in tables_page
],
)
datasets.append(dataset)

return datasets
1 change: 1 addition & 0 deletions odd_collector_gcp/domain/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class GcpPlugin(Plugin):
class BigQueryStoragePlugin(GcpPlugin):
    """Configuration model for the BigQuery storage adapter plugin."""

    # Discriminator value the collector uses to route config to this adapter.
    type: Literal["bigquery_storage"]
    # Page size passed to the BigQuery client's dataset/table list calls.
    page_size: Optional[int] = 100
    # Include/exclude patterns applied to dataset names; the default Filter()
    # allows every dataset. NOTE(review): a shared default instance is safe
    # only if the Plugin base model copies defaults per instance
    # (pydantic-style) — confirm against the SDK's Plugin definition.
    datasets_filter: Filter = Filter()


class BigTablePlugin(GcpPlugin):
Expand Down

0 comments on commit 37319d1

Please sign in to comment.