Skip to content
This repository has been archived by the owner on Nov 6, 2023. It is now read-only.

Add config option for specifying datasets to fetch #37

Merged
merged 3 commits into from
Sep 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ If not running on Google Cloud Platform (GCP), this generally requires the envir
type: bigquery_storage
name: bigquery_storage
project: <any_project_name>
datasets_filter: # Optional, if not provided all datasets from the project will be fetched
include: [ <patterns_to_include> ] # List of dataset name patterns to include
exclude: [ <patterns_to_exclude> ] # List of dataset name patterns to exclude
```

### __BigTable__
Expand Down
10 changes: 10 additions & 0 deletions config_examples/bigquery.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
default_pulling_interval: 10
token: "<your_collector_token>" # Replace with the token issued for this collector; never commit real tokens
platform_host_url: "http://localhost:8080"
plugins:
- type: bigquery_storage
name: bigquery_storage_adapter
project: project_name
datasets_filter: # Optional, if not provided all datasets from the project will be fetched
    include: [ <patterns_to_include> ] # Dataset name patterns to include
    exclude: [ <patterns_to_exclude> ] # Dataset name patterns to exclude
25 changes: 13 additions & 12 deletions odd_collector_gcp/adapters/bigquery_storage/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,18 @@ def __fetch_datasets(self) -> list[BigQueryDataset]:
datasets_iterator = self.client.list_datasets(page_size=self.config.page_size)
for datasets_page in datasets_iterator.pages:
for dr in datasets_page:
tables_iterator = self.client.list_tables(
dr, page_size=self.config.page_size
)
dataset = BigQueryDataset(
data_object=self.client.get_dataset(dr.dataset_id),
tables=[
self.client.get_table(t)
for tables_page in tables_iterator.pages
for t in tables_page
],
)
datasets.append(dataset)
if self.config.datasets_filter.is_allowed(dr.dataset_id):
tables_iterator = self.client.list_tables(
dr, page_size=self.config.page_size
)
dataset = BigQueryDataset(
data_object=self.client.get_dataset(dr.dataset_id),
tables=[
self.client.get_table(t)
for tables_page in tables_iterator.pages
for t in tables_page
],
)
datasets.append(dataset)

return datasets
1 change: 1 addition & 0 deletions odd_collector_gcp/domain/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class GcpPlugin(Plugin):
class BigQueryStoragePlugin(GcpPlugin):
type: Literal["bigquery_storage"]
page_size: Optional[int] = 100
datasets_filter: Filter = Filter()


class BigTablePlugin(GcpPlugin):
Expand Down