diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/README.md b/cloud-operations/scheduled-asset-inventory-export-bq/README.md
index 4c9d2ae79f..f1fabbe86b 100644
--- a/cloud-operations/scheduled-asset-inventory-export-bq/README.md
+++ b/cloud-operations/scheduled-asset-inventory-export-bq/README.md
@@ -36,7 +36,21 @@ Once done testing, you can clean up resources by running `terraform destroy`. To
Once resources are created, you can run queries on the data you exported on Bigquery. [Here](https://cloud.google.com/asset-inventory/docs/exporting-to-bigquery#querying_an_asset_snapshot) you can find some examples of queries you can run.
-You can also create a dashboard connecting [Datalab](https://datastudio.google.com/) or any other BI tools of your choice to your Bigquery datase.
+You can also create a dashboard connecting [Data Studio](https://datastudio.google.com/) or any other BI tool of your choice to your Bigquery dataset.
+
+## File exporter for JSON or CSV (optional)
+
+Regular file-based exports of data from Cloud Asset Inventory can be useful to feed scale-out network dependency discovery tools like [Planet Exporter](https://github.com/williamchanrico/planet-exporter), or to update legacy workload tracking or configuration management systems. BigQuery supports multiple [export formats](https://cloud.google.com/bigquery/docs/exporting-data#export_formats_and_compression_types), and the provided Cloud Function uploads the exported objects to a Storage bucket. Specify `job.DestinationFormat` as defined in the [documentation](https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.job.DestinationFormat.html), e.g. `NEWLINE_DELIMITED_JSON`.
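+
+As a minimal sketch of what the provided function does under the hood (assuming the `google-cloud-bigquery` library; project, dataset and bucket names are illustrative), the extract call looks roughly like this:
+
+```python
+from google.cloud import bigquery
+
+client = bigquery.Client()
+job_config = bigquery.job.ExtractJobConfig()
+# pick the format downstream consumers expect, e.g. JSON lines or CSV
+job_config.destination_format = bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON
+extract_job = client.extract_table(
+    'my-project.my_dataset.my_table',        # source table (illustrative)
+    'gs://my-bucket/my-folder/export.json',  # destination object (illustrative)
+    job_config=job_config)
+extract_job.result()  # block until the export job completes
+```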
+
+It helps to create a custom [scheduled query](https://cloud.google.com/bigquery/docs/scheduling-queries#console) from the CAI export tables, writing results to a dedicated table that is overwritten on each run. Define the query's output columns to match the field requirements of downstream systems, and schedule it to run right after the CAI export into BQ so the exported data stays fresh. See [sample queries](https://cloud.google.com/asset-inventory/docs/exporting-to-bigquery-sample-queries).
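+
+One way to create such a scheduled query programmatically is through the BigQuery Data Transfer client; a hedged sketch, assuming the `google-cloud-bigquery-datatransfer` library and illustrative project, dataset, table and column names:
+
+```python
+from google.cloud import bigquery_datatransfer
+
+client = bigquery_datatransfer.DataTransferServiceClient()
+transfer_config = bigquery_datatransfer.TransferConfig(
+    destination_dataset_id='my_dataset',
+    display_name='cai-flatten-for-export',
+    data_source_id='scheduled_query',
+    params={
+        # shape output columns to match downstream systems' field requirements
+        'query': ('SELECT name, asset_type '
+                  'FROM `my-project.my_dataset.my_table_latest`'),
+        'destination_table_name_template': 'my_table_for_export',
+        # overwrite the dedicated table on each run
+        'write_disposition': 'WRITE_TRUNCATE',
+    },
+    # time this to run after the CAI export into BQ has completed
+    schedule='every monday 09:30',
+)
+transfer_config = client.create_transfer_config(
+    parent=client.common_project_path('my-project'),
+    transfer_config=transfer_config)
+```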
+
+This optional part is created only if `cai_gcs_export` is set to `true`. The high-level diagram then extends to the following:
+
+![High-level diagram](diagram_optional.png)
+
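+To test the file export without waiting for Cloud Scheduler, you can publish the same message the scheduler job sends to the function's Pub/Sub topic. A minimal sketch, assuming the `google-cloud-pubsub` client library, the default `cffile-exporter` topic name, and illustrative project, bucket and table values (the payload keys match this example's scheduler job):
+
+```python
+import json
+
+from google.cloud import pubsub_v1
+
+publisher = pubsub_v1.PublisherClient()
+topic_path = publisher.topic_path('my-project', 'cffile-exporter')
+payload = {
+    'bucket': 'my-bucket',
+    'filename': 'my-folder/export.json',
+    'format': 'NEWLINE_DELIMITED_JSON',
+    'bq_dataset': 'my_dataset',
+    'bq_table': 'my_table_latest',
+}
+# Cloud Functions delivers this as base64-encoded data, which main.py decodes
+publisher.publish(topic_path, json.dumps(payload).encode('utf-8')).result()
+```
+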
@@ -44,12 +58,16 @@ You can also create a dashboard connecting [Datalab](https://datastudio.google.c
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
-| cai_config | Cloud Asset inventory export config. | object({…}) | ✓ |  |
+| cai_config | Cloud Asset Inventory export config. | object({…}) | ✓ |  |
| project_id | Project id that references existing project. | string | ✓ |  |
| billing_account | Billing account id used as default for new projects. | string |  | null |
| bundle_path | Path used to write the intermediate Cloud Function code bundle. | string |  | "./bundle.zip" |
+| bundle_path_cffile | Path used to write the intermediate Cloud Function code bundle. | string |  | "./bundle_cffile.zip" |
+| cai_gcs_export | Enable optional part to export tables to GCS. | bool |  | false |
+| file_config | Optional configuration for the BQ table file export function. | object({…}) |  | {…} |
| location | App Engine location used in the example. | string |  | "europe-west" |
| name | Arbitrary string used to name created resources. | string |  | "asset-inventory" |
+| name_cffile | Arbitrary string used to name created resources. | string |  | "cffile-exporter" |
| project_create | Create project instead of using an existing one. | bool |  | true |
| region | Compute region used in the example. | string |  | "europe-west1" |
| root_node | The resource name of the parent folder or organization for project creation, in 'folders/folder_id' or 'organizations/org_id' format. | string |  | null |
@@ -63,3 +81,4 @@ You can also create a dashboard connecting [Datalab](https://datastudio.google.c
+
diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/cf/main.py b/cloud-operations/scheduled-asset-inventory-export-bq/cf/main.py
index ad97c3262f..9f9cfb3f05 100755
--- a/cloud-operations/scheduled-asset-inventory-export-bq/cf/main.py
+++ b/cloud-operations/scheduled-asset-inventory-export-bq/cf/main.py
@@ -50,18 +50,19 @@ def _configure_logging(verbose=True):
@click.option('--bq-project', required=True, help='Bigquery project to use.')
@click.option('--bq-dataset', required=True, help='Bigquery dataset to use.')
@click.option('--bq-table', required=True, help='Bigquery table name to use.')
+@click.option('--bq-table-overwrite', required=True, type=bool, help='Overwrite the existing BQ table instead of creating a new date-suffixed one.')
@click.option('--target-node', required=True, help='Node in Google Cloud resource hierarchy.')
@click.option('--read-time', required=False, help=(
'Day to take an asset snapshot in \'YYYYMMDD\' format, uses current day '
' as default. Export will run at midnight of the specified day.'))
@click.option('--verbose', is_flag=True, help='Verbose output')
-def main_cli(project=None, bq_project=None, bq_dataset=None, bq_table=None, target_node=None,
+def main_cli(project=None, bq_project=None, bq_dataset=None, bq_table=None, bq_table_overwrite=None, target_node=None,
read_time=None, verbose=False):
'''Trigger Cloud Asset inventory export to Bigquery. Data will be stored in
the dataset specified on a dated table with the name specified.
'''
try:
- _main(project, bq_project, bq_dataset, bq_table, target_node, read_time, verbose)
+ _main(project, bq_project, bq_dataset, bq_table, bq_table_overwrite, target_node, read_time, verbose)
except RuntimeError:
logging.exception('exception raised')
@@ -79,19 +80,22 @@ def main(event, context):
logging.exception('exception in cloud function entry point')
-def _main(project=None, bq_project=None, bq_dataset=None, bq_table=None, target_node=None, read_time=None, verbose=False):
+def _main(project=None, bq_project=None, bq_dataset=None, bq_table=None, bq_table_overwrite=None, target_node=None, read_time=None, verbose=False):
'Module entry point used by cli and cloud function wrappers.'
_configure_logging(verbose)
- if not read_time:
- read_time = datetime.datetime.now()
+ output_config = asset_v1.OutputConfig()
client = asset_v1.AssetServiceClient()
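+  # without overwrite, export to a dated table so daily snapshots accumulate;
+  # with overwrite, reuse a single '<table>_latest' table on every run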
+  if not bq_table_overwrite:
+    if not read_time:
+      read_time = datetime.datetime.now()
+ output_config.bigquery_destination.table = '%s_%s' % (
+ bq_table, read_time.strftime('%Y%m%d'))
+ else:
+ output_config.bigquery_destination.table = '%s_latest' % (
+ bq_table)
content_type = asset_v1.ContentType.RESOURCE
- output_config = asset_v1.OutputConfig()
output_config.bigquery_destination.dataset = 'projects/%s/datasets/%s' % (
bq_project, bq_dataset)
- output_config.bigquery_destination.table = '%s_%s' % (
- bq_table, read_time.strftime('%Y%m%d'))
output_config.bigquery_destination.separate_tables_per_asset_type = True
output_config.bigquery_destination.force = True
try:
diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/cffile/main.py b/cloud-operations/scheduled-asset-inventory-export-bq/cffile/main.py
new file mode 100755
index 0000000000..cb54b0bc68
--- /dev/null
+++ b/cloud-operations/scheduled-asset-inventory-export-bq/cffile/main.py
@@ -0,0 +1,99 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''Cloud Function module to export a BQ table as a file.
+
+This module is designed to be plugged into a Cloud Function, attached to a
+Cloud Scheduler trigger, to export a BigQuery table to GCS as JSON or CSV
+(for example, IP-to-hostname mappings consumed by downstream tools).
+
+'''
+
+import base64
+import json
+import logging
+import warnings
+
+from google.api_core.exceptions import GoogleAPIError
+from google.cloud import bigquery
+
+import click
+
+import googleapiclient.errors
+
+
+def _configure_logging(verbose=True):
+ '''Basic logging configuration.
+ Args:
+ verbose: enable verbose logging
+ '''
+ level = logging.DEBUG if verbose else logging.INFO
+ logging.basicConfig(level=level)
+ warnings.filterwarnings('ignore', r'.*end user credentials.*', UserWarning)
+
+@click.command()
+@click.option('--bucket', required=True, help='GCS bucket for export')
+@click.option('--filename', required=True, help='Path and filename with extension for the export, e.g. folder/export.json.')
+@click.option('--format', required=True, help='The exported file format, e.g. NEWLINE_DELIMITED_JSON or CSV.')
+@click.option('--bq-dataset', required=True, help='Bigquery dataset where table for export is located.')
+@click.option('--bq-table', required=True, help='Bigquery table to export.')
+@click.option('--verbose', is_flag=True, help='Verbose output')
+def main_cli(bucket=None, filename=None, format=None, bq_dataset=None, bq_table=None, verbose=False):
+  '''Trigger an export of a Bigquery table to a file on GCS. Data is written
+  to the specified bucket using the filename and format provided.
+  '''
+ try:
+ _main(bucket, filename, format, bq_dataset, bq_table, verbose)
+ except RuntimeError:
+ logging.exception('exception raised')
+
+def main(event, context):
+ 'Cloud Function entry point.'
+ try:
+ data = json.loads(base64.b64decode(event['data']).decode('utf-8'))
+ print(data)
+ _main(**data)
+ # uncomment once https://issuetracker.google.com/issues/155215191 is fixed
+ # except RuntimeError:
+ # raise
+ except Exception:
+ logging.exception('exception in cloud function entry point')
+
+
+def _main(bucket=None, filename=None, format=None, bq_dataset=None, bq_table=None, verbose=False):
+ 'Module entry point used by cli and cloud function wrappers.'
+
+ _configure_logging(verbose)
+ client = bigquery.Client()
+ destination_uri = 'gs://{}/{}'.format(bucket, filename)
+ dataset_ref = client.dataset(bq_dataset)
+ table_ref = dataset_ref.table(bq_table)
+ job_config = bigquery.job.ExtractJobConfig()
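+  # map the format string (e.g. 'CSV') to the matching DestinationFormat attribute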
+  job_config.destination_format = getattr(bigquery.DestinationFormat, format)
+ extract_job = client.extract_table(
+ table_ref, destination_uri, job_config=job_config
+ )
+ try:
+ extract_job.result()
+ except (GoogleAPIError, googleapiclient.errors.HttpError) as e:
+ logging.debug('API Error: %s', e, exc_info=True)
+ raise RuntimeError(
+ 'Error exporting BQ table %s as a file' % bq_table, e)
+
+
+if __name__ == '__main__':
+ main_cli()
\ No newline at end of file
diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/cffile/requirements.txt b/cloud-operations/scheduled-asset-inventory-export-bq/cffile/requirements.txt
new file mode 100644
index 0000000000..d48ebb5475
--- /dev/null
+++ b/cloud-operations/scheduled-asset-inventory-export-bq/cffile/requirements.txt
@@ -0,0 +1,3 @@
+google-api-python-client>=1.10.1
+google-cloud-bigquery
\ No newline at end of file
diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/diagram_optional.png b/cloud-operations/scheduled-asset-inventory-export-bq/diagram_optional.png
new file mode 100644
index 0000000000..36111c6753
Binary files /dev/null and b/cloud-operations/scheduled-asset-inventory-export-bq/diagram_optional.png differ
diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/main.tf b/cloud-operations/scheduled-asset-inventory-export-bq/main.tf
index 0052401d99..39c1e37dfb 100644
--- a/cloud-operations/scheduled-asset-inventory-export-bq/main.tf
+++ b/cloud-operations/scheduled-asset-inventory-export-bq/main.tf
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+
+
###############################################################################
# Projects #
###############################################################################
@@ -47,6 +49,7 @@ module "service-account" {
iam_project_roles = {
(var.project_id) = [
"roles/cloudasset.owner",
+ "roles/bigquery.jobUser"
]
}
}
@@ -66,6 +69,17 @@ module "pubsub" {
# at the project level via roles/cloudscheduler.serviceAgent
}
+module "pubsub_file" {
+ source = "../../modules/pubsub"
+ project_id = module.project.project_id
+ name = var.name_cffile
+ subscriptions = {
+ "${var.name_cffile}-default" = null
+ }
+ # the Cloud Scheduler robot service account already has pubsub.topics.publish
+ # at the project level via roles/cloudscheduler.serviceAgent
+}
+
###############################################################################
# Cloud Function #
###############################################################################
@@ -93,6 +107,30 @@ module "cf" {
}
}
+module "cffile" {
+ count = var.cai_gcs_export ? 1 : 0
+ source = "../../modules/cloud-function"
+ project_id = module.project.project_id
+ region = var.region
+ name = var.name_cffile
+ bucket_name = "${var.name_cffile}-${random_pet.random.id}"
+ bucket_config = {
+ location = var.region
+ lifecycle_delete_age = null
+ }
+ bundle_config = {
+ source_dir = "cffile"
+ output_path = var.bundle_path_cffile
+ excludes = null
+ }
+ service_account = module.service-account.email
+ trigger_config = {
+ event = "google.pubsub.topic.publish"
+ resource = module.pubsub_file.topic.id
+ retry = null
+ }
+}
+
resource "random_pet" "random" {
length = 1
}
@@ -118,11 +156,34 @@ resource "google_cloud_scheduler_job" "job" {
attributes = {}
topic_name = module.pubsub.topic.id
data = base64encode(jsonencode({
- project = module.project.project_id
- bq_project = module.project.project_id
- bq_dataset = var.cai_config.bq_dataset
- bq_table = var.cai_config.bq_table
- target_node = var.cai_config.target_node
+ project = module.project.project_id
+ bq_project = module.project.project_id
+ bq_dataset = var.cai_config.bq_dataset
+ bq_table = var.cai_config.bq_table
+ bq_table_overwrite = var.cai_config.bq_table_overwrite
+ target_node = var.cai_config.target_node
+ }))
+ }
+}
+
+resource "google_cloud_scheduler_job" "job_file" {
+ count = var.cai_gcs_export ? 1 : 0
+ project = google_app_engine_application.app.project
+ region = var.region
+ name = "file-export-job"
+ description = "File export from BQ Job"
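+  # run weekly on Mondays at 09:00 UTC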
+  schedule    = "0 9 * * 1"
+ time_zone = "Etc/UTC"
+
+ pubsub_target {
+ attributes = {}
+ topic_name = module.pubsub_file.topic.id
+ data = base64encode(jsonencode({
+ bucket = var.file_config.bucket
+ filename = var.file_config.filename
+ format = var.file_config.format
+ bq_dataset = var.file_config.bq_dataset
+ bq_table = var.file_config.bq_table
}))
}
}
diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/variables.tf b/cloud-operations/scheduled-asset-inventory-export-bq/variables.tf
index 5bb62166c3..b31291b8ad 100644
--- a/cloud-operations/scheduled-asset-inventory-export-bq/variables.tf
+++ b/cloud-operations/scheduled-asset-inventory-export-bq/variables.tf
@@ -26,15 +26,50 @@ variable "bundle_path" {
default = "./bundle.zip"
}
+
+variable "bundle_path_cffile" {
+ description = "Path used to write the intermediate Cloud Function code bundle."
+ type = string
+ default = "./bundle_cffile.zip"
+}
+
variable "cai_config" {
- description = "Cloud Asset inventory export config."
+ description = "Cloud Asset Inventory export config."
+ type = object({
+ bq_dataset = string
+ bq_table = string
+ bq_table_overwrite = bool
+ target_node = string
+ })
+}
+
+
+variable "cai_gcs_export" {
+  description = "Enable optional part to export tables to GCS."
+ type = bool
+ default = false
+}
+
+
+variable "file_config" {
+  description = "Optional configuration for the BQ table file export function."
type = object({
- bq_dataset = string
- bq_table = string
- target_node = string
+ bucket = string
+ filename = string
+ format = string
+ bq_dataset = string
+ bq_table = string
})
+ default = {
+ bucket = null
+ filename = null
+ format = null
+ bq_dataset = null
+ bq_table = null
+ }
}
+
variable "location" {
description = "Appe Engine location used in the example."
type = string
@@ -48,6 +83,15 @@ variable "name" {
default = "asset-inventory"
}
+
+
+variable "name_cffile" {
+ description = "Arbitrary string used to name created resources."
+ type = string
+ default = "cffile-exporter"
+}
+
+
variable "project_create" {
description = "Create project instead ofusing an existing one."
type = bool
diff --git a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/bundle_cffile.zip b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/bundle_cffile.zip
new file mode 100644
index 0000000000..454bc1f7c7
Binary files /dev/null and b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/bundle_cffile.zip differ
diff --git a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/cffile/README b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/cffile/README
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/main.tf b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/main.tf
index b892dadb74..f7bd01af0b 100644
--- a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/main.tf
+++ b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/main.tf
@@ -18,6 +18,8 @@ module "test" {
source = "../../../../cloud-operations/scheduled-asset-inventory-export-bq"
billing_account = var.billing_account
cai_config = var.cai_config
+ cai_gcs_export = var.cai_gcs_export
+ file_config = var.file_config
project_create = var.project_create
project_id = var.project_id
}
diff --git a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/variables.tf b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/variables.tf
index 1d70f8272d..d80431e339 100644
--- a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/variables.tf
+++ b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/variables.tf
@@ -19,17 +19,42 @@ variable "billing_account" {
variable "cai_config" {
type = object({
- bq_dataset = string
- bq_table = string
- target_node = string
+ bq_dataset = string
+ bq_table = string
+ bq_table_overwrite = bool
+ target_node = string
})
default = {
- bq_dataset = "my-dataset"
- bq_table = "my_table"
- target_node = "organization/1234567890"
+ bq_dataset = "my-dataset"
+ bq_table = "my_table"
+    bq_table_overwrite = true
+    target_node        = "organizations/1234567890"
}
}
+variable "cai_gcs_export" {
+ type = bool
+ default = true
+}
+
+variable "file_config" {
+ type = object({
+ bucket = string
+ filename = string
+ format = string
+ bq_dataset = string
+ bq_table = string
+ })
+ default = {
+ bucket = "my-bucket"
+ filename = "my-folder/myfile.json"
+ format = "NEWLINE_DELIMITED_JSON"
+ bq_dataset = "my-dataset"
+ bq_table = "my_table"
+ }
+}
+
+
variable "project_create" {
type = bool
default = true
diff --git a/tests/cloud_operations/scheduled_asset_inventory_export_bq/test_plan.py b/tests/cloud_operations/scheduled_asset_inventory_export_bq/test_plan.py
index de94c82d54..484496a57b 100644
--- a/tests/cloud_operations/scheduled_asset_inventory_export_bq/test_plan.py
+++ b/tests/cloud_operations/scheduled_asset_inventory_export_bq/test_plan.py
@@ -23,5 +23,5 @@
def test_resources(e2e_plan_runner):
"Test that plan works and the numbers of resources is as expected."
modules, resources = e2e_plan_runner(FIXTURES_DIR)
- assert len(modules) == 5
- assert len(resources) == 23
+ assert len(modules) == 7
+ assert len(resources) == 29