add custom scripting file storage

tdroberto · tdroberto · commit e25c6920722a · 2025-02-04T12:55:05.000+09:00
diff --git a/scenarios/custom_scripting_file_storage/README.md b/scenarios/custom_scripting_file_storage/README.md
@@ -0,0 +1,20 @@
+# Custom scripting file storage
+
+----
+## Overview
+
+This project demonstrates how to temporarily store a file while a custom script is running in a Treasure Data (TD) Workflow.
+
+----
+## Implementation
+1. Copy and paste the code into a custom script in Treasure Workflows.
+
+----
+## Considerations
+
+N/A
+
+----
+## Questions
+
+Please feel free to reach out to apac-se@treasure-data.com with any questions you have about using this code.
diff --git a/scenarios/custom_scripting_file_storage/config/params.yaml b/scenarios/custom_scripting_file_storage/config/params.yaml
@@ -0,0 +1,4 @@
+db: some_db
+in_tbl: input_tbl
+out_tbl: output_tbl
+api_endpoint: https://api.treasuredata.com
diff --git a/scenarios/custom_scripting_file_storage/custom_scripting_file_storage.dig b/scenarios/custom_scripting_file_storage/custom_scripting_file_storage.dig
@@ -0,0 +1,18 @@
+_export:
+  !include : config/params.yaml
+  td:
+    engine: presto
+    database: ${db}
+
++create_db_tbl_if_not_exist:
+  td_ddl>:
+  create_tables: [ "${out_tbl}" ]
+  empty_tables: [ "${out_tbl}" ]
+
++store:
+  py>: scripts.store.main
+  _env:
+    TD_API_KEY: ${secret:td.apikey}
+    TD_API_ENDPOINT: ${api_endpoint}
+  docker:
+    image: "digdag/digdag-python:3.10"
diff --git a/scenarios/custom_scripting_file_storage/scripts/store.py b/scenarios/custom_scripting_file_storage/scripts/store.py
@@ -0,0 +1,31 @@
+import os
+import pandas as pd
+import pytd
+
+def main(**kwargs):
+  tdAPIkey = os.getenv("TD_API_KEY")
+  tdAPIendpoint = os.getenv("TD_API_ENDPOINT")
+  database = kwargs.get('db')
+  in_table = kwargs.get('in_tbl')
+  out_table = kwargs.get('out_tbl')
+  csv_filename = 'temp.csv'
+
+  td = pytd.Client(apikey=tdAPIkey, 
+              endpoint=tdAPIendpoint, 
+              database=database, 
+              default_engine='presto')
+
+  res = td.query(f'SELECT * FROM {database}.{in_table}')
+  df = pd.DataFrame(**res)
+  print(df)
+  df.to_csv(csv_filename, sep=',', index=False, encoding='utf-8')
+  print("Script directory:", os.path.dirname(os.path.abspath(__file__)))
+  print("Stored csv directory:", os.path.dirname(os.path.abspath(csv_filename)))
+  out_df = pd.read_csv(csv_filename)
+  print(df)
+  
+  td.load_table_from_dataframe(out_df,f'{database}.{out_table}',writer='bulk_import',if_exists='overwrite')
+
+# Script entry point: calls main() with no kwargs, so db/in_tbl/out_tbl are all None on this path.
+if __name__ == "__main__":
+    main()