dataiku · alexbourret · Feb 20, 2025 · Feb 20, 2025 · Feb 20, 2025 · Feb 20, 2025
diff --git a/python-connectors/googlesheets-sheet/connector.py b/python-connectors/googlesheets-sheet/connector.py
@@ -5,7 +5,10 @@
 from slugify import slugify
 from googlesheets import GoogleSheetsSession
 from safe_logger import SafeLogger
-from googlesheets_common import DSSConstants, extract_credentials, get_tab_ids, mark_date_columns, convert_dates_in_row
+from googlesheets_common import (
+    DSSConstants, extract_credentials, get_tab_ids, mark_date_columns,
+    convert_dates_in_row, should_process_worksheet
+)
 from googlesheets_append import append_rows
 
 
@@ -58,7 +61,7 @@ def generate_rows(self, dataset_schema=None, dataset_partitioning=None,
         worksheets = self.session.get_spreadsheets(self.doc_id)
 
         for worksheet in worksheets:
-            if self.tabs_ids and (worksheet.title not in self.tabs_ids):
+            if not should_process_worksheet(worksheet, self.tabs_ids):
                 continue
             rows = worksheet.get_all_values()
             try:

diff --git a/python-lib/googlesheets.py b/python-lib/googlesheets.py
@@ -60,8 +60,14 @@ def get_spreadsheets(self, document_id, tab_id=None):
         try:
             # worksheet and worksheets both make a single fetch_sheet_metadata request
             # so better use one worksheets than multiple worksheet
-            if tab_id:
+            if (tab_id is not None) and isinstance(tab_id, str):
+                # tab_id contains the name of the worksheet
+                # we can search directly for the right worksheet
                 return [self.client.open_by_key(document_id).worksheet(tab_id)]
+            elif (tab_id is not None) and isinstance(tab_id, int):
+                # tab_id contains the id of the worksheet
+                # we grab them all and iterate later
+                worksheets = self.client.open_by_key(document_id).worksheets()
             else:
                 return self.client.open_by_key(document_id).worksheets()
         except gspread.exceptions.SpreadsheetNotFound as error:
@@ -83,6 +89,15 @@ def get_spreadsheets(self, document_id, tab_id=None):
                 if error_status == 'FAILED_PRECONDITION':
                     raise Exception("This document is not a Google Sheet. Please use the Google Drive plugin instead.")
             raise Exception("The Google API returned an error: %s" % error)
+        for worksheet in worksheets:
+            # tab_id contains the worksheet's id
+            # we need to iterate to find the right one
+            if tab_id == -1:
+                # Workaround for DSS UI issue with 0
+                tab_id = 0
+            if tab_id == worksheet.id:
+                return [worksheet]
+        raise Exception("The spreadsheets ID {} was not found".format(tab_id))
 
     def get_spreadsheet_title(self, document_id):
         try:

diff --git a/python-lib/googlesheets_common.py b/python-lib/googlesheets_common.py
@@ -57,7 +57,7 @@ def get_tab_ids(config):
     legacy_tab_id = config.get("tab_id", None)
     tabs_ids = config.get("tabs_ids")
     tabs_ids = tabs_ids or []
-    if type(tabs_ids) == str:
+    if type(tabs_ids) in [str, int]:
         tabs_ids = [tabs_ids]
     if not tabs_ids:
         if legacy_tab_id:
@@ -124,3 +124,31 @@ def convert_dates_in_row(row, date_columns):
         row[date_column] = format_date(
             row[date_column], DSSConstants.DSS_DATE_FORMAT, DSSConstants.GSPREAD_DATE_FORMAT)
     return row
+
+
+def should_process_worksheet(worksheet, tabs_ids):
+    """Tell whether a worksheet belongs to the configured tab selection.
+
+    :param worksheet: object exposing ``title`` and ``id`` attributes
+    :param tabs_ids: list of selected tabs, holding either worksheet titles
+        (str) or worksheet ids (int); the type of the first item decides
+        which attribute is compared
+    :return: True if the worksheet must be processed, False otherwise
+    """
+    if not tabs_ids:
+        # Nothing is selected: the whole spreadsheet needs to be processed.
+        # A selection of only the sheet with id 0 is a non-empty list, so it
+        # does not end up here.
+        return True
+    first_tab_id = tabs_ids[0]
+    if isinstance(first_tab_id, str):
+        # Selection by title, as used elsewhere in the plugin (worksheet.title)
+        return worksheet.title in tabs_ids
+    if isinstance(first_tab_id, int):
+        worksheet_id = worksheet.id
+        if worksheet_id in tabs_ids:
+            return True
+        # The sheet selector stores -1 instead of 0 because DSS does not
+        # keep a 0 value, so -1 in the selection stands for the sheet with id 0.
+        return worksheet_id == 0 and -1 in tabs_ids
+    return False
diff --git a/python-runnables/import-sheets-into-project/runnable.py b/python-runnables/import-sheets-into-project/runnable.py
@@ -1,6 +1,9 @@
 import dataiku
 from dataiku.runnables import Runnable, ResultTable
-from googlesheets_common import DSSConstants, extract_credentials, get_unique_slugs, get_unique_names
+from googlesheets_common import (
+    DSSConstants, extract_credentials, get_unique_slugs, get_unique_names,
+    should_process_worksheet
+)
 from googlesheets import GoogleSheetsSession
 from safe_logger import SafeLogger
 
@@ -68,7 +71,7 @@ def run(self, progress_callback):
         index = 0
         for worksheet in self.worksheets:
             worksheet_title = worksheet.title
-            if worksheet_title in self.tabs_ids:
+            if should_process_worksheet(worksheet, self.tabs_ids):
                 index += 1
                 progress_callback(index)
                 dataset = None

diff --git a/resource/browse_sheets.py b/resource/browse_sheets.py
@@ -40,8 +40,13 @@ def do(payload, config, plugin_config, inputs):
         choices = []
         for worksheet in worksheets:
             worksheet_title = "{}".format(worksheet.title)
+            worksheet_id = worksheet.id
+            if worksheet_id == 0:
+                # DSS won't store in the presets if the value is 0
+                # and the id for the first (default) sheet in the spreadsheet is 0
+                worksheet_id = -1  # Workaround
             choices.append({
                 "label": worksheet_title,
-                "value": worksheet_title
+                "value": worksheet_id
             })
         return build_select_choices(choices)