Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions python-connectors/googlesheets-sheet/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
from slugify import slugify
from googlesheets import GoogleSheetsSession
from safe_logger import SafeLogger
from googlesheets_common import DSSConstants, extract_credentials, get_tab_ids, mark_date_columns, convert_dates_in_row
from googlesheets_common import (
DSSConstants, extract_credentials, get_tab_ids, mark_date_columns,
convert_dates_in_row, should_process_worksheet
)
from googlesheets_append import append_rows


Expand Down Expand Up @@ -58,7 +61,7 @@ def generate_rows(self, dataset_schema=None, dataset_partitioning=None,
worksheets = self.session.get_spreadsheets(self.doc_id)

for worksheet in worksheets:
if self.tabs_ids and (worksheet.title not in self.tabs_ids):
if not should_process_worksheet(worksheet, self.tabs_ids):
continue
rows = worksheet.get_all_values()
try:
Expand Down
17 changes: 16 additions & 1 deletion python-lib/googlesheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,14 @@ def get_spreadsheets(self, document_id, tab_id=None):
try:
# worksheet and worksheets both make a single fetch_sheet_metadata request
# so better use one worksheets than multiple worksheet
if tab_id:
if (tab_id is not None) and isinstance(tab_id, str):
# tab_id contains the name of the worksheet
# we can search directly for the right worksheet
return [self.client.open_by_key(document_id).worksheet(tab_id)]
elif (tab_id is not None) and isinstance(tab_id, int):
# tab_id contains the id of the worksheet
# we grab them all and iterate later
worksheets = self.client.open_by_key(document_id).worksheets()
else:
return self.client.open_by_key(document_id).worksheets()
except gspread.exceptions.SpreadsheetNotFound as error:
Expand All @@ -83,6 +89,15 @@ def get_spreadsheets(self, document_id, tab_id=None):
if error_status == 'FAILED_PRECONDITION':
raise Exception("This document is not a Google Sheet. Please use the Google Drive plugin instead.")
raise Exception("The Google API returned an error: %s" % error)
for worksheet in worksheets:
# tab_id contains the worksheet's id
# we need to iterate to find the right one
if tab_id == -1:
# Workaround for DSS UI issue with 0
tab_id = 0
if tab_id == worksheet.id:
return [worksheet]
raise Exception("The spreadsheets ID {} was not found".format(tab_id))

def get_spreadsheet_title(self, document_id):
try:
Expand Down
19 changes: 18 additions & 1 deletion python-lib/googlesheets_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def get_tab_ids(config):
legacy_tab_id = config.get("tab_id", None)
tabs_ids = config.get("tabs_ids")
tabs_ids = tabs_ids or []
if type(tabs_ids) == str:
if type(tabs_ids) in [str, int]:
tabs_ids = [tabs_ids]
if not tabs_ids:
if legacy_tab_id:
Expand Down Expand Up @@ -124,3 +124,20 @@ def convert_dates_in_row(row, date_columns):
row[date_column] = format_date(
row[date_column], DSSConstants.DSS_DATE_FORMAT, DSSConstants.GSPREAD_DATE_FORMAT)
return row


def should_process_worksheet(worksheet, tabs_ids):
    """Tell whether a worksheet is part of the user's tab selection.

    The kind of selection is decided by the type of the first entry of
    ``tabs_ids``: a string means the entries are worksheet titles, an
    integer means they are worksheet ids.

    :param worksheet: object exposing ``title`` and ``id`` attributes
        (presumably a gspread Worksheet -- confirm against callers).
    :param tabs_ids: list of selected titles or ids; an empty or None value
        means no filter is set.
    :return: True if the worksheet must be processed, False otherwise.
    """
    if not tabs_ids:  # could not be [0]
        # if nothing is defined, the whole spreadsheet needs to be processed
        return True
    first_tab_id = tabs_ids[0]
    if isinstance(first_tab_id, str):
        # Worksheets are identified by their ``title``; they have no
        # ``name`` attribute, so reading it raised AttributeError.
        return worksheet.title in tabs_ids
    if isinstance(first_tab_id, int):
        # Worksheet id 0 is compared as -1, so a selection holding -1
        # designates the worksheet whose id is 0.
        worksheet_id = -1 if worksheet.id == 0 else worksheet.id
        return worksheet_id in tabs_ids
    return False
7 changes: 5 additions & 2 deletions python-runnables/import-sheets-into-project/runnable.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import dataiku
from dataiku.runnables import Runnable, ResultTable
from googlesheets_common import DSSConstants, extract_credentials, get_unique_slugs, get_unique_names
from googlesheets_common import (
DSSConstants, extract_credentials, get_unique_slugs, get_unique_names,
should_process_worksheet
)
from googlesheets import GoogleSheetsSession
from safe_logger import SafeLogger

Expand Down Expand Up @@ -68,7 +71,7 @@ def run(self, progress_callback):
index = 0
for worksheet in self.worksheets:
worksheet_title = worksheet.title
if worksheet_title in self.tabs_ids:
if should_process_worksheet(worksheet, self.tabs_ids):
index += 1
progress_callback(index)
dataset = None
Expand Down
7 changes: 6 additions & 1 deletion resource/browse_sheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,13 @@ def do(payload, config, plugin_config, inputs):
choices = []
for worksheet in worksheets:
worksheet_title = "{}".format(worksheet.title)
worksheet_id = worksheet.id
if worksheet_id == 0:
# DSS won't store in the presets if the value is 0
# and the id for the first (default) sheet in the spreadsheet is 0
worksheet_id = -1 # Workaround
choices.append({
"label": worksheet_title,
"value": worksheet_title
"value": worksheet_id
})
return build_select_choices(choices)