diff --git a/Gemfile b/Gemfile index b46bed8d3..59dd23939 100644 --- a/Gemfile +++ b/Gemfile @@ -75,6 +75,7 @@ gem 'aws-sdk-bedrockruntime', '~> 1.66' gem "google-cloud-storage", "~> 1.57.1" gem "google-api-client", "~> 0.7.1" gem "googleauth", "~> 1.15.0" +gem 'google-apis-sheets_v4', "~> 0.46" # Use postgresql as the database for Active Record diff --git a/Gemfile.lock b/Gemfile.lock index 4bd298b9c..b17713cac 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -80,8 +80,8 @@ GEM securerandom (>= 0.3) tzinfo (~> 2.0, >= 2.0.5) uri (>= 0.13.1) - addressable (2.8.7) - public_suffix (>= 2.0.2, < 7.0) + addressable (2.8.8) + public_suffix (>= 2.0.2, < 8.0) afm (1.0.0) alba (3.9.1) ansi (1.5.0) @@ -279,12 +279,13 @@ GEM faraday (>= 1, < 3) faraday-multipart (1.1.1) multipart-post (~> 2.0) - faraday-net_http (3.4.1) - net-http (>= 0.5.0) + faraday-net_http (3.4.2) + net-http (~> 0.5) faraday-retry (2.3.2) faraday (~> 2.0) faster_s3_url (1.2.0) fastimage (2.4.0) + ffi (1.17.2) ffi (1.17.2-arm64-darwin) ffi (1.17.2-x86_64-linux-gnu) ffi-compiler (1.3.2) @@ -324,6 +325,8 @@ GEM google-apis-core (>= 0.15.0, < 2.a) google-apis-iamcredentials_v1 (0.26.0) google-apis-core (>= 0.15.0, < 2.a) + google-apis-sheets_v4 (0.46.0) + google-apis-core (>= 0.15.0, < 2.a) google-apis-storage_v1 (0.57.0) google-apis-core (>= 0.15.0, < 2.a) google-cloud-core (1.8.0) @@ -453,11 +456,12 @@ GEM matrix (0.4.3) method_source (1.1.0) mini_mime (1.1.5) + mini_portile2 (2.8.9) minitar (1.1.0) minitest (5.26.0) mono_logger (1.1.2) msgpack (1.8.0) - multi_json (1.17.0) + multi_json (1.18.0) multi_xml (0.7.2) bigdecimal (~> 3.1) multipart-post (2.4.1) @@ -468,8 +472,8 @@ GEM racc (~> 1.7) neighbor (0.6.0) activerecord (>= 7.1) - net-http (0.6.0) - uri + net-http (0.9.1) + uri (>= 0.11.1) net-imap (0.5.12) date net-protocol @@ -480,6 +484,9 @@ GEM net-smtp (0.5.1) net-protocol nio4r (2.7.5) + nokogiri (1.18.10) + mini_portile2 (~> 2.8.2) + racc (~> 1.4) nokogiri (1.18.10-arm64-darwin) racc (~> 1.4) 
nokogiri (1.18.10-x86_64-linux-gnu)
@@ -535,6 +542,7 @@ GEM
       hashery (~> 2.0)
       ruby-rc4
       ttfunk
+    pg (1.6.2)
     pg (1.6.2-arm64-darwin)
     pg (1.6.2-x86_64-linux)
     pp (0.6.3)
@@ -566,7 +574,7 @@ GEM
     psych (5.2.6)
       date
       stringio
-    public_suffix (6.0.2)
+    public_suffix (7.0.0)
     puma (7.1.0)
       nio4r (~> 2.0)
     qa (5.15.0)
@@ -804,7 +812,7 @@ GEM
       aws-sdk-s3 (~> 1.0)
       content_disposition (~> 1.0)
       roda (>= 2.27, < 4)
-    uri (1.0.4)
+    uri (1.1.1)
     useragent (0.16.11)
     uuidtools (3.0.0)
     version_gem (1.1.9)
@@ -850,6 +858,7 @@ GEM
 PLATFORMS
   arm64-darwin-21
   arm64-darwin-23
+  ruby
   x86_64-linux

 DEPENDENCIES
@@ -889,6 +898,7 @@ DEPENDENCIES
   faster_s3_url (~> 1.1)
   font-awesome-rails (~> 4.7)
   google-api-client (~> 0.7.1)
+  google-apis-sheets_v4 (~> 0.46)
   google-cloud-storage (~> 1.57.1)
   googleauth (~> 1.15.0)
   hirefire-resource (>= 0.10.1)
diff --git a/app/services/google_arts_and_culture/spreadsheet_creator.rb b/app/services/google_arts_and_culture/spreadsheet_creator.rb
new file mode 100644
index 000000000..cd2375a97
--- /dev/null
+++ b/app/services/google_arts_and_culture/spreadsheet_creator.rb
@@ -0,0 +1,113 @@
+require 'open-uri'
+require 'googleauth'
+require 'googleauth/stores/file_token_store'
+require 'fileutils'
+require 'csv'
+require 'json'
+require 'google/apis/sheets_v4'
+
+module GoogleArtsAndCulture
+  # Creates a new Google Sheet and writes tabular metadata into it through the
+  # Google Sheets v4 API, authorizing as a user via OAuth2.
+  #
+  # The OAuth client credentials and user id are looked up in
+  # ScihistDigicoll::Env; the resulting token is cached at TOKEN_STORE_PATH.
+  #
+  # NOTE(review): when no cached token exists, #authorize prompts on the
+  # terminal, so the first run has to be interactive.
+  # NOTE(review): Google has deprecated the out-of-band redirect (OOB_URI);
+  # confirm it still works for this OAuth client.
+  class SpreadsheetCreator
+    APPLICATION_NAME = 'Science History Institute Digital Collections'
+    OOB_URI = 'urn:ietf:wg:oauth:2.0:oob'
+
+    CREDENTIALS_PATH = Dir.home
+    # File in which the OAuth token obtained by #authorize is cached.
+    TOKEN_STORE_PATH = File.join(CREDENTIALS_PATH, '.credentials', 'sheets.googleapis.com-ruby-csv.yaml')
+
+    # Creates a new spreadsheet, writes csv_data into "Sheet1" starting at A1,
+    # and prints the spreadsheet URL. Prints a message and returns early when
+    # csv_data is nil or empty.
+    #
+    # @param csv_data [Array<Array>] rows of cell values (a Ruby array of arrays)
+    # @return [nil]
+    def create_google_sheet(csv_data)
+      if csv_data.nil? || csv_data.empty?
+        puts "Empty metadata."
+        return
+      end
+
+      spreadsheet = blank_spreadsheet
+      # NOTE(review): the range stops at column Z, so rows are presumably
+      # expected to hold at most 26 cells -- confirm against callers.
+      authorized_service.update_spreadsheet_value(
+        spreadsheet.spreadsheet_id,
+        "Sheet1!A1:Z#{csv_data.length}",
+        Google::Apis::SheetsV4::ValueRange.new(values: csv_data),
+        value_input_option: 'USER_ENTERED'
+      )
+      puts "Spreadsheet URL: #{spreadsheet.spreadsheet_url}"
+    end
+
+    private
+
+    # Returns user credentials for the Sheets and Drive scopes: the cached
+    # token when one exists, otherwise credentials requested interactively.
+    #
+    # @raise [RuntimeError] if the credentials setting is missing
+    def authorize
+      scope = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive']
+      # Make sure the directory that holds the token cache exists.
+      FileUtils.mkdir_p(File.dirname(TOKEN_STORE_PATH))
+
+      credentials_json_string = ScihistDigicoll::Env.lookup(:test_google_project_credentials)
+      user_id = ScihistDigicoll::Env.lookup(:test_google_project_user_id)
+      raise 'test_google_project_credentials is not set' if credentials_json_string.to_s.empty?
+
+      client_id = Google::Auth::ClientId.from_hash(JSON.parse(credentials_json_string))
+      token_store = Google::Auth::Stores::FileTokenStore.new(file: TOKEN_STORE_PATH)
+      authorizer = Google::Auth::UserAuthorizer.new(client_id, scope, token_store)
+
+      authorizer.get_credentials(user_id) || request_credentials(authorizer, user_id)
+    end
+
+    # Prints the consent URL, reads the authorization code typed by the
+    # operator, and exchanges it for credentials that are also stored in the
+    # token cache.
+    #
+    # @raise [RuntimeError] if no authorization code is entered
+    def request_credentials(authorizer, user_id)
+      url = authorizer.get_authorization_url(base_url: OOB_URI)
+      puts "Open the following URL in your browser and authorize the app:"
+      puts url
+      print 'Enter the authorization code: '
+      $stdout.flush
+      # Read from $stdin explicitly: bare `gets` reads from files named in ARGV
+      # when there are any (e.g. rake task names), and returns nil at EOF.
+      code = $stdin.gets.to_s.strip
+      raise 'No authorization code was entered.' if code.empty?
+
+      authorizer.get_and_store_credentials_from_code(user_id: user_id, code: code, base_url: OOB_URI)
+    end
+
+    # Memoized Sheets API client carrying the application name and the
+    # credentials returned by #authorize.
+    def authorized_service
+      @authorized_service ||= begin
+        service = Google::Apis::SheetsV4::SheetsService.new
+        service.client_options.application_name = APPLICATION_NAME
+        service.authorization = authorize
+        service
+      end
+    end
+
+    # Creates an empty spreadsheet titled with the current timestamp,
+    # requesting only its id and URL back from the API (`fields`).
+    def blank_spreadsheet
+      title = "Metadata #{Time.now.strftime('%Y-%m-%d %H:%M:%S')}"
+      spreadsheet_properties = Google::Apis::SheetsV4::SpreadsheetProperties.new(title: title)
+      spreadsheet = Google::Apis::SheetsV4::Spreadsheet.new(properties: spreadsheet_properties)
+      authorized_service.create_spreadsheet(spreadsheet, fields: 'spreadsheetId,spreadsheetUrl')
+    end
+  end
+end
diff --git a/lib/scihist_digicoll/env.rb b/lib/scihist_digicoll/env.rb
index fde7cfd3c..37ead0aaa 100644
--- a/lib/scihist_digicoll/env.rb
+++ b/lib/scihist_digicoll/env.rb
@@ -127,10 +127,14 @@ def self.aws_credentials
     # This is effectively a password - a shared secret.
     define_key :microsoft_sso_client_secret

+    # Official SHI google arts and culture project:
     define_key :google_arts_and_culture_project_id
     define_key :google_arts_and_culture_bucket_name
     define_key :google_arts_and_culture_credentials

+    # Test Google project, just for testing uploads in this project
+    define_key :test_google_project_credentials
+    define_key :test_google_project_user_id

     # MediaConvert requires a special role to be passed to MediaConvert
     # jobs, that has access to input/output buckets, and MediaConvert itself.