From 621587c1b1c84a57e1724fc436248e6356ad1dd6 Mon Sep 17 00:00:00 2001
From: Mason Ballengee
Date: Mon, 16 Sep 2024 15:05:12 -0400
Subject: [PATCH] Bring in sharepoint code

Refer to https://github.com/samvera/browse-everything/pull/418 for what is being built off of.
---
 SharePoint.md                                 |  20 ++
 lib/browse_everything.rb                      |   8 +
 .../auth/sharepoint/session.rb                | 193 ++++++++++++++++
 lib/browse_everything/driver/sharepoint.rb    | 208 ++++++++++++++++++
 4 files changed, 429 insertions(+)
 create mode 100644 SharePoint.md
 create mode 100644 lib/browse_everything/auth/sharepoint/session.rb
 create mode 100644 lib/browse_everything/driver/sharepoint.rb

diff --git a/SharePoint.md b/SharePoint.md
new file mode 100644
index 00000000..7bf71b64
--- /dev/null
+++ b/SharePoint.md
@@ -0,0 +1,20 @@
+# Sharepoint Provider
+
+This provider will allow browse-everything to access a _specific_ SharePoint location
+
+First register an application on Azure to give access to the relevant location
+
+https://learn.microsoft.com/en-us/graph/auth-v2-service?tabs=http (steps 1,2 and 3)
+
+To use the sharepoint provider add the following to config/browse_everything_providers.yml
+
+```
+sharepoint:
+  client_id: [MyAppClientID]
+  client_secret: [MyAppClientSecret]
+  tenant_id: [MyAzureTenantID]
+  grant_type: client_credentials
+  scope: https://graph.microsoft.com/.default
+  domain: mydomain.sharepoint.com
+  site_name: [MySiteName]
+```
diff --git a/lib/browse_everything.rb b/lib/browse_everything.rb
index a50dcf11..16091d61 100644
--- a/lib/browse_everything.rb
+++ b/lib/browse_everything.rb
@@ -16,6 +16,8 @@ module Driver
     autoload :Box, 'browse_everything/driver/box'
     autoload :GoogleDrive, 'browse_everything/driver/google_drive'
     autoload :S3, 'browse_everything/driver/s3'
+    autoload :Sharepoint, 'browse_everything/driver/sharepoint'
+
 
     # Access the sorter set for the base driver class
     # @return [Proc]
@@ -39,6 +41,12 @@ module Google
     end
   end
 
+  module Auth
+    module Sharepoint
+      autoload :Session, 'browse_everything/auth/sharepoint/session'
+    end
+  end
+
   class InitializationError < RuntimeError; end
   class ConfigurationError < StandardError; end
   class NotImplementedError < StandardError; end
diff --git a/lib/browse_everything/auth/sharepoint/session.rb b/lib/browse_everything/auth/sharepoint/session.rb
new file mode 100644
index 00000000..f2a1ad68
--- /dev/null
+++ b/lib/browse_everything/auth/sharepoint/session.rb
@@ -0,0 +1,193 @@
+# frozen_string_literal: true
+
+require 'json'
+require 'net/http'
+require 'oauth2'
+require 'open-uri'
+require 'uri'
+
+module BrowseEverything
+  module Auth
+    module Sharepoint
+      # OAuth2 session for the Sharepoint provider.
+      #
+      # Builds an OAuth2 client for the Microsoft identity platform from the
+      # `sharepoint` section of the BrowseEverything configuration and offers
+      # small Net::HTTP helpers that send the token as a Bearer header.
+      class Session
+        OAUTH2_URLS = {
+          site: 'https://login.microsoftonline.com'
+        }.freeze
+
+        # Seconds slept after every request when no :backoff option is given.
+        DEFAULT_BACKOFF = 0.1
+
+        # @param opts [Hash] session options
+        # @option opts [String] :client_id application ID; without it no OAuth2 client is built
+        # @option opts [String] :client_secret application secret
+        # @option opts [String] :access_token previously issued token to reuse
+        # @option opts [String] :refresh_token only kept for the authorization_code grant
+        # @option opts [Numeric] :backoff seconds to sleep after each request
+        def initialize(opts = {})
+          @config = BrowseEverything.config['sharepoint']
+          # The default keeps sleep(@backoff) in #request from receiving nil.
+          @backoff = opts[:backoff] || DEFAULT_BACKOFF
+          return unless opts[:client_id]
+
+          client_options = { authorize_url: authorize_url, token_url: token_url, scope: scope }.merge(OAUTH2_URLS)
+          @oauth2_client = OAuth2::Client.new(opts[:client_id], opts[:client_secret], client_options)
+          if opts[:access_token].present?
+            @access_token = OAuth2::AccessToken.new(@oauth2_client, opts[:access_token])
+          else
+            # nil or empty string: ask the identity platform for a token instead.
+            get_access_token
+          end
+          @refresh_token = opts[:refresh_token] if @config[:grant_type] == 'authorization_code'
+        end
+
+        # @return [String] authorize endpoint path, handed to OAuth2::Client next to OAUTH2_URLS[:site]
+        def authorize_url
+          @config['tenant_id'] + '/oauth2/v2.0/authorize'
+        end
+
+        # @return [String] token endpoint path, handed to OAuth2::Client next to OAUTH2_URLS[:site]
+        def token_url
+          @config['tenant_id'] + '/oauth2/v2.0/token'
+        end
+
+        # @return [String] OAuth2 scope read from the configuration
+        def scope
+          @config['scope']
+        end
+
+        # Fetches an access token on first use and memoizes it.
+        # @param code [String, nil] authorization code; ignored for client_credentials
+        # @return [OAuth2::AccessToken]
+        def get_access_token(code = nil)
+          @access_token ||=
+            if @config[:grant_type] == 'client_credentials'
+              @oauth2_client.client_credentials.get_token(scope: @config[:scope])
+            else
+              # Every other grant type is treated as authorization_code.
+              @oauth2_client.auth_code.get_token(code)
+            end
+        end
+
+        # Exchanges a refresh token for a new access token and keeps the result.
+        # @param refresh_token [String]
+        # @return [OAuth2::AccessToken]
+        def refresh_token(refresh_token)
+          stale_token = OAuth2::AccessToken.new(@oauth2_client, @access_token.token, { 'refresh_token' => refresh_token })
+          @access_token = stale_token.refresh!
+        end
+
+        # NOTE(review): Box-style header; @api_key and @auth_token are never assigned in
+        # this class, so both interpolate as empty strings - confirm this fallback is wanted.
+        # @return [String]
+        def build_auth_header
+          "BoxAuth api_key=#{@api_key}&auth_token=#{@auth_token}"
+        end
+
+        # @param url [String]
+        # @param raw [Boolean] return the raw body instead of the parsed JSON
+        def get(url, raw = false)
+          uri = URI.parse(url)
+          request(uri, Net::HTTP::Get.new(uri.request_uri), raw)
+        end
+
+        # @param url [String]
+        # @param raw [Boolean] return the raw body instead of the parsed JSON
+        def delete(url, raw = false)
+          uri = URI.parse(url)
+          request(uri, Net::HTTP::Delete.new(uri.request_uri), raw)
+        end
+
+        # Sends a request over TLS, refreshing the token once after a 401.
+        # @param uri [URI]
+        # @param request [Net::HTTPRequest]
+        # @param raw [Boolean] return the raw body instead of the parsed JSON
+        # @param retries [Integer] refresh attempts already made
+        # @return [Hash, String] see #handle_errors
+        def request(uri, request, raw = false, retries = 0)
+          http = Net::HTTP.new(uri.host, uri.port)
+          http.use_ssl = true
+
+          # []= overwrites the field; add_field would append a second Authorization
+          # value when the same request object is re-sent after a token refresh.
+          request['Authorization'] = authorization_header
+          request['As-User'] = @as_user.to_s if @as_user
+
+          response = http.request(request)
+
+          # NOTE(review): RubyBox is not required by this file - confirm the host
+          # application loads it, otherwise these raises end in a NameError.
+          raise RubyBox::ObjectNotFound if response.is_a?(Net::HTTPNotFound)
+
+          # Got unauthorized (401) status, try to refresh the token
+          if response.code.to_i == 401 && @refresh_token && retries.zero?
+            refresh_token(@refresh_token)
+            return request(uri, request, raw, retries + 1)
+          end
+
+          sleep(@backoff) # try not to excessively hammer API.
+
+          handle_errors(response, raw)
+        end
+
+        # Opens +url+ through open-uri, sending the session's auth headers.
+        # @param url [String]
+        # @param opts [Hash] may carry :content_length_proc and :progress_proc
+        def do_stream(url, opts)
+          params = {
+            content_length_proc: opts[:content_length_proc],
+            progress_proc: opts[:progress_proc],
+            'Authorization' => authorization_header
+          }
+          params['As-User'] = @as_user if @as_user
+
+          # Kernel#open stopped opening URLs in Ruby 3.0; URI.open is the open-uri entry point.
+          URI.open(url, params)
+        end
+
+        # Parses the response body and raises on 4xx/5xx statuses.
+        # @param response [Net::HTTPResponse]
+        # @param raw [Boolean] return the raw body instead of the parsed JSON
+        # @return [Hash, String]
+        def handle_errors(response, raw)
+          status = response.code.to_i
+          body = response.body
+          begin
+            parsed_body = JSON.parse(body)
+          rescue StandardError
+            msg = body.nil? || body.empty? ? 'no data returned' : body
+            parsed_body = { 'message' => msg }
+          end
+
+          # status is used to determine whether
+          # we need to refresh the access token.
+          parsed_body['status'] = status
+
+          case status / 100
+          when 3
+            # 302 Found. We should return the url
+            parsed_body['location'] = response['Location'] if status == 302
+          when 4
+            raise(RubyBox::ItemNameInUse.new(parsed_body, status, body), parsed_body['message']) if parsed_body['code'] == 'item_name_in_use'
+            raise(RubyBox::AuthError.new(parsed_body, status, body), parsed_body['message']) if parsed_body['code'] == 'unauthorized' || status == 401
+            raise(RubyBox::RequestError.new(parsed_body, status, body), parsed_body['message'])
+          when 5
+            raise(RubyBox::ServerError.new(parsed_body, status, body), parsed_body['message'])
+          end
+          raw ? body : parsed_body
+        end
+
+        private
+
+        # @return [String] Bearer header when a token is held, otherwise #build_auth_header
+        def authorization_header
+          @access_token ? "Bearer #{@access_token.token}" : build_auth_header
+        end
+      end
+    end
+  end
+end
diff --git a/lib/browse_everything/driver/sharepoint.rb b/lib/browse_everything/driver/sharepoint.rb
new file mode 100644
index 00000000..f8cb42aa
--- /dev/null
+++ b/lib/browse_everything/driver/sharepoint.rb
@@ -0,0 +1,208 @@
+# frozen_string_literal: true
+
+require 'json'
+require 'net/http'
+require_relative 'authentication_factory'
+
+module BrowseEverything
+  module Driver
+    # Driver for accessing the MS-Graph API (https://learn.microsoft.com/en-us/graph/overview)
+    class Sharepoint < Base
+      GRAPH_API = 'https://graph.microsoft.com/v1.0'
+      LOGIN_HOST = 'https://login.microsoftonline.com'
+      REQUIRED_SETTINGS = %i[client_id client_secret domain site_name tenant_id].freeze
+
+      class << self
+        attr_accessor :authentication_klass
+
+        def default_authentication_klass
+          BrowseEverything::Auth::Sharepoint::Session
+        end
+      end
+
+      # Constructor
+      # @param config_values [Hash] configuration for the driver
+      def initialize(config_values)
+        self.class.authentication_klass ||= self.class.default_authentication_klass
+        super(config_values)
+      end
+
+      def icon
+        'cloud'
+      end
+
+      # Validates the configuration for the Sharepoint provider
+      # @raise [InitializationError] when a required setting is missing
+      def validate_config
+        REQUIRED_SETTINGS.each do |setting|
+          raise InitializationError, "Sharepoint driver requires a :#{setting} argument" unless config[setting]
+        end
+      end
+
+      # Retrieves the file entry objects for a given path to MS-graph drive resource
+      # @param [String] id of the file or folder
+      # @return [Array<BrowseEverything::FileEntry>]
+      def contents(id = '')
+        # No session is built up front: #sharepoint_request renews the token itself.
+        folder = id.empty? ? drives : items_by_id(id)
+        @entries = folder.map { |item| directory_entry(item) }.compact
+        @sorter.call(@entries)
+      end
+
+      # Not used as we currently only deal with Client Credentials flow
+      # @return [Addressable::URI]
+      #   Authorization url that is used to request the initial access code from Sharepoint/Onedrive/365/etc
+      def auth_link(*_args)
+        # Uses the configured tenant instead of a fixed organisation name.
+        Addressable::URI.parse("#{LOGIN_HOST}/#{config[:tenant_id]}/oauth2/v2.0/authorize")
+      end
+
+      # @return [Boolean]
+      def authorized?
+        authorize! unless @token.present?
+        @token.present?
+      end
+
+      def authorize!
+        # TODO: handle other authentication strategies (other than client_credentials)
+        register_access_token(sharepoint_session.get_access_token)
+      end
+
+      # @param [String] id of the file on MS graph drive
+      # @return [Array<String, Hash>] download URL and file name/size
+      def link_for(id)
+        file = items_by_id(id)
+        extras = { file_name: file['name'], file_size: file['size'].to_i }
+        [download_url(file), extras]
+      end
+
+      private
+
+      def token_expired?
+        return true if expiration_time.nil?
+
+        Time.now.to_i > expiration_time
+      end
+
+      def session
+        AuthenticationFactory.new(
+          self.class.authentication_klass,
+          client_id: config[:client_id],
+          client_secret: config[:client_secret],
+          access_token: sharepoint_token,
+          domain: config[:domain],
+          site_name: config[:site_name]
+        )
+      end
+
+      def authenticate
+        session.authenticate
+      end
+
+      # If there is an active session, {@token} will be set by {BrowseEverythingController} using data stored in the
+      # session.
+      #
+      # @param [OAuth2::AccessToken] access_token
+      def register_access_token(access_token)
+        @token = {
+          'token' => access_token.token,
+          'expires_at' => access_token.expires_at
+        }
+      end
+
+      def sharepoint_token
+        return unless @token
+
+        @token.fetch('token', nil)
+      end
+
+      def expiration_time
+        return unless @token
+
+        @token.fetch('expires_at', nil).to_i
+      end
+
+      # Constructs a BrowseEverything::FileEntry object for a Sharepoint file
+      # resource
+      # @param file [Hash] item (file, folder or drive) metadata from MS-Graph
+      # @return [BrowseEverything::FileEntry]
+      def directory_entry(file)
+        path = make_path(file)
+        BrowseEverything::FileEntry.new(
+          path,
+          [key, path].join(':'),
+          file['name'],
+          file['size'],
+          Date.parse(file['lastModifiedDateTime']),
+          folder?(file)
+        )
+      end
+
+      # Derives a path from item (file or folder or drive) metadata
+      # that can be used in subsequent items_by_id calls
+      def make_path(file)
+        parent = file['parentReference']
+        # Without a parentReference the entry is addressed through its own root.
+        return "#{file['id']}/root/children" unless parent.present?
+
+        item_path = "#{parent['driveId']}/items/#{file['id']}"
+        folder?(file) ? "#{item_path}/children" : item_path
+      end
+
+      def folder?(file)
+        !file['file'].present?
+      end
+
+      ##################################################################
+      # The below are all candidates to go its own sharepoint api module
+      # or some such
+      ##################################################################
+
+      # Performs an authenticated GET and parses the JSON response body.
+      # @param sharepoint_uri [String]
+      def sharepoint_request(sharepoint_uri)
+        sharepoint_client
+
+        uri = URI.parse(sharepoint_uri)
+        http = Net::HTTP.new(uri.host, uri.port)
+        http.use_ssl = uri.scheme == 'https'
+
+        response = http.start do
+          http.request(Net::HTTP::Get.new(uri.request_uri, { 'Authorization' => "Bearer #{sharepoint_token}" }))
+        end
+        JSON.parse(response.body)
+      end
+
+      def site_id
+        @site_id ||= sharepoint_request("#{GRAPH_API}/sites/#{config[:domain]}:/sites/#{config[:site_name]}/")['id']
+      end
+
+      def drives
+        @drives ||= sharepoint_request("#{GRAPH_API}/sites/#{site_id}/drives")['value']
+      end
+
+      def items_by_id(id)
+        item = sharepoint_request("#{GRAPH_API}/sites/#{site_id}/drives/#{id}")
+        item['value'].present? ? item['value'] : item
+      end
+
+      def download_url(file)
+        file['@microsoft.graph.downloadUrl']
+      end
+
+      # Registers a fresh access token once the current one is missing or expired.
+      def sharepoint_client
+        return unless token_expired?
+
+        # Drop the stale token first: #session would otherwise seed the new
+        # Session with it and get_access_token would hand it straight back.
+        @token = nil
+        register_access_token(sharepoint_session.get_access_token)
+      end
+
+      def sharepoint_session
+        authenticate
+      end
+    end
+  end
+end