Skip to content
This repository has been archived by the owner on Jun 27, 2020. It is now read-only.

Commit

Permalink
Refactored external file management
Browse files Browse the repository at this point in the history
  • Loading branch information
dchandekstark committed Aug 6, 2014
1 parent 88316a2 commit 15863bf
Show file tree
Hide file tree
Showing 14 changed files with 241 additions and 95 deletions.
2 changes: 0 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ script: "bundle exec rake dul_hydra:ci:build"
notifications:
email:
- [email protected]
env:
- EXTERNAL_FILE_STORE="/tmp"
# To exclude antivirus tests:
# env:
# - SPEC_OPTS="--tag ~antivirus"
Expand Down
28 changes: 28 additions & 0 deletions app/models/concerns/dul_hydra/datastream_behavior.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
module DulHydra
  # Helpers for datastreams whose content lives in an external file.
  #
  # Every method here raises a RuntimeError when the receiver is not an
  # external datastream (i.e. `external?` is false). The including class
  # is expected to provide `external?`, `new?`, `versions` and `dsLocation`.
  module DatastreamBehavior

    # Lists the external file paths for all versions of the datastream.
    #
    # For a new datastream only the current path is considered; otherwise
    # each version is asked for its own path. Versions without a file path
    # (nil) are dropped from the result.
    def file_paths
      unless external?
        raise "The `file_paths' method is valid only for external datastreams."
      end

      if new?
        Array(file_path)
      else
        versions.map { |version| version.file_path }.compact
      end
    end

    # Returns the external file path taken from dsLocation,
    # or nil when dsLocation is not a file URI.
    def file_path
      unless external?
        raise "The `file_path' method is valid only for external datastreams."
      end

      return nil unless DulHydra::Utils.file_uri?(dsLocation)
      DulHydra::Utils.path_from_uri(dsLocation)
    end

    # Returns the base name of the external file (see #file_path),
    # or nil when there is no file path.
    def file_name
      unless external?
        raise "The `file_name' method is valid only for external datastreams."
      end

      located = file_path
      located ? File.basename(located) : nil
    end

  end
end
114 changes: 58 additions & 56 deletions app/models/concerns/dul_hydra/file_management.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ module FileManagement
extend ActiveSupport::Concern

included do
attr_accessor :file_to_add

define_model_callbacks :add_file
before_add_file :virus_scan

after_save :notify_virus_scan_results

# Deleting the datastream external files on destroying the object can't
Expand All @@ -11,74 +16,77 @@ module FileManagement
around_destroy :cleanup_external_files_on_destroy
end

# Override Hydra::ModelMethods
# XXX I would prefer the signature to be: add_file(file, dsid, opts={})
def add_file file, dsid, file_name
virus_scan(file)
mime_type = DulHydra::Utils.mime_type_for(file, file_name)
if datastreams.include?(dsid) && datastreams[dsid].external?
return add_external_file(file, dsid, file_name: file_name, mime_type: mime_type)
# Comparable to Hydra::ModelMethods method add_file(file, dsid, file_name)
def add_file file, dsid, opts={}
self.file_to_add = file
run_callbacks(:add_file) do
opts[:file_name] ||= DulHydra::Utils.file_name_for(file)
opts[:mime_type] ||= DulHydra::Utils.mime_type_for(file, opts[:file_name])
if opts.delete(:external) || datastreams.include?(dsid) && datastreams[dsid].external?
add_external_file(file, dsid, opts)
else
# ActiveFedora method
add_file_datastream(file, dsid: dsid, mimeType: opts[:mime_type])
end
end
add_file_datastream(file, dsid: dsid, mimeType: mime_type)
self.file_to_add = nil
end

# Normally this method should not be called directly
# Call #add_file with dsid for external datastream, or :external => true if no spec for dsid
def add_external_file file, dsid, opts={}
file_path = DulHydra::Utils.file_path(file) # raises ArgumentError

# Retrieve or create the datastream
datastream = datastreams.include?(dsid) ? datastreams[dsid] : add_external_datastream(dsid)
ds = datastreams.include?(dsid) ? datastreams[dsid] : add_external_datastream(dsid)

raise ArgumentError, "Cannot add external file to datastream with controlGroup \"#{ds.controlGroup}\": #{ds.inspect}" unless ds.external?

raise DulHydra::Error, "Cannot add external file to datastream when dsLocation change is pending." if datastream.dsLocation_changed?
raise DulHydra::Error, "Cannot add external file to datastream when dsLocation change is pending." if ds.dsLocation_changed?

# set the mime type
datastream.mimeType = opts[:mime_type] || DulHydra::Utils.mime_type_for(file, file_path)
# the :mime_type option will be set when called from #add_file
# the fallback is there in case #add_external_file is called directly
ds.mimeType = opts[:mime_type] || DulHydra::Utils.mime_type_for(file, file_path)

# copy the file to storage unless we're using the original
if opts[:use_original]
store_path = file_path
else
# generate storage path
file_name = opts[:file_name] || File.basename(file_path)
file_name = opts[:file_name] || DulHydra::Utils.file_name_for(file)
store_path = generate_external_file_path(file_name)
# create new directory
FileUtils.mkdir_p File.dirname(store_path)
# copy the original file to the storage location
FileUtils.cp file_path, store_path
end

datastream.dsLocation = DulHydra::Utils.path_to_uri(store_path)
ds.dsLocation = DulHydra::Utils.path_to_uri(store_path)
end

def external_datastream_file_paths ds=nil
paths = []
if ds
raise ArgumentError, "Datastream not present on object \"#{pid}\": #{ds.inspect}" unless ds
raise ArgumentError, "Datastream is not external: #{ds.inspect}" unless ds.external?
ds.versions.each { |dsVersion| paths << external_datastream_file_path(dsVersion) }
else # iterate over all external datastreams
datastreams.values.select { |ds| ds.external? }.each do |ds|
paths.concat external_datastream_file_paths(ds)
end
end
paths.compact
#
# Builds the full storage path for file_name by joining the external file
# store, a generated subpath, and the file name itself.
#
# Example: file_name = "special.doc"
#
# => {external_file_store}/1/e/69/1e691815-0631-4f9b-8e23-2dfb2eec9c70/special.doc
#
def generate_external_file_path file_name
  store_root = external_file_store
  subpath = generate_external_file_subpath
  File.join(store_root, subpath, file_name)
end

def external_datastream_file_path ds
raise ArgumentError, "Datastream is not external: #{ds.inspect}" unless ds.external?
DulHydra::Utils.path_from_uri(ds.dsLocation) if DulHydra::Utils.file_uri?(ds.dsLocation)
# Returns the datastreams of this object that report themselves as external.
def external_datastreams
  datastreams.values.select(&:external?)
end

def external_datastream_file_name ds
path = external_datastream_file_path(ds)
path ? File.basename(path) : nil
# Returns one flat array with the file paths of every external datastream
# on the object (all versions, as reported by each datastream's #file_paths).
def external_datastream_file_paths
  # flat_map concatenates the per-datastream arrays in a single pass instead
  # of building a nested array and flattening it afterwards.
  external_datastreams.flat_map(&:file_paths)
end

# This method essentially duplicates what ActiveFedora::Datastreams#add_file_datastream
# does for adding a managed file datastream.

def add_external_datastream dsid, opts={}
options = {controlGroup: "E", dsLabel: "External file"}.merge(opts)
klass = self.class.datastream_class_for_name(dsid)
datastream = create_datastream(klass, dsid, options)
datastream = create_datastream(klass, dsid, controlGroup: "E")
add_datastream(datastream)
self.class.build_datastream_accessor(dsid)
datastream
Expand All @@ -90,30 +98,18 @@ def virus_scan_results
@virus_scan_results ||= []
end

def notify_virus_scan_results
while virus_scan_results.present?
result = virus_scan_results.shift
ActiveSupport::Notifications.instrument(DulHydra::Notifications::VIRUS_CHECK, result: result, pid: pid)
# Scans the file currently held in file_to_add and queues the scan result
# in virus_scan_results. Does nothing for anything that is not a file or a
# path (a blob can't be virus scanned).
def virus_scan
  candidate = file_to_add
  return unless DulHydra::Utils.file_or_path?(candidate)
  virus_scan_results << DulHydra::Services::Antivirus.scan(candidate)
end

def virus_scan file
if DulHydra::Utils.file_or_path?(file) # can't virus scan blob
virus_scan_results << DulHydra::Services::Antivirus.scan(file)
# Drains the queued virus scan results, publishing one VIRUS_CHECK
# notification per result together with this object's pid.
def notify_virus_scan_results
  while (scan_result = virus_scan_results.shift)
    ActiveSupport::Notifications.instrument(
      DulHydra::Notifications::VIRUS_CHECK,
      result: scan_result,
      pid: pid
    )
  end
end

#
# Generates a full path storage location for the file_name
#
# Example: file_name = "special.doc"
#
# => {external_file_store}/1/e/69/1e691815-0631-4f9b-8e23-2dfb2eec9c70/special.doc
#
def generate_external_file_path file_name
File.join(external_file_store, generate_external_file_subpath, file_name)
end

# Returns the external file store as configured on the DulHydra module
# (DulHydra.external_file_store).
def external_file_store
DulHydra.external_file_store
end
Expand All @@ -124,8 +120,14 @@ def generate_external_file_dirname

def generate_external_file_subpath
dirname = generate_external_file_dirname
m = DulHydra.external_file_subpath_regexp.match(dirname)
subpath_segments = m.to_a[1..-1] << dirname
subpath_segments = []
start = 0
DulHydra.external_file_subpath_pattern.each do |seg|
finish = start + seg - 1
subpath_segments << dirname[start..finish]
start = finish + 1
end
subpath_segments << dirname
File.join *subpath_segments
end

Expand Down
20 changes: 6 additions & 14 deletions app/models/concerns/dul_hydra/has_content.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,19 @@ module HasContent
end

def original_filename
external_datastream_file_name(content)
content.external? ? content.file_name : properties.original_filename.first
end

# Set content to file and return boolean for changed (true)/not changed (false)
def upload file, file_name=nil
file_name ||= if file.respond_to?(:original_filename)
file.original_filename
elsif file.respond_to?(:path)
File.basename(file.path)
elsif file.is_a?(String) && (file.length < 1024) && File.exists?(file)
File.basename(file)
else
raise ArgumentError, "File name not provided and unable to determine from file."
end
add_file(file, DulHydra::Datastreams::CONTENT, file_name)
def upload file, opts={}
add_file(file, DulHydra::Datastreams::CONTENT, opts)
end

# Set content to file and save if changed.
# Return boolean for success of upload and save.
def upload! file, file_name=nil
upload(file, file_name) && save
def upload! file, opts={}
upload(file, opts)
save
end

def content_type
Expand Down
2 changes: 0 additions & 2 deletions app/models/dul_hydra/base.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
module DulHydra
class Base < ActiveFedora::Base

include Hydra::ModelMethods

include Describable
include Governable
include AccessControllable
Expand Down
5 changes: 5 additions & 0 deletions config/initializers/extensions/active_fedora/datastream.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Reopens ActiveFedora::Datastream to mix in DulHydra::DatastreamBehavior,
# making that module's instance methods available on datastream objects.
module ActiveFedora
class Datastream
include DulHydra::DatastreamBehavior
end
end
12 changes: 0 additions & 12 deletions lib/dul_hydra.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,4 @@ module DulHydra

include DulHydra::Configurable

def self.external_file_subpath_regexp
@@external_file_subpath_regexp ||=
begin
pattern = external_file_subpath_pattern
unless pattern.respond_to?(:each)
# pattern might be a string, e.g., "1, 1, 2"
pattern = pattern.split(/\s?,/).map(&:to_i)
end
Regexp.new pattern.each_with_object("^") {|p, memo| memo << "(\\h{#{p}})"}
end
end

end
5 changes: 1 addition & 4 deletions lib/dul_hydra/batch/models/batch_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -218,10 +218,7 @@ def populate_datastream(repo_object, datastream)
when DulHydra::Batch::Models::BatchObjectDatastream::PAYLOAD_TYPE_BYTES
repo_object.datastreams[datastream[:name]].content = datastream[:payload]
when DulHydra::Batch::Models::BatchObjectDatastream::PAYLOAD_TYPE_FILENAME
file = File.new(datastream[:payload])
file_name = File.basename(datastream[:payload])
dsid = datastream[:name]
repo_object.add_file(file, dsid, file_name)
repo_object.add_file File.new(datastream[:payload]), datastream[:name]
end
return repo_object
end
Expand Down
5 changes: 2 additions & 3 deletions lib/dul_hydra/configurable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,8 @@ module Configurable
# Base directory of external file store
mattr_accessor :external_file_store

# Pattern (String or Array) for building external file subpath from hex digest
# Examples:
# - "1, 1, 2"
# Pattern (Array) for building external file subpath from hex digest
# Example:
# - [1, 1, 2]
mattr_accessor :external_file_subpath_pattern
end
Expand Down
5 changes: 5 additions & 0 deletions lib/dul_hydra/utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ def self.file_path file
end
end

# Determines a file name for +file+.
#
# Prefers the object's own `original_filename` when it responds to that
# method and the value is present. Otherwise falls back to the base name of
# the path resolved by file_path. Returns nil when that fallback raises any
# StandardError (best effort).
def self.file_name_for file
  has_upload_name = file.respond_to?(:original_filename) && file.original_filename.present?
  return file.original_filename if has_upload_name

  begin
    File.basename(file_path(file))
  rescue StandardError
    nil
  end
end

def self.file_uri?(uri)
return false unless uri
URI.parse(uri).scheme == "file"
Expand Down
40 changes: 40 additions & 0 deletions spec/extensions/active_fedora/datastream_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
require 'spec_helper'

# Specs for the file_path / file_name helpers called on ActiveFedora::Datastream.
module ActiveFedora
describe Datastream do

describe "extensions for external datastreams" do
# A datastream built with controlGroup "E" and no other arguments
# (presumably "E" marks it as external -- confirm against ActiveFedora).
subject { described_class.new(nil, nil, controlGroup: "E") }

# file_path is expected to be nil unless dsLocation is a file: URI.
describe "#file_path" do
it "should return nil when dsLocation is not set" do
expect(subject.file_path).to be_nil
end
it "should return nil when dsLocation is not a file URI" do
subject.dsLocation = "http://library.duke.edu/"
expect(subject.file_path).to be_nil
end
it "should return the file path when dsLocation is a file URI" do
subject.dsLocation = "file:/tmp/foo/bar.txt"
expect(subject.file_path).to eq "/tmp/foo/bar.txt"
end
end

# file_name is expected to be the base name of the file path, or nil
# under the same conditions in which file_path is nil.
describe "#file_name" do
it "should return nil when dsLocation is not set" do
expect(subject.file_name).to be_nil
end
it "should return nil when dsLocation is not a file URI" do
subject.dsLocation = "http://library.duke.edu/"
expect(subject.file_name).to be_nil
end
it "should return the file name when dsLocation is a file URI" do
subject.dsLocation = "file:/tmp/foo/bar.txt"
expect(subject.file_name).to eq "bar.txt"
end
end

end # external datastreams

end
end
Loading

0 comments on commit 15863bf

Please sign in to comment.