Skip to content
This repository has been archived by the owner on Jun 27, 2020. It is now read-only.

Commit

Permalink
Add external file support
Browse files Browse the repository at this point in the history
Content datastream now uses controlGroup E
Closes #972
  • Loading branch information
dchandekstark committed Aug 1, 2014
1 parent c9cdf55 commit 1da0d64
Show file tree
Hide file tree
Showing 26 changed files with 474 additions and 380 deletions.
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ script: "bundle exec rake dul_hydra:ci:build"
notifications:
email:
- [email protected]
env:
- EXTERNAL_FILE_STORE="/tmp"
# To exclude antivirus tests:
# env:
# - SPEC_OPTS="--tag ~antivirus"
Expand Down
35 changes: 0 additions & 35 deletions app/actions/virus_check.rb

This file was deleted.

7 changes: 5 additions & 2 deletions app/controllers/downloads_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@ class DownloadsController < ApplicationController

def load_asset
# XXX Loading instance from solr doesn't work with customized datastream_name (below).
@asset = ActiveFedora::Base.find(params[asset_param_key], cast: true)
@asset = ActiveFedora::Base.find(params[asset_param_key])
end

def datastream_name
if datastream.dsid == DulHydra::Datastreams::CONTENT
if datastream.external?
file_path = DulHydra::Utils.path_from_uri(datastream.dsLocation)
return File.basename(file_path)
elsif datastream.dsid == DulHydra::Datastreams::CONTENT
return asset.original_filename if asset.original_filename.present?
if asset.identifier.present? # Identifier may be file name minus extension
identifier = asset.identifier.first
Expand Down
139 changes: 139 additions & 0 deletions app/models/concerns/dul_hydra/file_management.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
module DulHydra
module FileManagement
extend ActiveSupport::Concern

# Callbacks registered on every model that includes this concern.
included do
# Publish any virus scan results collected by virus_scan once the object has been saved.
after_save :notify_virus_scan_results

# Deleting the datastream external files on destroying the object can't
# be handled with a datastream around_destroy callback.
# See https://groups.google.com/d/msg/hydra-tech/xJaZr2wVhbg/4iafvso98w8J
around_destroy :cleanup_external_files_on_destroy
end

# Override Hydra::ModelMethods
# XXX I would prefer the signature to be: add_file(file, dsid, opts={})
#
# Virus-scans the file, works out its mime type, and then stores it:
# in the external datastream when `dsid` already names one, otherwise
# through add_file_datastream.
def add_file file, dsid, file_name
  virus_scan(file)
  mime_type = DulHydra::Utils.mime_type_for(file, file_name)
  targets_external = datastreams.include?(dsid) && datastreams[dsid].external?
  if targets_external
    add_external_file(file, dsid, file_name: file_name, mime_type: mime_type)
  else
    add_file_datastream(file, dsid: dsid, mimeType: mime_type)
  end
end

# Points the datastream `dsid` at a file on disk, creating the external
# datastream first when the object does not have it yet.
#
# Options read from opts:
#   :mime_type    - mime type to record (otherwise derived from the file)
#   :use_original - when truthy, reference the file where it is instead of copying it
#   :file_name    - name for the stored copy (defaults to the source file's basename)
#
# Returns the new dsLocation value.
# Raises DulHydra::Error when the datastream already has an unsaved dsLocation change.
def add_external_file file, dsid, opts={}
  source_path = DulHydra::Utils.file_path(file) # raises ArgumentError

  # Reuse the datastream if the object already has it, otherwise build one
  ds = datastreams.include?(dsid) ? datastreams[dsid] : add_external_datastream(dsid)

  if ds.dsLocation_changed?
    raise DulHydra::Error, "Cannot add external file to datastream when dsLocation change is pending."
  end

  ds.mimeType = opts[:mime_type] || DulHydra::Utils.mime_type_for(file, source_path)

  # By default the file is copied into a freshly generated storage directory;
  # with :use_original the datastream references the source file directly.
  target_path = source_path
  unless opts[:use_original]
    target_path = generate_external_file_path(opts[:file_name] || File.basename(source_path))
    FileUtils.mkdir_p File.dirname(target_path)
    FileUtils.cp source_path, target_path
  end

  ds.dsLocation = DulHydra::Utils.path_to_uri(target_path)
end

# Lists the file paths referenced by external datastreams.
#
# ds - an external datastream; when given, the paths of each of its versions
#      are returned. When omitted, the paths for every external datastream
#      on the object are returned.
#
# Versions whose location yields no path are left out.
# Returns an Array of paths.
# Raises ArgumentError if `ds` is given but is not external.
def external_datastream_file_paths ds=nil
  if ds
    raise ArgumentError, "Datastream is not external: #{ds.inspect}" unless ds.external?
    return ds.versions.map { |version| external_datastream_file_path(version) }.compact
  end
  # No datastream given: gather the paths of every external datastream
  datastreams.values.select(&:external?).flat_map do |external_ds|
    external_datastream_file_paths(external_ds)
  end
end

# Returns the file path that the external datastream's dsLocation refers to,
# or nil when the location is not a file URI.
# Raises ArgumentError if the datastream is not external.
def external_datastream_file_path ds
  raise ArgumentError, "Datastream is not external: #{ds.inspect}" unless ds.external?
  return nil unless DulHydra::Utils.file_uri?(ds.dsLocation)
  DulHydra::Utils.path_from_uri(ds.dsLocation)
end

# Returns the base name of the external datastream's file,
# or nil when the datastream has no file path.
def external_datastream_file_name ds
  file_path = external_datastream_file_path(ds)
  return nil unless file_path
  File.basename(file_path)
end

# This method essentially duplicates what ActiveFedora::Datastreams#add_file_datastream
# does for adding a managed file datastream.
#
# Creates a datastream for `dsid` (controlGroup "E" and label "External file"
# unless overridden through opts), adds it to the object, builds the accessor
# for it on the class, and returns the new datastream.
def add_external_datastream dsid, opts={}
  ds_options = {controlGroup: "E", dsLabel: "External file"}.merge(opts)
  ds_class = self.class.datastream_class_for_name(dsid)
  create_datastream(ds_class, dsid, ds_options).tap do |new_ds|
    add_datastream(new_ds)
    self.class.build_datastream_accessor(dsid)
  end
end

protected

# Lazily created queue of virus scan results; the same Array is returned on every call.
def virus_scan_results
  @virus_scan_results = [] unless @virus_scan_results
  @virus_scan_results
end

# Drains the queued virus scan results in order, instrumenting one
# DulHydra::Notifications::VIRUS_CHECK event (with the result and this
# object's pid) for each of them.
def notify_virus_scan_results
  until virus_scan_results.empty?
    scan_result = virus_scan_results.shift
    ActiveSupport::Notifications.instrument(DulHydra::Notifications::VIRUS_CHECK, result: scan_result, pid: pid)
  end
end

# Scans `file` with the antivirus service and queues the result.
# Input that is neither a file nor a path is skipped.
def virus_scan file
  return unless DulHydra::Utils.file_or_path?(file) # can't virus scan blob
  virus_scan_results << DulHydra::Services::Antivirus.scan(file)
end

#
# Builds the full storage path for file_name: the external file store,
# followed by a newly generated subpath, followed by the file name.
#
# Example: file_name = "special.doc"
#
# => {external_file_store}/1/e/69/1e691815-0631-4f9b-8e23-2dfb2eec9c70/special.doc
#
def generate_external_file_path file_name
  store_root = external_file_store
  subpath = generate_external_file_subpath
  File.join(store_root, subpath, file_name)
end

# Base location under which external files are stored; delegates to the
# DulHydra.external_file_store setting.
def external_file_store
DulHydra.external_file_store
end

# Returns a new random UUID string (SecureRandom.uuid); it is used as the
# last directory segment of a generated storage subpath.
def generate_external_file_dirname
SecureRandom.uuid
end

# Builds the relative directory path for a newly stored file.
#
# A fresh directory name is generated and matched against
# DulHydra.external_file_subpath_regexp; the regexp's captures become the
# leading path segments and the directory name itself is the final segment
# (e.g. "1/e/69/1e691815-0631-4f9b-8e23-2dfb2eec9c70").
#
# When the regexp does not match, the directory name alone is returned
# instead of failing with NoMethodError on nil.
def generate_external_file_subpath
  dirname = generate_external_file_dirname
  match = DulHydra.external_file_subpath_regexp.match(dirname)
  # Unmatched optional groups yield nil captures, which File.join cannot take
  leading_segments = match ? match.captures.compact : []
  File.join(*leading_segments, dirname)
end

# around_destroy callback: removes the files behind the object's external
# datastreams once the wrapped destroy has run.
#
# The paths are collected before yielding.
# NOTE(review): presumably because the datastreams can no longer be read
# after the destroy - confirm.
#
# Duplicate paths (e.g. several versions pointing at the same file) and files
# that are already gone are skipped, so one missing file no longer aborts the
# cleanup with Errno::ENOENT and leaves the remaining files behind.
#
# Returns the number of files deleted.
def cleanup_external_files_on_destroy
  paths = external_datastream_file_paths
  yield
  removable = paths.uniq.select { |path| File.exist?(path) }
  File.unlink(*removable)
end

end
end
68 changes: 31 additions & 37 deletions app/models/concerns/dul_hydra/has_content.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,37 +5,39 @@ module HasContent
extend ActiveSupport::Concern

included do
has_file_datastream name: DulHydra::Datastreams::CONTENT,
type: DulHydra::Datastreams::FileContentDatastream,
has_file_datastream name: DulHydra::Datastreams::CONTENT,
versionable: true,
label: "Content file for this object",
control_group: 'M'
label: "Content file for this object",
control_group: "E"

include Hydra::Derivatives
include DulHydra::VirusCheckable

# Original file name of content file should be stored in this property
has_attributes :original_filename, datastream: DulHydra::Datastreams::PROPERTIES, multiple: false

before_save :set_original_filename, if: :content_changed?, unless: :original_filename_changed?
before_save :set_content_type, if: :content_changed?
around_save :update_thumbnail, if: :content_changed?
around_save :update_thumbnail, if: :external_file_changed?
delegate :has_content?, to: :content
end

def content_changed?
content.content_changed?
def original_filename
external_datastream_file_name(content)
end

# Set content to file and return boolean for changed (true)/not changed (false)
def upload file
self.content.content = file
content_changed?
# Adds `file` to the content datastream through add_file.
#
# file      - an object responding to :original_filename or :path,
#             or a String naming an existing file
# file_name - name to record for the file; derived from `file` when omitted
#
# Returns whatever add_file returns.
# Raises ArgumentError when no file name is given and none can be derived.
def upload file, file_name=nil
  file_name ||= if file.respond_to?(:original_filename)
                  file.original_filename
                elsif file.respond_to?(:path)
                  File.basename(file.path)
                # File.exist? is used because File.exists? is deprecated and was removed in Ruby 3.2.
                # NOTE(review): the length guard presumably keeps raw content strings from
                # being probed as paths - confirm.
                elsif file.is_a?(String) && (file.length < 1024) && File.exist?(file)
                  File.basename(file)
                else
                  raise ArgumentError, "File name not provided and unable to determine from file."
                end
  add_file(file, DulHydra::Datastreams::CONTENT, file_name)
end

# Set content to file and save if changed.
# Return boolean for success of upload and save.
def upload! file
upload(file) && save
def upload! file, file_name=nil
upload(file, file_name) && save
end

def content_type
Expand All @@ -54,10 +56,6 @@ def pdf?
content_type == "application/pdf"
end

def has_content?
content.has_content?
end

def set_thumbnail
return unless has_content?
if image? or pdf?
Expand All @@ -66,9 +64,7 @@ def set_thumbnail
end

def validate_checksum! checksum, checksum_type
if content_changed?
raise DulHydra::Error, "Cannot validate checksum against unpersisted content."
end
raise DulHydra::Error, "Checksum cannot be validated on unpersisted object." if new_record?
if content.checksumType == checksum_type
content_checksum = content.checksum
else
Expand All @@ -86,26 +82,24 @@ def validate_checksum! checksum, checksum_type
end
end

protected

def set_original_filename
file = content.content
if file.respond_to?(:original_filename)
self.original_filename = file.original_filename
elsif file.respond_to?(:path)
self.original_filename = File.basename(file.path)
end
def virus_checks
VirusCheckEvent.for_object(self)
end

def set_content_type
file = content.content
self.content_type = file.content_type if file.respond_to?(:content_type)
protected

def external_file_changed?
content.dsLocation_changed?
end

def update_thumbnail
yield
set_thumbnail!
end

def default_content_type
"application/octet-stream"
end

end
end
1 change: 0 additions & 1 deletion app/models/concerns/dul_hydra/has_thumbnail.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ module HasThumbnail

included do
has_file_datastream name: DulHydra::Datastreams::THUMBNAIL,
type: DulHydra::Datastreams::FileContentDatastream,
versionable: true,
label: "Thumbnail for this object",
control_group: 'M'
Expand Down
9 changes: 8 additions & 1 deletion app/models/concerns/dul_hydra/solr_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,14 @@ def content_mime_type
alias_method :content_type, :content_mime_type

def content_size
content_ds["dsSize"] rescue nil
if content_ds["dsControlGroup"] == "E"
file_path = DulHydra::Utils.path_from_uri(content_ds["dsLocation"])
File.size(file_path)
else
content_ds["dsSize"]
end
rescue
nil
end

def content_checksum
Expand Down
28 changes: 0 additions & 28 deletions app/models/concerns/dul_hydra/virus_checkable.rb

This file was deleted.

3 changes: 3 additions & 0 deletions app/models/dul_hydra/base.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
module DulHydra
class Base < ActiveFedora::Base

include Hydra::ModelMethods

include Describable
include Governable
include AccessControllable
Expand All @@ -11,6 +13,7 @@ class Base < ActiveFedora::Base
include EventLoggable
include Validations
include FixityCheckable
include FileManagement

def to_solr(solr_doc=Hash.new, opts={})
solr_doc = super(solr_doc, opts)
Expand Down
Loading

0 comments on commit 1da0d64

Please sign in to comment.