Skip to content

Commit

Permalink
overhauling to use terminologies. Currently busted on some strange ge…
Browse files Browse the repository at this point in the history
…m loading behavior
  • Loading branch information
flyingzumwalt committed Sep 13, 2010
1 parent ceb6a3c commit 1b334b6
Show file tree
Hide file tree
Showing 11 changed files with 44 additions and 239 deletions.
1 change: 1 addition & 0 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ begin
gem.add_dependency('nokogiri')
# gem.add_dependency('om', '>= 0.1.9')
gem.add_dependency('om', '>= 1.0')
gem.add_dependency('solrizer', '>=0.1.4')
# gem.add_dependency('yaml')

gem.add_development_dependency "rspec", ">= 1.2.9"
Expand Down
4 changes: 2 additions & 2 deletions lib/active_fedora.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
$: << 'lib'
require 'logger'
require 'active_fedora/solr_service.rb'
require 'active_fedora/solr_mapper.rb'
require 'solrizer/field_name_mapper'

SOLR_DOCUMENT_ID = ActiveFedora::SolrService.mappings["id"] unless defined?(SOLR_DOCUMENT_ID)
SOLR_DOCUMENT_ID = Solrizer::FieldNameMapper.mappings["id"] unless defined?(SOLR_DOCUMENT_ID)
ENABLE_SOLR_UPDATES = true unless defined?(ENABLE_SOLR_UPDATES)

require 'ruby-fedora'
Expand Down
3 changes: 2 additions & 1 deletion lib/active_fedora/base.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
require 'util/class_level_inheritable_attributes'
require 'active_fedora/model'
require 'active_fedora/semantic_node'
require 'solrizer/field_name_mapper'
require 'nokogiri'

SOLR_DOCUMENT_ID = "id" unless defined?(SOLR_DOCUMENT_ID)
Expand Down Expand Up @@ -33,7 +34,7 @@ class Base
ms_inheritable_attributes :ds_specs
include Model
include SemanticNode
include SolrMapper
include Solrizer::FieldNameMapper

has_relationship "collection_members", :has_collection_member

Expand Down
14 changes: 8 additions & 6 deletions lib/active_fedora/metadata_datastream_helper.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
require 'solrizer/field_name_mapper'

# This module provides helper behavior shared by MetadataDatastream classes, a special case of ActiveFedora::Datastream
module ActiveFedora::MetadataDatastreamHelper

Expand All @@ -16,7 +18,7 @@ def fields

# Hook invoked when this module is mixed into +klass+.
# Extends +klass+ with ClassMethods (adding them as class-level methods) and
# includes the Solr field-name mapping modules into +klass+.
# NOTE(review): both ActiveFedora::SolrMapper and Solrizer::FieldNameMapper
# are included here; this looks like the old and the new mixin side by side —
# confirm whether the SolrMapper line should remain.
def self.included(klass)
klass.extend(ClassMethods)
klass.send(:include, ActiveFedora::SolrMapper)
klass.send(:include, Solrizer::FieldNameMapper)
end

#constructor, calls up to ActiveFedora::Datastream's constructor
Expand Down Expand Up @@ -71,10 +73,10 @@ def to_xml(xml = Nokogiri::XML::Document.parse("<fields />")) #:nodoc:
end


protected

# Returns the Solr field name for +field_name+ with the given +field_type+
# by delegating to +solr_name+.
# NOTE(review): +solr_name+ is not defined in this block; presumably it comes
# from the field-name mapper mixin included into the class — confirm.
def generate_solr_symbol(field_name, field_type) # :nodoc:
solr_name(field_name, field_type)
end
# protected
#
# def generate_solr_symbol(field_name, field_type) # :nodoc:
# solr_name(field_name, field_type)
# end

end
2 changes: 1 addition & 1 deletion lib/active_fedora/model.rb
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def solr_search(query, args={})
def find_by_solr(query, args={})
if query == :all
escaped_class_name = self.name.gsub(/(:)/, '\\:')
SolrService.instance.conn.query("#{SolrMapper.solr_name(:active_fedora_model, :symbol)}:#{escaped_class_name}", args)
SolrService.instance.conn.query("#{Solrizer::FieldNameMapper.solr_name(:active_fedora_model, :symbol)}:#{escaped_class_name}", args)
elsif query.class == String
escaped_id = query.gsub(/(:)/, '\\:')
SolrService.instance.conn.query("#{SOLR_DOCUMENT_ID}:#{escaped_id}", args)
Expand Down
56 changes: 4 additions & 52 deletions lib/active_fedora/nokogiri_datastream.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
require "nokogiri"
require "om"
require "solrizer/xml"

#this class represents a MetadataDatastream, a special case of ActiveFedora::Datastream
class ActiveFedora::NokogiriDatastream < ActiveFedora::Datastream

include ActiveFedora::MetadataDatastreamHelper
include OM::XML::Document
include Solrizer::XML::TerminologyBasedSolrizer # this adds support for calling .to_solr

# extend(OM::XML::Container::ClassMethods)

attr_accessor :ng_xml
Expand Down Expand Up @@ -75,58 +79,6 @@ def to_xml(xml = self.ng_xml)
return xml.to_xml {|config| config.no_declaration}
end

# Solrizes every accessor declared on this datastream's class into +solr_doc+
# (a new Solr::Document when none is given) and returns that document.
# When the class declares no accessors the document is returned untouched.
def to_solr(solr_doc = Solr::Document.new) # :nodoc:
  return solr_doc if self.class.accessors.nil?

  self.class.accessors.each_pair do |name, info|
    solrize_accessor(name, info, :solr_doc => solr_doc)
  end
  solr_doc
end

# Recursively adds Solr fields for one accessor and all of its child accessors.
#
# accessor_name - name of the accessor to solrize
# accessor_info - the accessor's info hash; when nil it is looked up on the
#                 class using the full accessor pointer
# opts          - :solr_doc => document to append fields to
#                              (a new Solr::Document when omitted)
#                 :parents  => pointer entries leading to this accessor
#                              (defaults to [])
#
# Raises a RuntimeError naming the accessor pointer when no accessor info
# can be found.
def solrize_accessor(accessor_name, accessor_info, opts={})
  # Block form: only build a Solr::Document when the caller supplied none.
  solr_doc = opts.fetch(:solr_doc) { Solr::Document.new }
  parents = opts.fetch(:parents, [])

  accessor_pointer = parents + [accessor_name]

  if accessor_info.nil?
    accessor_info = self.class.accessor_info(accessor_pointer)
    # accessor_info is nil on this path, so report the pointer instead of
    # calling a method on nil (which would raise NoMethodError).
    raise "No accessor is defined for #{accessor_pointer.inspect}" if accessor_info.nil?
  end

  # prep children hash
  child_accessors = accessor_info.fetch(:children, {})
  xpath = self.class.accessor_xpath(*accessor_pointer)

  # each_with_index yields the node's real position, even when two nodes
  # compare equal, and avoids rescanning the nodeset for every node.
  lookup(xpath).each_with_index do |node, node_index|
    # create solr fields
    solrize_node(node, accessor_pointer, solr_doc)

    child_parents = parents + [{accessor_name => node_index}]
    child_accessors.each_pair do |child_accessor_name, child_accessor_info|
      solrize_accessor(child_accessor_name, child_accessor_info,
                       :solr_doc => solr_doc, :parents => child_parents)
    end
  end
end

# Appends Solr text fields holding +node+'s text to +solr_doc+.
# A field named after the accessor's generic name is always added; a second
# field named after the hierarchical name is added only when the accessor
# pointer has more than one entry.
def solrize_node(node, accessor_pointer, solr_doc = Solr::Document.new)
  generic_base = self.class.accessor_generic_name(*accessor_pointer)
  generic_name = generate_solr_symbol(generic_base, :text)
  solr_doc << Solr::Field.new(generic_name => node.text)

  return unless accessor_pointer.length > 1

  hierarchical_base = self.class.accessor_hierarchical_name(*accessor_pointer)
  hierarchical_name = generate_solr_symbol(hierarchical_base, :text)
  solr_doc << Solr::Field.new(hierarchical_name => node.text)
end

def update_indexed_attributes(params={}, opts={})
if self.class.terminology.nil?
raise "No terminology is set for this NokogiriDatastream class. Cannot perform update_indexed_attributes"
Expand Down
3 changes: 2 additions & 1 deletion lib/active_fedora/rels_ext_datastream.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
require 'solrizer/field_name_mapper'

module ActiveFedora
class RelsExtDatastream < Datastream

include ActiveFedora::SemanticNode
include ActiveFedora::SolrMapper
include Solrizer::FieldNameMapper


def initialize(attrs=nil)
Expand Down
28 changes: 20 additions & 8 deletions lib/active_fedora/solr_service.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
require 'solr'
require "active_fedora/solr_mapper"
require "yaml"
module ActiveFedora
class SolrService
require "solrizer/field_name_mapper"

module ActiveFedora
class SolrService

@@mappings = {}
attr_reader :conn

def self.register(host=nil, args={})
Expand All @@ -28,7 +27,7 @@ def self.reify_solr_results(solr_result)
end
results = []
solr_result.hits.each do |hit|
model_value = hit[Solrizer::SolrMapper.solr_name("active_fedora_model", :symbol)].first
model_value = hit[Solrizer::FieldNameMapper.solr_name("active_fedora_model", :symbol)].first
if model_value.include?("::")
classname = eval(model_value)
else
Expand All @@ -53,9 +52,22 @@ def self.escape_uri_for_query(uri)
return uri.gsub(/(:)/, '\\:')
end

# Returns the Solr field name mappings held by Solrizer::FieldNameMapper.
def self.mappings
Solrizer::FieldNameMapper.mappings
end
# Replaces the Solr field name mappings held by Solrizer::FieldNameMapper
# with +mappings+.
def self.mappings=(mappings)
Solrizer::FieldNameMapper.mappings = mappings
end

# Returns the memoized logger: RAILS_DEFAULT_LOGGER when that constant is
# defined, otherwise a Logger writing to STDOUT.
def self.logger
  return @logger if @logger

  @logger = if defined?(RAILS_DEFAULT_LOGGER)
              RAILS_DEFAULT_LOGGER
            else
              Logger.new(STDOUT)
            end
end


# (Re)loads the Solr field name mappings by delegating to
# Solrizer::FieldNameMapper.load_mappings.
#
# config_path - passed through unchanged; defaults to nil
#
# Defined at class level so it can be called as SolrService.load_mappings,
# matching the class-level +mappings+ accessors.
def self.load_mappings( config_path=nil )
  Solrizer::FieldNameMapper.load_mappings(config_path)
end

# Instance-level form of the same delegation, kept so callers that hold a
# SolrService instance keep working.
def load_mappings( config_path=nil )
  Solrizer::FieldNameMapper.load_mappings(config_path)
end

class SolrNotInitialized < StandardError;end
end
end #SolrService
end #ActiveFedora
1 change: 0 additions & 1 deletion spec/unit/base_file_management_spec.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
require "active_fedora"

# Some tentative extensions to ActiveFedora::Base

Expand Down
140 changes: 4 additions & 136 deletions spec/unit/nokogiri_datastream_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
after(:each) do
end

# Verifies that NokogiriDatastream mixes in Solrizer::XML::TerminologyBasedSolrizer.
it "should include the Solrizer::XML::TerminologyBasedSolrizer for .to_solr support" do
ActiveFedora::NokogiriDatastream.included_modules.should include(Solrizer::XML::TerminologyBasedSolrizer)
end

describe '#new' do
it 'should provide #new' do
ActiveFedora::NokogiriDatastream.should respond_to(:new)
Expand Down Expand Up @@ -275,140 +279,4 @@
end
end


# Examples for NokogiriDatastream#to_solr, exercised through @test_ds
# (set up outside this block).
describe ".to_solr" do

after(:all) do
# Revert to default mappings after running tests
ActiveFedora::SolrService.load_mappings
end

# Stubs the class-level accessors and expects one solrize_accessor call per
# accessor, each receiving the same Solr::Document.
it "should iterate through the class accessors, calling .solrize_accessor on each and passing in the solr doc" do
mock_accessors = {:accessor1=>:accessor1_info, :accessor2=>:accessor2_info}
ActiveFedora::NokogiriDatastream.stubs(:accessors).returns(mock_accessors)
doc = Solr::Document.new
mock_accessors.each_pair do |k,v|
@test_ds.expects(:solrize_accessor).with(k, v, :solr_doc=>doc)
end
@test_ds.to_solr(doc)
end

it "should provide .to_solr and return a SolrDocument" do
@test_ds.should respond_to(:to_solr)
@test_ds.to_solr.should be_kind_of(Solr::Document)
end

# The document passed in must be the very object that is returned (identity, not equality).
it "should optionally allow you to provide the Solr::Document to add fields to and return that document when done" do
doc = Solr::Document.new
@test_ds.to_solr(doc).should equal(doc)
end

end

# Examples for NokogiriDatastream#solrize_accessor.
# NOTE(review): the examples below reference AccessorizedDs, whose only
# visible definition is inside the commented-out before(:all) block, while
# @mods_ds is built from Hydra::SampleModsDatastream — confirm that
# AccessorizedDs is defined elsewhere, otherwise these examples cannot run.
describe ".solrize_accessor" do
# before(:all) do
# class AccessorizedDs < ActiveFedora::NokogiriDatastream
#
# root_property :mods, "mods", "http://www.loc.gov/mods/v3", :attributes=>["id", "version"], :schema=>"http://www.loc.gov/standards/mods/v3/mods-3-2.xsd"
#
# accessor :title_info, :relative_xpath=>'oxns:titleInfo', :children=>[
# {:main_title=>{:relative_xpath=>'oxns:title'}},
# {:language =>{:relative_xpath=>{:attribute=>"lang"} }}
# ]
# accessor :finnish_title_info, :relative_xpath=>'oxns:titleInfo[@lang="finnish"]', :children=>[
# {:main_title=>{:relative_xpath=>'oxns:title'}},
# {:language =>{:relative_xpath=>{:attribute=>"lang"} }}
# ]
# accessor :abstract
# accessor :topic_tag, :relative_xpath=>'oxns:subject/oxns:topic'
# accessor :person, :relative_xpath=>'oxns:name[@type="personal"]', :children=>[
# {:last_name=>{:relative_xpath=>'oxns:namePart[@type="family"]'}},
# {:first_name=>{:relative_xpath=>'oxns:namePart[@type="given"]'}},
# {:institution=>{:relative_xpath=>'oxns:affiliation'}},
# {:role=>{:children=>[
# {:text=>{:relative_xpath=>'oxns:roleTerm[@type="text"]'}},
# {:code=>{:relative_xpath=>'oxns:roleTerm[@type="code"]'}}
# ]}}
# ]
# end
# end
#
# before(:each) do
# file = fixture(File.join("mods_articles", "hydrangea_article1.xml"))
# @mods_ds = AccessorizedDs.new(:blob=>file)
# end

# Builds a fresh datastream from the hydrangea_article1.xml fixture for every example.
before(:each) do
@mods_ds = Hydra::SampleModsDatastream.new(:blob=>fixture(File.join("mods_articles","hydrangea_article1.xml")))
end

it "should perform a lookup and iterate over nodes in the result set calling solrize_node then calling solrize_accessor on any of the children, adding accessor_name & node index to parents array" do
mock_title_info_set = ["TI1", "TI2"]
mock_main_title_set = ["main title"]
mock_language_set = ["language"]

solr_doc = Solr::Document.new

AccessorizedDs.expects(:accessor_xpath).with( :title_info ).returns("title_info_xpath")
@mods_ds.expects(:lookup).with( "title_info_xpath" ).returns(mock_title_info_set)

# For each mocked title_info node, expect the node itself to be solrized and
# both child accessors (main_title, language) to be looked up and solrized
# with the node's index recorded in the parents pointer.
mock_title_info_set.each do |tin|
node_index = mock_title_info_set.index(tin)
@mods_ds.expects(:solrize_node).with(tin, [:title_info], solr_doc)

# Couldn't mock the recursive calls to solrize_accessor without preventing the initial one, so was forced to mock out the whole recursive stack.
# @mods_ds.expects(:solrize_accessor).with(:main_title, AccessorizedDs.accessors[:title_info][:children][:main_title], :parents=>[{:title_info=>node_index}])
# @mods_ds.expects(:solrize_accessor).with(:language, AccessorizedDs.accessors[:title_info][:children][:language], :parents=>[{:title_info=>node_index}])
AccessorizedDs.expects(:accessor_xpath).with( {:title_info=>node_index}, :main_title ).returns("title_info_main_title_xpath")
AccessorizedDs.expects(:accessor_xpath).with( {:title_info=>node_index}, :language ).returns("title_info_language_xpath")
@mods_ds.expects(:lookup).with( "title_info_main_title_xpath" ).returns(mock_main_title_set)
@mods_ds.expects(:lookup).with( "title_info_language_xpath" ).returns(mock_language_set)
@mods_ds.expects(:solrize_node).with("main title", [{:title_info=>node_index}, :main_title], solr_doc)
@mods_ds.expects(:solrize_node).with("language", [{:title_info=>node_index}, :language], solr_doc)
end

@mods_ds.solrize_accessor(:title_info, AccessorizedDs.accessors[:title_info], :solr_doc=>solr_doc)

end

# Marked pending: the call after +pending+ is not asserted on.
it "should not call solrize_accessor once it reaches an accessor with no children accessors set" do
pending "not sure how to test for this"
@mods_ds.solrize_accessor(:text, AccessorizedDs.accessor_info( [{:person=>1}, :last_name] ), :parents=>[{:person=>1}])
end

# NOTE(review): this example makes no assertions; it only checks that the calls do not raise.
it "should use values form parents array when requesting accessor_xpath and when generating solr field names" do
parents_array = [{:person=>0}, {:role=>1}]
AccessorizedDs.accessors[:person][:children][:role][:children][:text]

# This should catch the "submitter" roleTerm from the second role node within the first person node and put it into a solr field called "person_0_role_2_text_0_t" and a solr field called "person_role_text_t"
@mods_ds.solrize_accessor(:text, AccessorizedDs.accessor_info( *parents_array + [:text] ), :parents=>parents_array)
end

# End-to-end check: solrizes the fixture and compares selected field values.
it "should use Solr mappings to generate field names" do

solr_doc = @mods_ds.to_solr
#should have these

solr_doc[:abstract_t].should == "ABSTRACT"
solr_doc[:title_info_1_language_t].should == "finnish"
solr_doc[:person_1_role_0_text_t].should == "teacher"
solr_doc[:finnish_title_info_language_t].should == "finnish"
solr_doc[:finnish_title_info_main_title_t].should == "Artikkelin otsikko Hydrangea artiklan 1"

# solr_doc[:mydate_date].should == "fake-date"
#
# solr_doc[:publisher_t].should be_nil
# solr_doc[:coverage_t].should be_nil
# solr_doc[:creation_date_dt].should be_nil
# solr_doc.should == ""

end
end

# Placeholder examples for solrize_node: each +it+ is declared without a
# block, so none of them contains any assertions yet.
describe ".solrize_node" do
it "should create a solr field containing node.text"
it "should create hierarchical field entries if parents is not empty"
it "should only create one node if parents is empty"
end

end
Loading

0 comments on commit 1b334b6

Please sign in to comment.