diff --git a/Rakefile b/Rakefile index baed02e54..4db83a5b3 100644 --- a/Rakefile +++ b/Rakefile @@ -23,6 +23,7 @@ begin gem.add_dependency('nokogiri') # gem.add_dependency('om', '>= 0.1.9') gem.add_dependency('om', '>= 1.0') + gem.add_dependency('solrizer', '>=0.1.4') # gem.add_dependency('yaml') gem.add_development_dependency "rspec", ">= 1.2.9" diff --git a/lib/active_fedora.rb b/lib/active_fedora.rb index 4460fc07e..9372876b4 100644 --- a/lib/active_fedora.rb +++ b/lib/active_fedora.rb @@ -4,9 +4,9 @@ $: << 'lib' require 'logger' require 'active_fedora/solr_service.rb' -require 'active_fedora/solr_mapper.rb' +require 'solrizer/field_name_mapper' -SOLR_DOCUMENT_ID = ActiveFedora::SolrService.mappings["id"] unless defined?(SOLR_DOCUMENT_ID) +SOLR_DOCUMENT_ID = Solrizer::FieldNameMapper.mappings["id"] unless defined?(SOLR_DOCUMENT_ID) ENABLE_SOLR_UPDATES = true unless defined?(ENABLE_SOLR_UPDATES) require 'ruby-fedora' diff --git a/lib/active_fedora/base.rb b/lib/active_fedora/base.rb index 2feb4a51f..f09122f94 100644 --- a/lib/active_fedora/base.rb +++ b/lib/active_fedora/base.rb @@ -1,6 +1,7 @@ require 'util/class_level_inheritable_attributes' require 'active_fedora/model' require 'active_fedora/semantic_node' +require 'solrizer/field_name_mapper' require 'nokogiri' SOLR_DOCUMENT_ID = "id" unless defined?(SOLR_DOCUMENT_ID) @@ -33,7 +34,7 @@ class Base ms_inheritable_attributes :ds_specs include Model include SemanticNode - include SolrMapper + include Solrizer::FieldNameMapper has_relationship "collection_members", :has_collection_member diff --git a/lib/active_fedora/metadata_datastream_helper.rb b/lib/active_fedora/metadata_datastream_helper.rb index bcb79a517..138a584e5 100644 --- a/lib/active_fedora/metadata_datastream_helper.rb +++ b/lib/active_fedora/metadata_datastream_helper.rb @@ -1,3 +1,5 @@ +require 'solrizer/field_name_mapper' + #this class represents a MetadataDatastream, a special case of ActiveFedora::Datastream module ActiveFedora::MetadataDatastreamHelper @@ -16,7 +18,7 @@ def fields def self.included(klass) klass.extend(ClassMethods) - klass.send(:include, ActiveFedora::SolrMapper) + klass.send(:include, Solrizer::FieldNameMapper) end #constructor, calls up to ActiveFedora::Datastream's constructor @@ -71,10 +73,10 @@ def to_xml(xml = Nokogiri::XML::Document.parse("")) #:nodoc: end - protected - - def generate_solr_symbol(field_name, field_type) # :nodoc: - solr_name(field_name, field_type) - end + # protected + # + # def generate_solr_symbol(field_name, field_type) # :nodoc: + # solr_name(field_name, field_type) + # end end \ No newline at end of file diff --git a/lib/active_fedora/model.rb b/lib/active_fedora/model.rb index 64ec5f6a8..5473f68b9 100644 --- a/lib/active_fedora/model.rb +++ b/lib/active_fedora/model.rb @@ -89,7 +89,7 @@ def solr_search(query, args={}) def find_by_solr(query, args={}) if query == :all escaped_class_name = self.name.gsub(/(:)/, '\\:') - SolrService.instance.conn.query("#{SolrMapper.solr_name(:active_fedora_model, :symbol)}:#{escaped_class_name}", args) + SolrService.instance.conn.query("#{Solrizer::FieldNameMapper.solr_name(:active_fedora_model, :symbol)}:#{escaped_class_name}", args) elsif query.class == String escaped_id = query.gsub(/(:)/, '\\:') SolrService.instance.conn.query("#{SOLR_DOCUMENT_ID}:#{escaped_id}", args) diff --git a/lib/active_fedora/nokogiri_datastream.rb b/lib/active_fedora/nokogiri_datastream.rb index c6ca7249c..c7dafd269 100644 --- a/lib/active_fedora/nokogiri_datastream.rb +++ b/lib/active_fedora/nokogiri_datastream.rb @@ -1,10 +1,14 @@ require "nokogiri" require "om" +require "solrizer/xml" + #this class represents a MetadataDatastream, a special case of ActiveFedora::Datastream class ActiveFedora::NokogiriDatastream < ActiveFedora::Datastream include ActiveFedora::MetadataDatastreamHelper include OM::XML::Document + include Solrizer::XML::TerminologyBasedSolrizer # this adds support for calling .to_solr + # extend(OM::XML::Container::ClassMethods) attr_accessor :ng_xml @@ -75,58 +79,6 @@ def to_xml(xml = self.ng_xml) return xml.to_xml {|config| config.no_declaration} end - def to_solr(solr_doc = Solr::Document.new) # :nodoc: - - unless self.class.accessors.nil? - self.class.accessors.each_pair do |accessor_name,accessor_info| - solrize_accessor(accessor_name, accessor_info, :solr_doc=>solr_doc) - end - end - - return solr_doc - end - - def solrize_accessor(accessor_name, accessor_info, opts={}) - solr_doc = opts.fetch(:solr_doc, Solr::Document.new) - parents = opts.fetch(:parents, []) - - accessor_pointer = parents+[accessor_name] - - if accessor_info.nil? - accessor_info = self.class.accessor_info(accessor_pointer) - if accessor_info.nil? - raise "No accessor is defined for #{accessor_info.select}" - end - end - - # prep children hash - child_accessors = accessor_info.fetch(:children, {}) - xpath = self.class.accessor_xpath(*accessor_pointer) - nodeset = lookup(xpath) - - nodeset.each do |node| - # create solr fields - solrize_node(node, accessor_pointer, solr_doc) - child_accessors.each_pair do |child_accessor_name, child_accessor_info| - solrize_accessor(child_accessor_name, child_accessor_info, opts={:solr_doc=>solr_doc, :parents=>parents+[{accessor_name=>nodeset.index(node)}] }) - end - end - - end - - def solrize_node(node, accessor_pointer, solr_doc = Solr::Document.new) - generic_field_name_base = self.class.accessor_generic_name(*accessor_pointer) - generic_field_name = generate_solr_symbol(generic_field_name_base, :text) - - solr_doc << Solr::Field.new(generic_field_name => node.text) - - if accessor_pointer.length > 1 - hierarchical_field_name_base = self.class.accessor_hierarchical_name(*accessor_pointer) - hierarchical_field_name = generate_solr_symbol(hierarchical_field_name_base, :text) - solr_doc << Solr::Field.new(hierarchical_field_name => node.text) - end - end - def update_indexed_attributes(params={}, opts={}) if self.class.terminology.nil? raise "No terminology is set for this NokogiriDatastream class. Cannot perform update_indexed_attributes" diff --git a/lib/active_fedora/rels_ext_datastream.rb b/lib/active_fedora/rels_ext_datastream.rb index 4c8ed8744..bb50cb243 100644 --- a/lib/active_fedora/rels_ext_datastream.rb +++ b/lib/active_fedora/rels_ext_datastream.rb @@ -1,9 +1,10 @@ +require 'solrizer/field_name_mapper' module ActiveFedora class RelsExtDatastream < Datastream include ActiveFedora::SemanticNode - include ActiveFedora::SolrMapper + include Solrizer::FieldNameMapper def initialize(attrs=nil) diff --git a/lib/active_fedora/solr_service.rb b/lib/active_fedora/solr_service.rb index eadd70008..e62051551 100644 --- a/lib/active_fedora/solr_service.rb +++ b/lib/active_fedora/solr_service.rb @@ -1,10 +1,9 @@ require 'solr' -require "active_fedora/solr_mapper" -require "yaml" -module ActiveFedora - class SolrService +require "solrizer/field_name_mapper" + +module ActiveFedora + class SolrService - @@mappings = {} attr_reader :conn def self.register(host=nil, args={}) @@ -28,7 +27,7 @@ def self.reify_solr_results(solr_result) end results = [] solr_result.hits.each do |hit| - model_value = hit[Solrizer::SolrMapper.solr_name("active_fedora_model", :symbol)].first + model_value = hit[Solrizer::FieldNameMapper.solr_name("active_fedora_model", :symbol)].first if model_value.include?("::") classname = eval(model_value) else @@ -53,9 +52,22 @@ def self.escape_uri_for_query(uri) return uri.gsub(/(:)/, '\\:') end + def self.mappings + Solrizer::FieldNameMapper.mappings + end + def self.mappings=(mappings) + Solrizer::FieldNameMapper.mappings = mappings + end + def self.logger @logger ||= defined?(RAILS_DEFAULT_LOGGER) ? RAILS_DEFAULT_LOGGER : Logger.new(STDOUT) end - + + # (re)load solr field name mappings + def load_mappings( config_path=nil ) + Solrizer::FieldNameMapper.load_mappings(config_path) + end + class SolrNotInitialized < StandardError;end -end +end #SolrService +end #ActiveFedora diff --git a/spec/unit/base_file_management_spec.rb b/spec/unit/base_file_management_spec.rb index b39f24e8e..266d39e6d 100644 --- a/spec/unit/base_file_management_spec.rb +++ b/spec/unit/base_file_management_spec.rb @@ -1,5 +1,4 @@ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') -require "active_fedora" # Some tentative extensions to ActiveFedora::Base diff --git a/spec/unit/nokogiri_datastream_spec.rb b/spec/unit/nokogiri_datastream_spec.rb index 92dbc5bbe..5fa7558a7 100644 --- a/spec/unit/nokogiri_datastream_spec.rb +++ b/spec/unit/nokogiri_datastream_spec.rb @@ -20,6 +20,10 @@ after(:each) do end + it "should include the Solrizer::XML::TerminologyBasedSolrizer for .to_solr support" do + ActiveFedora::NokogiriDatastream.included_modules.should include(Solrizer::XML::TerminologyBasedSolrizer) + end + describe '#new' do it 'should provide #new' do ActiveFedora::NokogiriDatastream.should respond_to(:new) @@ -275,140 +279,4 @@ end end - - describe ".to_solr" do - - after(:all) do - # Revert to default mappings after running tests - ActiveFedora::SolrService.load_mappings - end - - it "should iterate through the class accessors, calling .solrize_accessor on each and passing in the solr doc" do - mock_accessors = {:accessor1=>:accessor1_info, :accessor2=>:accessor2_info} - ActiveFedora::NokogiriDatastream.stubs(:accessors).returns(mock_accessors) - doc = Solr::Document.new - mock_accessors.each_pair do |k,v| - @test_ds.expects(:solrize_accessor).with(k, v, :solr_doc=>doc) - end - @test_ds.to_solr(doc) - end - - it "should provide .to_solr and return a SolrDocument" do - @test_ds.should respond_to(:to_solr) - @test_ds.to_solr.should be_kind_of(Solr::Document) - end - - it "should optionally allow you to provide the Solr::Document to add fields to and return that document when done" do - doc = Solr::Document.new - @test_ds.to_solr(doc).should equal(doc) - end - - end - - describe ".solrize_accessor" do - # before(:all) do - # class AccessorizedDs < ActiveFedora::NokogiriDatastream - # - # root_property :mods, "mods", "http://www.loc.gov/mods/v3", :attributes=>["id", "version"], :schema=>"http://www.loc.gov/standards/mods/v3/mods-3-2.xsd" - # - # accessor :title_info, :relative_xpath=>'oxns:titleInfo', :children=>[ - # {:main_title=>{:relative_xpath=>'oxns:title'}}, - # {:language =>{:relative_xpath=>{:attribute=>"lang"} }} - # ] - # accessor :finnish_title_info, :relative_xpath=>'oxns:titleInfo[@lang="finnish"]', :children=>[ - # {:main_title=>{:relative_xpath=>'oxns:title'}}, - # {:language =>{:relative_xpath=>{:attribute=>"lang"} }} - # ] - # accessor :abstract - # accessor :topic_tag, :relative_xpath=>'oxns:subject/oxns:topic' - # accessor :person, :relative_xpath=>'oxns:name[@type="personal"]', :children=>[ - # {:last_name=>{:relative_xpath=>'oxns:namePart[@type="family"]'}}, - # {:first_name=>{:relative_xpath=>'oxns:namePart[@type="given"]'}}, - # {:institution=>{:relative_xpath=>'oxns:affiliation'}}, - # {:role=>{:children=>[ - # {:text=>{:relative_xpath=>'oxns:roleTerm[@type="text"]'}}, - # {:code=>{:relative_xpath=>'oxns:roleTerm[@type="code"]'}} - # ]}} - # ] - # end - # end - # - # before(:each) do - # file = fixture(File.join("mods_articles", "hydrangea_article1.xml")) - # @mods_ds = AccessorizedDs.new(:blob=>file) - # end - - before(:each) do - @mods_ds = Hydra::SampleModsDatastream.new(:blob=>fixture(File.join("mods_articles","hydrangea_article1.xml"))) - end - - it "should perform a lookup and iterate over nodes in the result set calling solrize_node then calling solrize_accessor on any of the children, adding accessor_name & node index to parents array" do - mock_title_info_set = ["TI1", "TI2"] - mock_main_title_set = ["main title"] - mock_language_set = ["language"] - - solr_doc = Solr::Document.new - - AccessorizedDs.expects(:accessor_xpath).with( :title_info ).returns("title_info_xpath") - @mods_ds.expects(:lookup).with( "title_info_xpath" ).returns(mock_title_info_set) - - mock_title_info_set.each do |tin| - node_index = mock_title_info_set.index(tin) - @mods_ds.expects(:solrize_node).with(tin, [:title_info], solr_doc) - - # Couldn't mock the recursive calls to solrize_accessor without preventing the initial one, so was forced to mock out the whole recursive stack. - # @mods_ds.expects(:solrize_accessor).with(:main_title, AccessorizedDs.accessors[:title_info][:children][:main_title], :parents=>[{:title_info=>node_index}]) - # @mods_ds.expects(:solrize_accessor).with(:language, AccessorizedDs.accessors[:title_info][:children][:language], :parents=>[{:title_info=>node_index}]) - AccessorizedDs.expects(:accessor_xpath).with( {:title_info=>node_index}, :main_title ).returns("title_info_main_title_xpath") - AccessorizedDs.expects(:accessor_xpath).with( {:title_info=>node_index}, :language ).returns("title_info_language_xpath") - @mods_ds.expects(:lookup).with( "title_info_main_title_xpath" ).returns(mock_main_title_set) - @mods_ds.expects(:lookup).with( "title_info_language_xpath" ).returns(mock_language_set) - @mods_ds.expects(:solrize_node).with("main title", [{:title_info=>node_index}, :main_title], solr_doc) - @mods_ds.expects(:solrize_node).with("language", [{:title_info=>node_index}, :language], solr_doc) - end - - @mods_ds.solrize_accessor(:title_info, AccessorizedDs.accessors[:title_info], :solr_doc=>solr_doc) - - end - - it "should not call solrize_accessor once it reaches an accessor with no children accessors set" do - pending "not sure how to test for this" - @mods_ds.solrize_accessor(:text, AccessorizedDs.accessor_info( [{:person=>1}, :last_name] ), :parents=>[{:person=>1}]) - end - - it "should use values form parents array when requesting accessor_xpath and when generating solr field names" do - parents_array = [{:person=>0}, {:role=>1}] - AccessorizedDs.accessors[:person][:children][:role][:children][:text] - - # This should catch the "submitter" roleTerm from the second role node within the first person node and put it into a solr field called "person_0_role_2_text_0_t" and a solr field called "person_role_text_t" - @mods_ds.solrize_accessor(:text, AccessorizedDs.accessor_info( *parents_array + [:text] ), :parents=>parents_array) - end - - it "should use Solr mappings to generate field names" do - - solr_doc = @mods_ds.to_solr - #should have these - - solr_doc[:abstract_t].should == "ABSTRACT" - solr_doc[:title_info_1_language_t].should == "finnish" - solr_doc[:person_1_role_0_text_t].should == "teacher" - solr_doc[:finnish_title_info_language_t].should == "finnish" - solr_doc[:finnish_title_info_main_title_t].should == "Artikkelin otsikko Hydrangea artiklan 1" - - # solr_doc[:mydate_date].should == "fake-date" - # - # solr_doc[:publisher_t].should be_nil - # solr_doc[:coverage_t].should be_nil - # solr_doc[:creation_date_dt].should be_nil - # solr_doc.should == "" - - end - end - - describe ".solrize_node" do - it "should create a solr field containing node.text" - it "should create hierarchical field entries if parents is not empty" - it "should only create one node if parents is empty" - end - end diff --git a/spec/unit/solr_mapper_spec.rb b/spec/unit/solr_mapper_spec.rb deleted file mode 100644 index 10971ad1a..000000000 --- a/spec/unit/solr_mapper_spec.rb +++ /dev/null @@ -1,31 +0,0 @@ -require File.join( File.dirname(__FILE__), "..", "spec_helper" ) - -require 'active_fedora/solr_service' -require 'active_fedora/solr_mapper' - -include ActiveFedora::SolrMapper - -describe ActiveFedora::SolrMapper do - - after(:all) do - # Revert to default mappings after running tests - ActiveFedora::SolrService.load_mappings - end - - describe ".solr_name" do - it "should generate solr field names from settings in solr_mappings" do - solr_name(:system_create, :date).should == :system_create_dt - end - it "should format the response based on the class of the input" do - solr_name(:system_create, :date).should == :system_create_dt - solr_name("system_create", :date).should == "system_create_dt" - end - it "should rely on whichever mappings have been loaded into the SolrService" do - solr_name(:system_create, :date).should == :system_create_dt - solr_name(:foo, :text).should == :foo_t - ActiveFedora::SolrService.load_mappings(File.join(File.dirname(__FILE__), "..", "..", "config", "solr_mappings_af_0.1.yml")) - solr_name(:system_create, :date).should == :system_create_date - solr_name(:foo, :text).should == :foo_field - end - end -end \ No newline at end of file