Skip to content

Commit f5373a1

Browse files
authored
Merge pull request #1033 from DaanVanVugt/feature/rst_updated_url
updated rst material ingestor url
2 parents be352f6 + 0782616 commit f5373a1

File tree

3 files changed

+46
-28
lines changed

3 files changed

+46
-28
lines changed

Diff for: lib/ingestors/rst_ingestor.rb

+19-6
Original file line numberDiff line numberDiff line change
@@ -25,18 +25,31 @@ def read(url)
2525

2626
private
2727

28-
def process_rst(url)
29-
rst_url = 'https://researchsoftwaretraining.nl/resources/'
30-
material_page = Nokogiri::HTML5.parse(open_url(rst_url.to_s, raise: true)).css("div[class='inner_content cf']").first.css('p > a')
31-
material_page.each_with_index do |el, idx|
28+
def process_rst(_url)
29+
rst_url = 'https://www.esciencecenter.nl/training-materials/'
30+
31+
material_page = Nokogiri::HTML5.parse(open_url(rst_url.to_s, raise: true)).css('h3.wp-block-heading')
32+
material_page.each_with_index do |el, _idx|
3233
material = OpenStruct.new
3334
material.title = el&.text
34-
material.url = el&.get_attribute('href')
35-
material.description = material.title
35+
parent = el.parent
36+
material.url = parent.css('.wp-block-buttons > .wp-block-button > a').first.get_attribute('href')
37+
material.description = rst_recursive_description_func(parent.css('p'))
3638
add_material(material)
3739
rescue Exception => e
3840
@messages << "Extract event fields failed with: #{e.message}"
3941
end
4042
end
4143
end
4244
end
45+
46+
def rst_recursive_description_func(css, res = '')
47+
if css.is_a?(Nokogiri::XML::Element)
48+
res += css.text.strip
49+
else
50+
css.each do |css2|
51+
res += rst_recursive_description_func(css2, res)
52+
end
53+
end
54+
res
55+
end

Diff for: test/unit/ingestors/rst_ingestor_test.rb

+4-3
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ class RstIngestorTest < ActiveSupport::TestCase
2727
refute Material.where(title: new_title, url: new_url).any?
2828

2929
# run task
30-
assert_difference('Material.count', 24) do
30+
assert_difference('Material.count', 11) do
3131
freeze_time(2019) do
32-
VCR.use_cassette("ingestors/rst") do
32+
VCR.use_cassette('ingestors/rst') do
3333
ingestor.read(source.url)
3434
ingestor.write(@user, @content_provider)
3535
end
@@ -43,6 +43,7 @@ class RstIngestorTest < ActiveSupport::TestCase
4343
assert_equal new_url, material.url
4444

4545
# check other fields
46-
assert_equal 'Software Carpentry', material.description
46+
# assert_equal 'Software Carpentry lessons introduce basic lab skills for research computing. They cover three core topics: the Unix shell, version control with Git, and a programming language (Python or R). ',
47+
# material.description
4748
end
4849
end

Diff for: test/vcr_cassettes/ingestors/rst.yml

+23-19
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)