Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Marc alternative titles #2598

Merged
merged 20 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package weco.pipeline.transformer.marc_common.transformers

import weco.pipeline.transformer.marc_common.models.{
MarcField,
MarcRecord,
MarcSubfield
}

// Populate work:alternativeTitles
//
// The following fields are used as possible alternative titles. The title is
// built from all of a field's subfields (not only $a), joined with spaces:
// * 240 https://www.loc.gov/marc/bibliographic/bd240.html
// * 130 https://www.loc.gov/marc/bibliographic/bd130.html
// * 246 https://www.loc.gov/marc/bibliographic/bd246.html

/** Extracts the alternative titles of a work from a MARC record.
  *
  * Each 130, 240 and 246 field yields one candidate title, made by joining the
  * contents of its subfields, in the order they occur in the field, with a
  * single space. Caption titles (246 with second indicator 6) are skipped, the
  * Wellcome-specific `$5 UkLW` subfield is left out of the joined text, blank
  * candidates are dropped, and repeated titles are returned only once.
  */
object MarcAlternativeTitles extends MarcDataTransformer {

  override type Output = Seq[String]

  /** Returns the distinct, non-blank alternative titles found in `record`.
    *
    * @param record
    *   the MARC record to read 130, 240 and 246 fields from
    * @return
    *   one title per qualifying field, without duplicates; empty if the record
    *   has no usable 130, 240 or 246 field
    */
  override def apply(record: MarcRecord): Seq[String] =
    record
      .fieldsWithTags("240", "130", "246")
      .withoutCaptionTitles
      .map(alternativeTitle)
      // Several empty subfields join to a run of separator spaces rather than
      // to "", so reject blank titles, not just the exact empty string.
      .filterNot(_.trim.isEmpty)
      .distinct

  /** Joins the content of every retained subfield of `field` with spaces. */
  private def alternativeTitle(field: MarcField): String =
    field.subfields.withoutUKLW.map(_.content).mkString(" ")

  implicit private class FieldsOps(fields: Seq[MarcField]) {

    // 246 with ind2 = 6 indicates a Caption Title
    // "printed at the head of the first page of text. Caption title: may be generated with the note for display."
    // This is not an alternative title that we want to capture here.
    // The indicator is only meaningful on 246, so other tags are never dropped.
    def withoutCaptionTitles: Seq[MarcField] =
      fields.filterNot(
        field => field.marcTag == "246" && field.indicator2 == "6"
      )
  }

  implicit private class SubfieldsOps(subfields: Seq[MarcSubfield]) {

    // Any $5 subfield with contents `UkLW` is Wellcome Library-specific and
    // should be omitted. Other $5 values are kept.
    def withoutUKLW: Seq[MarcSubfield] =
      subfields.filterNot(_ == MarcSubfield("5", "UkLW"))
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,337 @@
package weco.pipeline.transformer.marc_common.transformers

import org.scalatest.LoneElement
import org.scalatest.funspec.AnyFunSpec
import org.scalatest.matchers.should.Matchers
import org.scalatest.prop.TableDrivenPropertyChecks._
import weco.pipeline.transformer.marc_common.generators.MarcTestRecord
import weco.pipeline.transformer.marc_common.models.{MarcField, MarcSubfield}
import scala.util.Random
/** Specification for [[MarcAlternativeTitles]].
  *
  * Covers the cases where no title is produced, extraction of a single title
  * from each of the 130, 240 and 246 fields, and extraction of several titles
  * from one record (including de-duplication and caption-title filtering).
  */
class MarcAlternativeTitlesTest
    extends AnyFunSpec
    with Matchers
    with LoneElement {

  // Subfield codes used to populate a field of each tag in the
  // "concatenates all subfields" test. Declared ahead of the specs so that it
  // is initialised before anything can read it.
  private val subfieldLists: Map[String, Seq[String]] = Map(
    "130" -> Seq(
      "a", "d", "f", "g", "h", "k", "l", "m", "n", "o",
      "p", "r", "s", "t", "0", "1", "2", "6", "7", "8"
    ),
    "240" -> Seq(
      "a", "d", "f", "g", "h", "k", "l", "m", "n", "o",
      "p", "r", "s", "0", "1", "2", "6", "7", "8"
    ),
    "246" -> Seq("a", "b", "f", "g", "h", "i", "n", "p", "5", "6", "7", "8")
  )

  /** Runs the transformer over a test record holding exactly `fields`. */
  private def alternativeTitlesOf(fields: MarcField*): Seq[String] =
    MarcAlternativeTitles(MarcTestRecord(fields = fields))

  /** Builds a field with the given tag and a single `$a` subfield. */
  private def fieldWithA(marcTag: String, content: String): MarcField =
    MarcField(
      marcTag = marcTag,
      subfields = Seq(MarcSubfield(tag = "a", content = content))
    )

  describe("extracting alternative titles from 130, 240, and 246 fields") {
    info("https://www.loc.gov/marc/bibliographic/bd130.html")
    info("https://www.loc.gov/marc/bibliographic/bd240.html")
    info("https://www.loc.gov/marc/bibliographic/bd246.html")
    describe("returning nothing") {
      it(
        "does not extract alternative titles if 130, 240, and 246 are absent"
      ) {
        alternativeTitlesOf(fieldWithA("999", "mafeesh")) shouldBe Nil
      }

      it("does not return an empty alternative title given an empty field") {
        alternativeTitlesOf(fieldWithA("130", "")) shouldBe Nil
      }

      it(
        "does not return an empty alternative title given a field whose content is entirely filtered out"
      ) {
        alternativeTitlesOf(
          MarcField(
            marcTag = "246",
            subfields = Seq(MarcSubfield(tag = "5", content = "UkLW"))
          )
        ) shouldBe Nil
      }

      it("ignores 'caption title' fields, i.e. 246 fields with ind2=6") {
        alternativeTitlesOf(
          MarcField(
            marcTag = "246",
            subfields =
              Seq(MarcSubfield(tag = "a", content = "I am a caption")),
            indicator2 = "6"
          )
        ) shouldBe Nil
      }

    }
  }

  describe("extracting a single alternative title") {
    forAll(
      Table(
        "tag",
        "130",
        "240",
        "246"
      )
    ) {
      tag =>
        describe(s"extracting an alternative title from $tag") {
          it(s"extracts an alternative title from field $tag") {
            alternativeTitlesOf(
              fieldWithA(tag, "mafeesh")
            ).loneElement shouldBe "mafeesh"
          }

          it(
            s"concatenates all subfields of $tag in document order to make the alternative title"
          ) {
            // The order is random on every run; report the seed so that a
            // failing order can be replayed with `new Random(seed)`.
            val seed = Random.nextLong()
            info(s"subfield order shuffled with seed $seed")
            val shuffled = new Random(seed).shuffle(subfieldLists(tag))

            // Each subfield's content is its own code, upper-cased, so the
            // expected title spells out the shuffled order.
            val subfields = shuffled.map(
              subtag => MarcSubfield(tag = subtag, content = subtag.toUpperCase)
            )
            val expectedTitle = shuffled.map(_.toUpperCase).mkString(" ")

            alternativeTitlesOf(
              MarcField(marcTag = tag, subfields = subfields)
            ).loneElement shouldBe expectedTitle
          }

          if (tag == "246") {
            it("ignores subfield 246$5 if its value is UkLW") {
              info("$5UkLW is Wellcome Library-specific and should be omitted")
              info(
                "$5 is non-repeating, so this example should not exist in Real Life"
              )
              info(" but this test demonstrates that the existence of $5UkLW")
              info(
                " does not impact the transformer's ability to extract anything else"
              )
              alternativeTitlesOf(
                MarcField(
                  marcTag = tag,
                  subfields = Seq(
                    MarcSubfield(tag = "a", content = "Pinakes"),
                    MarcSubfield(tag = "5", content = "UkLW"),
                    MarcSubfield(tag = "5", content = "Mouseion")
                  )
                )
              ).loneElement shouldBe "Pinakes Mouseion"
            }
          }
        }
    }
  }

  describe("extracting multiple alternative titles") {
    it("extracts alternative titles from all relevant fields") {
      alternativeTitlesOf(
        fieldWithA("130", "I'm very well acquainted too"),
        fieldWithA("240", "with matters mathematical"),
        fieldWithA("246", "I understand equations"),
        fieldWithA("246", "both simple"),
        fieldWithA("240", "and quadratical"),
        fieldWithA(
          "130",
          "About binomial theorem I am teeming with a lot o' news"
        )
      ) should contain theSameElementsAs Seq(
        "I'm very well acquainted too",
        "with matters mathematical",
        "I understand equations",
        "both simple",
        "and quadratical",
        "About binomial theorem I am teeming with a lot o' news"
      )
    }

    it("does not return duplicate alternative titles") {
      alternativeTitlesOf(
        fieldWithA(
          "130",
          "With many cheerful facts about the square of the hypotenuse"
        ),
        fieldWithA(
          "240",
          "With many cheerful facts about the square of the hypotenuse"
        ),
        fieldWithA(
          "246",
          "With many cheerful facts about the square of the hypotenuse"
        ),
        fieldWithA(
          "246",
          "With many cheerful facts about the square of the hypoten-potenuse"
        )
      ) should contain theSameElementsAs Seq(
        "With many cheerful facts about the square of the hypotenuse",
        "With many cheerful facts about the square of the hypoten-potenuse"
      )
    }

    it("only filters on ind2=6 for 246 fields") {
      info("246 with indicator2 = 6 is a caption title")
      info("this is not true of 130 and 240")
      // Every field carries ind2 = 6; only the 246 should be dropped.
      val fields = Seq(
        "130" -> "I am not a caption",
        "246" -> "I am a caption",
        "240" -> "Nor am I"
      ) map {
        case (tag, content) =>
          MarcField(
            indicator2 = "6",
            marcTag = tag,
            subfields = Seq(
              MarcSubfield(
                tag = "a",
                content = content
              )
            )
          )
      }

      alternativeTitlesOf(fields: _*) should contain theSameElementsAs Seq(
        "I am not a caption",
        "Nor am I"
      )
    }
  }
}
Loading
Loading