Skip to content

Commit

Permalink
fix: Yahoo sanitizer for search query links (#289)
Browse files Browse the repository at this point in the history
  • Loading branch information
svenjacobs authored Oct 4, 2023
1 parent f4db4f3 commit ce0776b
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@ abstract class SearchResultSanitizer(
private val regex: Regex,
) : Sanitizer {

override fun invoke(input: String): String {
val result = regex.find(input) ?: return input
val group = result.groups[1] ?: return input
return decodeUrl(group.value)
}
override fun invoke(input: String): String = extractSearchResultValue(regex, input)
}

/**
 * Finds the first match of [regex] in [input] and returns the text of capture group 1
 * after passing it through `decodeUrl`.
 *
 * [input] is returned unchanged when [regex] does not match at all, or when group 1 did
 * not participate in the match.
 *
 * @param regex pattern whose first capture group holds the value to extract
 * @param input text to search
 * @return the result of `decodeUrl` on the captured text, or [input] as a fallback
 */
internal fun extractSearchResultValue(regex: Regex, input: String): String {
    // Null covers both "no match" and "group 1 absent from the match".
    val captured = regex.find(input)?.groups?.get(1)?.value
    return if (captured == null) input else decodeUrl(captured)
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,13 @@ package com.svenjacobs.app.leon.core.domain.sanitizer.yahoo

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.SanitizerId
import com.svenjacobs.app.leon.core.domain.sanitizer.SearchResultSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.extractSearchResultValue

class YahooSearchSanitizer : SearchResultSanitizer(
Regex("RU=([^/]+)"),
) {
class YahooSearchSanitizer : Sanitizer {

override val id = SanitizerId("yahoo_search")

Expand All @@ -36,4 +35,14 @@ class YahooSearchSanitizer : SearchResultSanitizer(
)

override fun matchesDomain(input: String) = input.matchesDomain("search.yahoo.com")

/**
 * Sanitizes a Yahoo search URL.
 *
 * When [input] contains the text "/search", every part of it matched by
 * `RegexFactory.exceptParameter("p")` is replaced with an empty string (presumably this
 * strips all query parameters other than `p` - confirm against RegexFactory).
 * Otherwise [input] is handed to `extractSearchResultValue` together with
 * `REGEX_SEARCH_RESULT`.
 */
override fun invoke(input: String): String {
    // NOTE(review): plain substring test. The "//" of a scheme followed by a host that
    // starts with "search" (e.g. "https://search.yahoo.com") also satisfies it, not only
    // a "/search" path segment - confirm this is intended.
    val isSearchQueryLink = input.contains("/search")

    return when {
        isSearchQueryLink -> RegexFactory.exceptParameter("p").replace(input, "")
        else -> extractSearchResultValue(REGEX_SEARCH_RESULT, input)
    }
}

companion object {
// Matches the literal "RU=" and captures, as group 1, the run of one or more
// characters up to (but not including) the next '/'. Created once and reused
// rather than being rebuilt on every invoke() call.
private val REGEX_SEARCH_RESULT = Regex("RU=([^/]+)")
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Léon - The URL Cleaner
* Copyright (C) 2022 Sven Jacobs
* Copyright (C) 2023 Sven Jacobs
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand All @@ -23,19 +23,30 @@ import io.kotest.matchers.shouldBe

class YahooSearchSanitizerTest : WordSpec(
{
val sanitizer = YahooSearchSanitizer()

"invoke" should {

"extract URL from Yahoo search link" {
val sanitizer = YahooSearchSanitizer()

val result = sanitizer(
"extract URL from Yahoo search result link" {
sanitizer(
"https://r.search.yahoo.com/_ylt=A0geKLovoVtisIEAUapx.9w4;_ylu=Y29sbwNi" +
"ZjEEcG9zAzQEdnRpZAMEc2VjA3Ny/RV=2/RE=1650201007/RO=10/RU=https%3a%2f%2fg" +
"ithub.com%2fsvenjacobs%2fleon/RK=2/RS=rHoItccMzwyZAXsJuDMkBaKUMx0-",
)
) shouldBe "https://github.com/svenjacobs/leon"
}

"clean Yahoo search query link" {
sanitizer(
"https://search.yahoo.com/search?p=hi&fr=yfp-hrmob&fr2=p%3Afp%2Cm%3Asb&" +
".tsrc=yfp-hrmob&ei=UTF-8&fp=1&toggle=1&cop=mss",
) shouldBe "https://search.yahoo.com/search?p=hi"
}
}

"matchesDomain" should {

result shouldBe "https://github.com/svenjacobs/leon"
"match search.yahoo.com" {
sanitizer.matchesDomain("https://search.yahoo.com") shouldBe true
}
}
},
Expand Down

0 comments on commit ce0776b

Please sign in to comment.