From ce0776ba8562167838be40340b7b78e63933456a Mon Sep 17 00:00:00 2001 From: Sven Jacobs Date: Wed, 4 Oct 2023 08:48:29 +0200 Subject: [PATCH] fix: Yahoo sanitizer for search query links (#289) --- .../domain/sanitizer/SearchResultSanitizer.kt | 12 +++++---- .../sanitizer/yahoo/YahooSearchSanitizer.kt | 17 ++++++++++--- .../yahoo/YahooSearchSanitizerTest.kt | 25 +++++++++++++------ 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/core-domain/src/main/kotlin/com/svenjacobs/app/leon/core/domain/sanitizer/SearchResultSanitizer.kt b/core-domain/src/main/kotlin/com/svenjacobs/app/leon/core/domain/sanitizer/SearchResultSanitizer.kt index c69db6aa..a4901a70 100644 --- a/core-domain/src/main/kotlin/com/svenjacobs/app/leon/core/domain/sanitizer/SearchResultSanitizer.kt +++ b/core-domain/src/main/kotlin/com/svenjacobs/app/leon/core/domain/sanitizer/SearchResultSanitizer.kt @@ -29,9 +29,11 @@ abstract class SearchResultSanitizer( private val regex: Regex, ) : Sanitizer { - override fun invoke(input: String): String { - val result = regex.find(input) ?: return input - val group = result.groups[1] ?: return input - return decodeUrl(group.value) - } + override fun invoke(input: String): String = extractSearchResultValue(regex, input) +} + +internal fun extractSearchResultValue(regex: Regex, input: String): String { + val result = regex.find(input) ?: return input + val group = result.groups[1] ?: return input + return decodeUrl(group.value) } diff --git a/core-domain/src/main/kotlin/com/svenjacobs/app/leon/core/domain/sanitizer/yahoo/YahooSearchSanitizer.kt b/core-domain/src/main/kotlin/com/svenjacobs/app/leon/core/domain/sanitizer/yahoo/YahooSearchSanitizer.kt index f8e03084..c80a91d0 100644 --- a/core-domain/src/main/kotlin/com/svenjacobs/app/leon/core/domain/sanitizer/yahoo/YahooSearchSanitizer.kt +++ b/core-domain/src/main/kotlin/com/svenjacobs/app/leon/core/domain/sanitizer/yahoo/YahooSearchSanitizer.kt @@ -20,14 +20,13 @@ package com.svenjacobs.app.leon.core.domain.sanitizer.yahoo import android.content.Context import com.svenjacobs.app.leon.core.common.domain.matchesDomain +import com.svenjacobs.app.leon.core.common.regex.RegexFactory import com.svenjacobs.app.leon.core.domain.R import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer import com.svenjacobs.app.leon.core.domain.sanitizer.SanitizerId -import com.svenjacobs.app.leon.core.domain.sanitizer.SearchResultSanitizer +import com.svenjacobs.app.leon.core.domain.sanitizer.extractSearchResultValue -class YahooSearchSanitizer : SearchResultSanitizer( - Regex("RU=([^/]+)"), -) { +class YahooSearchSanitizer : Sanitizer { override val id = SanitizerId("yahoo_search") @@ -36,4 +35,14 @@ class YahooSearchSanitizer : SearchResultSanitizer( ) override fun matchesDomain(input: String) = input.matchesDomain("search.yahoo.com") + + override fun invoke(input: String): String = if (input.contains("/search")) { + RegexFactory.exceptParameter("p").replace(input, "") + } else { + extractSearchResultValue(REGEX_SEARCH_RESULT, input) + } + + companion object { + private val REGEX_SEARCH_RESULT = Regex("RU=([^/]+)") + } } diff --git a/core-domain/src/test/kotlin/com/svenjacobs/app/leon/core/domain/sanitizer/yahoo/YahooSearchSanitizerTest.kt b/core-domain/src/test/kotlin/com/svenjacobs/app/leon/core/domain/sanitizer/yahoo/YahooSearchSanitizerTest.kt index b5df1b18..46a278dd 100644 --- a/core-domain/src/test/kotlin/com/svenjacobs/app/leon/core/domain/sanitizer/yahoo/YahooSearchSanitizerTest.kt +++ b/core-domain/src/test/kotlin/com/svenjacobs/app/leon/core/domain/sanitizer/yahoo/YahooSearchSanitizerTest.kt @@ -1,6 +1,6 @@ /* * Léon - The URL Cleaner - * Copyright (C) 2022 Sven Jacobs + * Copyright (C) 2023 Sven Jacobs * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -23,19 +23,30 @@ import io.kotest.matchers.shouldBe class YahooSearchSanitizerTest : WordSpec( { + val sanitizer = YahooSearchSanitizer() "invoke" should { - "extract URL from Yahoo search link" { - val sanitizer = YahooSearchSanitizer() - - val result = sanitizer( + "extract URL from Yahoo search result link" { + sanitizer( "https://r.search.yahoo.com/_ylt=A0geKLovoVtisIEAUapx.9w4;_ylu=Y29sbwNi" + "ZjEEcG9zAzQEdnRpZAMEc2VjA3Ny/RV=2/RE=1650201007/RO=10/RU=https%3a%2f%2fg" + "ithub.com%2fsvenjacobs%2fleon/RK=2/RS=rHoItccMzwyZAXsJuDMkBaKUMx0-", - ) + ) shouldBe "https://github.com/svenjacobs/leon" + } + + "clean Yahoo search query link" { + sanitizer( + "https://search.yahoo.com/search?p=hi&fr=yfp-hrmob&fr2=p%3Afp%2Cm%3Asb&" + + ".tsrc=yfp-hrmob&ei=UTF-8&fp=1&toggle=1&cop=mss", + ) shouldBe "https://search.yahoo.com/search?p=hi" + } + } + + "matchesDomain" should { - result shouldBe "https://github.com/svenjacobs/leon" + "match search.yahoo.com" { + sanitizer.matchesDomain("https://search.yahoo.com") shouldBe true } } },