Skip to content

Commit

Permalink
fix: Facebook sanitizer for mobile domain and story URLs (#246)
Browse files Browse the repository at this point in the history
  • Loading branch information
svenjacobs authored Sep 14, 2023
1 parent b1e0fe4 commit af6e296
Show file tree
Hide file tree
Showing 15 changed files with 62 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

package com.svenjacobs.app.leon.core.common.domain

/**
 * Checks whether this string is a URL that starts with [domain].
 *
 * An optional `http://` / `https://` scheme and an optional `www.` prefix are accepted in front
 * of the domain; anything may follow it.
 *
 * @param domain Domain to look for
 * @param isRegex When `false` (default) every `.` in [domain] is escaped so that it only matches a
 * literal dot; when `true` [domain] is inserted into the pattern unchanged
 * @return `true` if the whole string matches the resulting pattern
 */
fun String.matchesDomain(domain: String, isRegex: Boolean = false): Boolean {
    val domainPattern = when {
        isRegex -> domain
        else -> domain.replace(".", "\\.")
    }
    val urlPattern = "^(?:https?://)?(?:www\\.)?$domainPattern.*".toRegex()
    return urlPattern.matches(this)
}
/**
 * Checks whether this string is a URL whose domain part matches the regular expression [domain].
 *
 * An optional `http://` / `https://` scheme and an optional `www.` prefix are accepted in front
 * of the domain; anything (except line breaks) may follow it.
 *
 * [domain] is wrapped in a non-capturing group before it is inserted into the pattern. Without
 * the group a top-level alternation such as `twitter|x\.com` would split the whole expression,
 * including the scheme prefix and the trailing wildcard, instead of only the domain part.
 *
 * @param domain Regular expression describing the domain (and optionally the start of the path)
 * @return `true` if the whole string matches the resulting pattern
 */
fun String.matchesDomainRegex(domain: String): Boolean =
    Regex("^(?:https?://)?(?:www\\.)?(?:$domain).*").matches(this)

/**
 * Checks whether this string is a URL that starts with the given [domain].
 *
 * Every `.` in [domain] is escaped so that it only matches a literal dot; all other characters
 * are passed on to [matchesDomainRegex] unchanged.
 *
 * @param domain Domain to look for, for example `facebook.com`
 */
fun String.matchesDomain(domain: String): Boolean {
    val escapedDomain = domain.replace(".", "\\.")
    return matchesDomainRegex(escapedDomain)
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Léon - The URL Cleaner
* Copyright (C) 2022 Sven Jacobs
* Copyright (C) 2023 Sven Jacobs
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand All @@ -26,7 +26,7 @@ object RegexFactory {
val AllParameters = Regex("\\?.*")

/**
* Returns a regex string which matches a certain parameter.
* Returns a Regex which matches a certain parameter.
*
* For example `ofParameter("abc")` returns a Regex which matches `?abc=` or `&abc=`.
*
Expand All @@ -36,7 +36,7 @@ object RegexFactory {
fun ofParameter(parameter: String): Regex {
    // Leading `?` or `&`, the parameter name, `=`, then the value captured up to the next `&` or `#`.
    val pattern = "[?&](?:$parameter)=([^&#]*)"
    return pattern.toRegex()
}

/**
* Returns a regex string which matches a certain parameter prefix.
* Returns a Regex which matches a certain parameter prefix.
*
* For example `ofWildcardParameter("abc_")` returns a Regex which matches `?abc_x=`,
* `&abc_y=`, `&abc_zzz=` et cetera.
Expand All @@ -45,4 +45,14 @@ object RegexFactory {
*/
@Suppress("RegExpUnnecessaryNonCapturingGroup")
fun ofWildcardParameter(parameter: String): Regex {
    // Leading `?` or `&`, the prefix, any remaining name characters up to `=`,
    // then the value captured up to the next `&` or `#`.
    val pattern = "[?&](?:$parameter)[^=]*=([^&#]*)"
    return Regex(pattern)
}

/**
 * Returns a Regex which matches every query parameter except the specified one(s), so that
 * replacing all matches with an empty string strips the unwanted parameters.
 *
 * [parameter] is inserted as a regular expression. Use OR (`|`) to keep several parameters,
 * for example `id|story_fbid`; an explicit group such as `(id|story_fbid)` works as well.
 *
 * A match ends at the next `&` or `#`, so a URL fragment following the last parameter is not
 * swallowed together with a removed parameter.
 *
 * Note that the leading `?` or `&` is part of every match: if the first parameter of a URL is
 * removed while a later one is kept, the kept parameter retains its `&` separator.
 *
 * @param parameter Name (or alternation of names) of the parameter(s) to keep
 */
@Suppress("RegExpUnnecessaryNonCapturingGroup")
fun exceptParameter(parameter: String): Regex = Regex("[?&](?!(?:$parameter)=)[^&#]+")
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,8 @@ class DomainExtensionsTest : WordSpec(
}

"match domain with regular expression values" {
"https://aliexpress.com/item/32948511896".matchesDomain(
"https://aliexpress.com/item/32948511896".matchesDomainRegex(
domain = "aliexpress\\..+/item/",
isRegex = true,
) shouldBe true
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.aliexpress

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
Expand All @@ -36,6 +36,5 @@ class AliexpressSanitizer : RegexSanitizer(
name = context.getString(R.string.sanitizer_aliexpress_name),
)

override fun matchesDomain(input: String) =
input.matchesDomain("aliexpress\\..+/item/", isRegex = true)
// Matches `aliexpress.` followed by any characters up to an `/item/` path segment.
override fun matchesDomain(input: String): Boolean {
    val itemPagePattern = "aliexpress\\..+/item/"
    return input.matchesDomainRegex(itemPagePattern)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.amazon

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
Expand All @@ -40,5 +40,5 @@ class AmazonSanitizer : RegexSanitizer(
name = context.getString(R.string.sanitizer_amazon_name),
)

override fun matchesDomain(input: String) = input.matchesDomain("amazon\\..+/", isRegex = true)
// Matches `amazon.` followed by any characters and a `/`.
override fun matchesDomain(input: String): Boolean {
    val amazonPattern = "amazon\\..+/"
    return input.matchesDomainRegex(amazonPattern)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.ebay

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
Expand All @@ -36,5 +36,5 @@ class EbaySanitizer : RegexSanitizer(
name = context.getString(R.string.sanitizer_ebay_name),
)

override fun matchesDomain(input: String) = input.matchesDomain("ebay\\..+/itm/", isRegex = true)
// Matches `ebay.` followed by any characters up to an `/itm/` path segment.
override fun matchesDomain(input: String): Boolean {
    val itemPagePattern = "ebay\\..+/itm/"
    return input.matchesDomainRegex(itemPagePattern)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.facebook

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.SanitizerId

class FacebookSanitizer : RegexSanitizer(
regex = RegexFactory.AllParameters,
regex = RegexFactory.exceptParameter("(id|story_fbid)"),
) {

override val id = SanitizerId("facebook_com")
Expand All @@ -36,5 +36,7 @@ class FacebookSanitizer : RegexSanitizer(
name = context.getString(R.string.sanitizer_facebook_name),
)

override fun matchesDomain(input: String) = input.matchesDomain("facebook.com")
/**
 * Matches `facebook.com` and its mobile variant `m.facebook.com`.
 *
 * Both dots are escaped: the pattern is handed to [matchesDomainRegex] as a regular expression,
 * where an unescaped `.` would match any character (for example `facebookxcom`).
 */
override fun matchesDomain(input: String) = input.matchesDomainRegex(
    domain = "(m\\.)?facebook\\.com",
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.georiot

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
Expand All @@ -37,5 +37,5 @@ class GeoRiotSanitizer : SearchResultSanitizer(
)

override fun matchesDomain(input: String) =
input.matchesDomain("target.georiot\\.[^/]+/Proxy.ashx", isRegex = true)
input.matchesDomainRegex("target.georiot\\.[^/]+/Proxy.ashx")
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.google

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
Expand All @@ -36,6 +36,5 @@ class GoogleSearchSanitizer : SearchResultSanitizer(
name = context.getString(R.string.sanitizer_google_search_name),
)

override fun matchesDomain(input: String) =
input.matchesDomain("google\\.[^/]+/url", isRegex = true)
// Matches `google.` plus a slash-free remainder of the host, followed by the `/url` path.
override fun matchesDomain(input: String): Boolean {
    val redirectPattern = "google\\.[^/]+/url"
    return input.matchesDomainRegex(redirectPattern)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.linksynergy

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
Expand All @@ -36,6 +36,5 @@ class LinkSynergySanitizer : SearchResultSanitizer(
name = context.getString(R.string.sanitizer_linksynergy_name),
)

override fun matchesDomain(input: String) =
input.matchesDomain("linksynergy\\.[^/]+/link", isRegex = true)
// Matches `linksynergy.` plus a slash-free remainder of the host, followed by the `/link` path.
override fun matchesDomain(input: String): Boolean {
    val redirectPattern = "linksynergy\\.[^/]+/link"
    return input.matchesDomainRegex(redirectPattern)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.netflix

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
Expand All @@ -36,8 +36,7 @@ class NetflixSanitizer : RegexSanitizer(
name = context.getString(R.string.sanitizer_netflix_name),
)

override fun matchesDomain(input: String) = input.matchesDomain(
override fun matchesDomain(input: String) = input.matchesDomainRegex(
domain = "(help\\.)?netflix.com",
isRegex = true,
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.spotify

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
Expand All @@ -36,6 +36,5 @@ class SpotifySanitizer : RegexSanitizer(
name = context.getString(R.string.sanitizer_spotify_name),
)

override fun matchesDomain(input: String) =
input.matchesDomain("(open\\.)?spotify\\.com", isRegex = true)
// Matches `spotify.com` with an optional `open.` subdomain.
override fun matchesDomain(input: String): Boolean {
    val spotifyPattern = "(open\\.)?spotify\\.com"
    return input.matchesDomainRegex(spotifyPattern)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.x

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
Expand All @@ -36,8 +36,7 @@ class XSanitizer : RegexSanitizer(
name = context.getString(R.string.sanitizer_x_name),
)

override fun matchesDomain(input: String) = input.matchesDomain(
override fun matchesDomain(input: String) = input.matchesDomainRegex(
domain = "(twitter|x)\\.com",
isRegex = true,
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,22 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.youtube

import android.content.Context
import com.svenjacobs.app.leon.core.common.domain.matchesDomain
import com.svenjacobs.app.leon.core.common.domain.matchesDomainRegex
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.RegexSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.SanitizerId

class YoutubeSanitizer : Sanitizer {
class YoutubeSanitizer : RegexSanitizer(
RegexFactory.exceptParameter("(v|search_query)"),
) {

override val id = SanitizerId("youtube")

override fun getMetadata(context: Context) = Sanitizer.Metadata(
name = context.getString(R.string.sanitizer_youtube_name),
)

override fun matchesDomain(input: String) =
input.matchesDomain("(m\\.)?youtube\\.com", isRegex = true)

override fun invoke(input: String) = PARAMS_REGEX.replace(input, "")

private companion object {
private val PARAMS_REGEX = Regex("[?&](?!(v|search_query)=)[^&]+")
}
// Matches `youtube.com` with an optional `m.` (mobile) subdomain.
override fun matchesDomain(input: String): Boolean {
    val youtubePattern = "(m\\.)?youtube\\.com"
    return input.matchesDomainRegex(youtubePattern)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,30 @@ class FacebookSanitizerTest : WordSpec(

"invoke" should {

"clean facebook.com URLs" {
"clean facebook.com reel URLs" {
sanitizer("https://www.facebook.com/reel/1242384407160280?sfnsn=scwspmo") shouldBe
"https://www.facebook.com/reel/1242384407160280"
}

"clean m.facebook.com story URLs" {
sanitizer(
"https://m.facebook.com/story.php?story_fbid=pfbid0HqS6zLZvNrQt6ACvjv3h" +
"Kq6khpVse437nWSq2jBifKRD5sVH2XRLC3zz8aA7TKkWl&id=4&sfnsn=wiwspmo&mibext" +
"id=XzsMCV",
) shouldBe "https://m.facebook.com/story.php?story_fbid=pfbid0HqS6zLZvNrQt6ACvjv" +
"3hKq6khpVse437nWSq2jBifKRD5sVH2XRLC3zz8aA7TKkWl&id=4"
}
}

"matchesDomain" should {

"match facebook.com" {
sanitizer.matchesDomain("https://facebook.com") shouldBe true
}

"match m.facebook.com" {
sanitizer.matchesDomain("https://m.facebook.com") shouldBe true
}
}
},
)

0 comments on commit af6e296

Please sign in to comment.