Skip to content

Commit

Permalink
feat: Add Google Ads sanitizer (#135)
Browse files Browse the repository at this point in the history
* fix: Remove unnecessary regex capturing group

* feat: Add Google Ads sanitizer

* chore: Use RegexFactory where appropriate
  • Loading branch information
svenjacobs authored Nov 18, 2022
1 parent 4321f8c commit 15ca04e
Show file tree
Hide file tree
Showing 9 changed files with 114 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import com.svenjacobs.app.leon.core.domain.sanitizer.emptyparameters.EmptyParame
import com.svenjacobs.app.leon.core.domain.sanitizer.facebook.FacebookSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.flipkart.FlipkartSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.georiot.GeoRiotSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.google.GoogleAdsSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.google.GoogleAnalyticsSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.google.GoogleSearchSanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.instagram.InstagramSanitizer
Expand Down Expand Up @@ -63,6 +64,7 @@ class ContainerInitializer : DistinctInitializer<Unit> {
FacebookSanitizer(),
FlipkartSanitizer(),
GeoRiotSanitizer(),
GoogleAdsSanitizer(),
GoogleAnalyticsSanitizer(),
GoogleSearchSanitizer(),
InstagramSanitizer(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ object RegexFactory {
* @param parameter Parameter prefix
*/
@Suppress("RegExpUnnecessaryNonCapturingGroup")
fun ofParameter(parameter: String): Regex = Regex("[?&](?:$parameter)=.[^&#]*")
fun ofParameter(parameter: String): Regex = Regex("[?&](?:$parameter)=([^&#]*)")

/**
* Returns a regex string which matches a certain parameter prefix.
Expand All @@ -44,5 +44,5 @@ object RegexFactory {
* @param parameter Parameter prefix
*/
@Suppress("RegExpUnnecessaryNonCapturingGroup")
fun ofWildcardParameter(parameter: String): Regex = Regex("[?&](?:$parameter)[^=]*=.[^&#]*")
fun ofWildcardParameter(parameter: String): Regex = Regex("[?&](?:$parameter)[^=]*=([^&#]*)")
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.georiot

import android.content.Context
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.SanitizerId
import com.svenjacobs.app.leon.core.domain.sanitizer.SearchResultSanitizer

class GeoRiotSanitizer : SearchResultSanitizer(
Regex("[?&](?:GR_URL)=([^&]+)"),
RegexFactory.ofParameter("GR_URL"),
) {

override val id = SanitizerId("georiot")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Léon - The URL Cleaner
* Copyright (C) 2022 Sven Jacobs
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package com.svenjacobs.app.leon.core.domain.sanitizer.google

import android.content.Context
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.SanitizerId
import com.svenjacobs.app.leon.core.domain.sanitizer.SearchResultSanitizer

/**
 * Sanitizer for Google Ads click-through links hosted on `googleadservices.com`.
 *
 * The superclass is configured with a regex for the `adurl` query parameter.
 * Presumably [SearchResultSanitizer] uses the value of that parameter as the
 * cleaned URL; its implementation is not visible here, so confirm against it.
 */
class GoogleAdsSanitizer : SearchResultSanitizer(
    RegexFactory.ofParameter("adurl"),
) {

    override val id = SanitizerId("google_ad_services")

    /** Builds the sanitizer metadata, resolving the display name from string resources. */
    override fun getMetadata(context: Context): Sanitizer.Metadata {
        val displayName = context.getString(R.string.sanitizer_google_ads_name)
        return Sanitizer.Metadata(name = displayName)
    }

    /** Returns `true` when [input] contains a match of the Google Ads host pattern. */
    override fun matchesDomain(input: String): Boolean = input.contains(GOOGLE_ADS_HOST)

    private companion object {
        // NOTE(review): this is an unanchored "contains" match, so the optional scheme
        // and "www." groups do not restrict where in the input the host may appear.
        private val GOOGLE_ADS_HOST = Regex("(?:https?://)?(?:www\\.)?googleadservices\\.com")
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.google

import android.content.Context
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.SanitizerId
import com.svenjacobs.app.leon.core.domain.sanitizer.SearchResultSanitizer

class GoogleSearchSanitizer : SearchResultSanitizer(
Regex("[?&](?:url|q)=([^&]+)"),
RegexFactory.ofParameter("url|q"),
) {

override val id = SanitizerId("google_search")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.linksynergy

import android.content.Context
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.SanitizerId
import com.svenjacobs.app.leon.core.domain.sanitizer.SearchResultSanitizer

class LinkSynergySanitizer : SearchResultSanitizer(
Regex("[?&](?:murl)=([^&]+)"),
RegexFactory.ofParameter("murl"),
) {

override val id = SanitizerId("linksynergy")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@
package com.svenjacobs.app.leon.core.domain.sanitizer.youtube

import android.content.Context
import com.svenjacobs.app.leon.core.common.regex.RegexFactory
import com.svenjacobs.app.leon.core.domain.R
import com.svenjacobs.app.leon.core.domain.sanitizer.Sanitizer
import com.svenjacobs.app.leon.core.domain.sanitizer.SanitizerId
import com.svenjacobs.app.leon.core.domain.sanitizer.SearchResultSanitizer

class YoutubeRedirectSanitizer : SearchResultSanitizer(
Regex("[?&]q=([^&]+)"),
RegexFactory.ofParameter("q"),
) {

override val id = SanitizerId("youtube_redirect")
Expand Down
1 change: 1 addition & 0 deletions core-domain/src/main/res/values/strings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
<string name="sanitizer_ga_name" translatable="false">Google Analytics</string>
<string name="sanitizer_georiot_name" translatable="false">GeoRiot</string>
<string name="sanitizer_google_search_name">Google Search</string>
<string name="sanitizer_google_ads_name" translatable="false">Google Ads</string>
<string name="sanitizer_instagram_name" translatable="false">Instagram</string>
<string name="sanitizer_linksynergy_name" translatable="false">LinkSynergy</string>
<string name="sanitizer_netflix_name" translatable="false">Netflix</string>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Léon - The URL Cleaner
* Copyright (C) 2022 Sven Jacobs
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package com.svenjacobs.app.leon.core.domain.sanitizer.google

import io.kotest.core.spec.style.WordSpec
import io.kotest.matchers.shouldBe

/**
 * Checks that [GoogleAdsSanitizer] reduces a Google Ads click-through link to the
 * destination URL carried in its `adurl` query parameter.
 */
class GoogleAdsSanitizerTest : WordSpec(
    {

        val sanitizer = GoogleAdsSanitizer()

        "invoke" should {

            "extract URL from Google Ads link" {
                // The `adurl` parameter sits in the middle of the query string, followed by
                // further parameters (`ms`, `nb`, ...) that must not leak into the result.
                val adLink =
                    "https://www.googleadservices.com/pagead/aclk?sa=L&ai=Ccd5fYmNxY_3JN4TQ" +
                        "o9kPwIqcyA37ibepbZuL5YvKELCQHxADIABgyY6xiZCk6A-CARdjYS1wdWItNjIxOTgxMTc0" +
                        "NzA0OTM3MaAB8OyYgSmpAt5OXuGRdag-qAMEyAMKqgS-AU_QsgWpC3X4b7LSH1pMKrb4yz70" +
                        "elntS3shophaRc0GA0363USvwfydKTGvg7sJgBeegkd_1uzEg99EtDgAbkpjEvmFZNcCJeAR" +
                        "A0iilQtb2pRgRapZIYuJVDtZJib1XVMlPnV5NyZqXeQe5uUQul1xczG2sCJcO1U7qtgHAgyH" +
                        "A5N_UZh9taO1_6Fxvs5Yrb1Y7aphw3MM1AJHp5xe1Nb-xlKSgYOtY73BYFW0GthsZKj3sYYa" +
                        "VAcaNKdGNQGSBQoIE2gBeMqzyIcEoAZu2AYCgAfwpOngA4gHAZAHApgHAqgHhAioB6jSG6gH" +
                        "tgeoB-DPG6gH6dQbqAeMzRuoB7HcG6gHpJqxAqgHkZ-xAqgHsJuxAqgH36GxAqgHgcYbqAer" +
                        "xRuoB-adsQKoB8ifsQKoB7ehsQKoB9WpsQLSCBgIhMCAQBACGAAyBIHCgA46B8qAgICAgQWx" +
                        "CdouDFc10x_4yAkAmAsBugtBCAIQBRgEIAgoATADQANIAFABWAdgAGgAcAGIAQCYAQGiARUK" +
                        "AggBKAGAAQHQAQGQAgKoAgXAAgLYAQGAAgGIAgXQCxK4DAGaDQESuBP___________8BsBQC" +
                        "wBWBgIBA0BUB2BUB4hYCCAGAFwGKFwoIAxgBKAEwATgBoBcBqRex-7nVVf8wwg&num=3&cid" +
                        "=CAESD-D2sxZZsSg9jandUiDpjA&sig=AOD64_2Kzq5JFcf_khsfs5olxZcNXN75iQ&adurl" +
                        "=https://www.evedyth.shop/&ms=CoACWubneRVfv34M04DyUIeLpuvkwlUzfXCpaunrav" +
                        "-Ai4gNVenCJflFdHYFDDR6LR7QwvKsfwOo9373FlWLUelELxRgenTytMogqTCU3Dp32taq7l" +
                        "tdunYB7MP8RcmJyHNKBHG1QNnEZpcSCdxaH4Mf8rPELdcNFeSsgR0tKY2Yfhc-fM90aG22GT" +
                        "ggyGMKgNXzziGFzmPmtPSDSiVbadhPHXoQTUB1U5NUQaR-CIV8816yqV2b_VOH4h0QZDWyab" +
                        "XhcrrfCpIQNLmEy8g39-YGcMlyiQBovndagTPNMGzoHbO6Yotf7AQeCUBgvIq9SGg-uBXmrg" +
                        "bo1UWe1t2v32dxSRIQ7MyqdMdGbezFYod08gtmhg&nb=8&nx=334&ny=15&dim=360x36"

                sanitizer(adLink) shouldBe "https://www.evedyth.shop/"
            }
        }
    },
)

0 comments on commit 15ca04e

Please sign in to comment.