diff --git a/apps/api-runtime/src/main/java/com/akto/runtime/APICatalogSync.java b/apps/api-runtime/src/main/java/com/akto/runtime/APICatalogSync.java index 82149ac416..eeb63d5646 100644 --- a/apps/api-runtime/src/main/java/com/akto/runtime/APICatalogSync.java +++ b/apps/api-runtime/src/main/java/com/akto/runtime/APICatalogSync.java @@ -4,9 +4,13 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Pattern; -import com.akto.DaoInit; import com.akto.dao.*; import com.akto.dao.context.Context; +import com.akto.dao.filter.MergedUrlsDao; +import com.akto.dto.*; +import com.akto.dto.billing.SyncLimit; +import com.akto.dto.dependency_flow.DependencyFlow; +import com.akto.dto.filter.MergedUrls; import com.akto.dao.monitoring.FilterYamlTemplateDao; import com.akto.dao.runtime_filters.AdvancedTrafficFiltersDao; import com.akto.dto.*; @@ -25,6 +29,7 @@ import com.akto.dto.usage.MetricTypes; import com.akto.log.LoggerMaker; import com.akto.log.LoggerMaker.LogDb; +import com.akto.util.filter.DictionaryFilter; import com.akto.runtime.merge.MergeOnHostOnly; import com.akto.runtime.policies.AktoPolicyNew; import com.akto.task.Cluster; @@ -39,12 +44,10 @@ import com.google.common.hash.BloomFilter; import com.google.common.hash.Funnels; import com.mongodb.BasicDBObject; -import com.mongodb.ConnectionString; import com.mongodb.bulk.BulkWriteResult; import com.mongodb.client.model.*; import com.mongodb.client.result.UpdateResult; import org.apache.commons.lang3.math.NumberUtils; -import org.bson.Document; import org.bson.conversions.Bson; import org.bson.json.JsonParseException; import org.bson.types.ObjectId; @@ -52,7 +55,6 @@ import org.slf4j.LoggerFactory; -import java.util.*; import java.util.Map.Entry; import static com.akto.dto.type.KeyTypes.patternToSubType; @@ -72,6 +74,9 @@ public class APICatalogSync { public Map sensitiveParamInfoBooleanMap; public static boolean mergeAsyncOutside = true; public BloomFilter existingAPIsInDb = BloomFilter.create(Funnels.stringFunnel(Charsets.UTF_8), 1_000_000, 0.001 ); + + public static Set mergedUrls; + public Map advancedFilterMap = new HashMap<>(); public APICatalogSync(String userIdentifier,int thresh, boolean fetchAllSTI) { @@ -86,6 +91,7 @@ public APICatalogSync(String userIdentifier, int thresh, boolean fetchAllSTI, bo this.delta = new HashMap<>(); this.sensitiveParamInfoBooleanMap = new HashMap<>(); this.aktoPolicyNew = new AktoPolicyNew(); + mergedUrls = new HashSet<>(); if (buildFromDb) { buildFromDB(false, fetchAllSTI); AccountSettings accountSettings = AccountSettingsDao.instance.findOne(AccountSettingsDao.generateFilter()); @@ -720,6 +726,7 @@ public static URLTemplate tryParamteresingUrl(URLStatic newUrl){ int start = newUrl.getUrl().startsWith("http") ? 3 : 0; for(int i = start; i < tokens.length; i ++) { String tempToken = tokens[i]; + if(DictionaryFilter.isEnglishWord(tempToken)) continue; if (NumberUtils.isParsable(tempToken)) { newTypes[i] = isNumber(tempToken) ? SuperType.INTEGER : SuperType.FLOAT; @@ -749,7 +756,21 @@ public static URLTemplate tryParamteresingUrl(URLStatic newUrl){ } if (allNull) return null; - return new URLTemplate(tokens, newTypes, newUrl.getMethod()); + + URLTemplate urlTemplate = new URLTemplate(tokens, newTypes, newUrl.getMethod()); + + try { + for(MergedUrls mergedUrl : mergedUrls) { + if(mergedUrl.getUrl().equals(urlTemplate.getTemplateString()) && + mergedUrl.getMethod().equals(urlTemplate.getMethod().name())) { + return null; + } + } + } catch(Exception e) { + loggerMaker.errorAndAddToDb("Error while creating a new URL object: " + e.getMessage(), LogDb.RUNTIME); + } + + return urlTemplate; } @@ -771,6 +792,7 @@ public static URLTemplate tryMergeUrls(URLStatic dbUrl, URLStatic newUrl) { for(int i = 0; i < newTokens.length; i ++) { String tempToken = newTokens[i]; String dbToken = dbTokens[i]; + if (DictionaryFilter.isEnglishWord(tempToken) || DictionaryFilter.isEnglishWord(dbToken)) continue; int minCount = dbUrl.getUrl().startsWith("http") && newUrl.getUrl().startsWith("http") ? 3 : 0; if (tempToken.equalsIgnoreCase(dbToken) || i < minCount) { @@ -804,7 +826,20 @@ public static URLTemplate tryMergeUrls(URLStatic dbUrl, URLStatic newUrl) { if (allNull) return null; if (templatizedStrTokens <= 1) { - return new URLTemplate(newTokens, newTypes, newUrl.getMethod()); + URLTemplate urlTemplate = new URLTemplate(newTokens, newTypes, newUrl.getMethod()); + + try { + for(MergedUrls mergedUrl : mergedUrls) { + if(mergedUrl.getUrl().equals(urlTemplate.getTemplateString()) && + mergedUrl.getMethod().equals(urlTemplate.getMethod().name())) { + return null; + } + } + } catch(Exception e) { + loggerMaker.errorAndAddToDb("Error while creating a new URL object: " + e.getMessage(), LogDb.RUNTIME); + } + + return urlTemplate; } return null; @@ -1558,6 +1593,8 @@ public void buildFromDB(boolean calcDiff, boolean fetchAllSTI) { loggerMaker.errorAndAddToDb("Error while filling urls in apiCollection: " + e.getMessage(), LogDb.RUNTIME); } + mergedUrls = MergedUrlsDao.instance.getMergedUrls(); + loggerMaker.infoAndAddToDb("Building from db completed", LogDb.RUNTIME); aktoPolicyNew.buildFromDb(fetchAllSTI); } diff --git a/apps/api-runtime/src/main/java/com/akto/runtime/Main.java b/apps/api-runtime/src/main/java/com/akto/runtime/Main.java index 4009999aba..e0644e755e 100644 --- a/apps/api-runtime/src/main/java/com/akto/runtime/Main.java +++ b/apps/api-runtime/src/main/java/com/akto/runtime/Main.java @@ -4,7 +4,6 @@ import java.util.*; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; import java.util.regex.Pattern; import com.akto.DaoInit; @@ -16,6 +15,7 @@ import com.akto.log.LoggerMaker; import com.akto.log.LoggerMaker.LogDb; import com.akto.parsers.HttpCallParser; +import com.akto.util.filter.DictionaryFilter; import com.akto.runtime.utils.Utils; import com.akto.util.AccountTask; import com.akto.util.DashboardMode; @@ -150,6 +150,8 @@ public static void main(String[] args) { } int maxPollRecordsConfig = Integer.parseInt(System.getenv("AKTO_KAFKA_MAX_POLL_RECORDS_CONFIG")); + DictionaryFilter.readDictionaryBinary(); + if (topicName == null) topicName = "akto.api.logs"; DaoInit.init(new ConnectionString(mongoURI)); diff --git a/apps/api-runtime/src/test/java/com/akto/parsers/TestDBSync.java b/apps/api-runtime/src/test/java/com/akto/parsers/TestDBSync.java index c1b99d5f9a..12c8b4d9c4 100644 --- a/apps/api-runtime/src/test/java/com/akto/parsers/TestDBSync.java +++ b/apps/api-runtime/src/test/java/com/akto/parsers/TestDBSync.java @@ -26,6 +26,7 @@ import com.akto.runtime.APICatalogSync; import com.akto.runtime.Main; import com.akto.runtime.URLAggregator; +import com.akto.util.filter.DictionaryFilter; import com.akto.runtime.utils.Utils; import com.mongodb.BasicDBObject; import com.mongodb.client.model.Filters; @@ -43,6 +44,8 @@ public class TestDBSync extends MongoBasedTest { public void changeAccountId() { Context.accountId.set(currAccountId); currAccountId += 1; + DictionaryFilter.readDictionaryBinary(); + } public void testInitializer(){ @@ -231,7 +234,7 @@ public void testInvalidMergeParameterizedURL() { APICatalogSync.mergeUrlsAndSave(123, true, false, sync.existingAPIsInDb); sync.buildFromDB(false, true); - assertEquals(0, sync.getDbState(123).getStrictURLToMethods().size()); + assertEquals(1, sync.getDbState(123).getStrictURLToMethods().size()); assertEquals(1, sync.getDbState(123).getTemplateURLToMethods().size()); diff --git a/apps/api-runtime/src/test/java/com/akto/parsers/TestMergingNew.java b/apps/api-runtime/src/test/java/com/akto/parsers/TestMergingNew.java index c452b12ba1..bcf531b26c 100644 --- a/apps/api-runtime/src/test/java/com/akto/parsers/TestMergingNew.java +++ b/apps/api-runtime/src/test/java/com/akto/parsers/TestMergingNew.java @@ -15,6 +15,7 @@ import com.akto.dto.type.*; import com.akto.runtime.APICatalogSync; import com.akto.types.CappedSet; +import com.akto.util.filter.DictionaryFilter; import com.akto.utils.RedactSampleData; import com.google.api.client.util.Charsets; import com.google.common.hash.BloomFilter; @@ -25,6 +26,8 @@ import com.mongodb.client.model.Updates; import org.bson.conversions.Bson; import org.bson.types.ObjectId; +import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; import java.util.*; @@ -36,6 +39,14 @@ public class TestMergingNew extends MongoBasedTest { + @Before + public void initMain() { + DictionaryFilter.readDictionaryBinary(); + } + + + + public void testInitializer(){ Map aktoDataTypeMap = new HashMap<>(); aktoDataTypeMap.put("JWT", new AktoDataType(null, false, null, 0, new IgnoreData(new HashMap<>(), new HashSet<>()), false, true)); @@ -95,6 +106,82 @@ public void testMultipleIntegerMerging() { } + @Test + public void testStringMerging() { + testInitializer(); + SingleTypeInfoDao.instance.getMCollection().drop(); + ApiCollectionsDao.instance.getMCollection().drop(); + HttpCallParser parser = new HttpCallParser("userIdentifier", 1, 1, 1, true); + + String baseUrl = "/api/"; + List responseParams = new ArrayList<>(); + List urls = Arrays.asList( + baseUrl + "demo", + baseUrl + "cat", + baseUrl + "OSHE2CNS", + baseUrl + "2HOIWNJK", + baseUrl + "31a1a7c5-b4e3-47f5-8579-f7fc044c6a98", + baseUrl + "tree" + ); + + for (String c : urls) { + HttpResponseParams resp = createSampleParams("user1", c); + responseParams.add(resp); + } + + parser.syncFunction(responseParams, false, true, null); + parser.apiCatalogSync.syncWithDB(false, true, SyncLimit.noLimit); + APICatalogSync.mergeUrlsAndSave(123, true, false, parser.apiCatalogSync.existingAPIsInDb); + parser.apiCatalogSync.buildFromDB(true, true); + Map urlTemplateMap = parser.apiCatalogSync.getDbState(123).getTemplateURLToMethods(); + Map strictUrlMap = parser.apiCatalogSync.getDbState(123).getStrictURLToMethods(); + + + assertEquals(1, urlTemplateMap.size()); + assertEquals(3, strictUrlMap.size()); + } + + @Test + public void testEnglishWordsUrlTestString() { + testInitializer(); + SingleTypeInfoDao.instance.getMCollection().drop(); + ApiCollectionsDao.instance.getMCollection().drop(); + HttpCallParser parser = new HttpCallParser("userIdentifier", 1, 1, 1, true); + String url = "/link/"; + List responseParams = new ArrayList<>(); + List urls = new ArrayList<>(); + for (String x: Arrays.asList( + "apple", "banana", "cat", "dog", "elephant", "flower", "guitar", "house", + "island", "jungle", "kite", "lemon", "mountain", "night", "ocean", "piano", + "queen", "river", "sun", "tree", "umbrella", "village", "whale", "xylophone", + "yacht", "zebra", "bird", "clock", "desert", "engine", "forest", "garden", + "honey", "igloo", "jacket", "kangaroo", "lamp", "mirror", "notebook", "orange", + "pencil", "quilt", "rain", "star", "telephone", "uniform", "violin", "window", + "yellow", "zipper" + )) { + urls.add(url+x); + } + for (String c: urls) { + HttpResponseParams resp = createSampleParams("user1", c); + responseParams.add(resp); + } + + parser.syncFunction(responseParams.subList(0,23), false, true, null); + parser.apiCatalogSync.syncWithDB(false, true, SyncLimit.noLimit); + assertEquals(23, getStaticURLsSize(parser)); + + parser.syncFunction(responseParams.subList(23,28), false, true, null); + parser.apiCatalogSync.syncWithDB(false, true, SyncLimit.noLimit); + APICatalogSync.mergeUrlsAndSave(123,true, false, parser.apiCatalogSync.existingAPIsInDb); + parser.apiCatalogSync.buildFromDB(false, true); + assertEquals(28, getStaticURLsSize(parser)); + + parser.syncFunction(responseParams.subList(28,33), false, true, null); + parser.apiCatalogSync.syncWithDB(false, true, SyncLimit.noLimit); + assertEquals(33, getStaticURLsSize(parser)); + } + + public int getStaticURLsSize(HttpCallParser parser) { Map urlStaticMap = parser.apiCatalogSync.getDbState(123).getStrictURLToMethods(); diff --git a/apps/dashboard/src/main/java/com/akto/action/observe/InventoryAction.java b/apps/dashboard/src/main/java/com/akto/action/observe/InventoryAction.java index 83dc5d355c..6a0a67c49e 100644 --- a/apps/dashboard/src/main/java/com/akto/action/observe/InventoryAction.java +++ b/apps/dashboard/src/main/java/com/akto/action/observe/InventoryAction.java @@ -3,9 +3,11 @@ import com.akto.action.UserAction; import com.akto.dao.*; import com.akto.dao.context.Context; +import com.akto.dao.filter.MergedUrlsDao; import com.akto.dto.*; import com.akto.dto.ApiInfo.ApiInfoKey; import com.akto.dto.CodeAnalysisApiInfo.CodeAnalysisApiInfoKey; +import com.akto.dto.filter.MergedUrls; import com.akto.dto.traffic.SampleData; import com.akto.dto.type.*; import com.akto.dto.type.URLMethods.Method; @@ -723,6 +725,20 @@ public String deMergeApi() { return ERROR.toUpperCase(); } + try { + MergedUrlsDao.instance.updateOne(Filters.and( + Filters.eq(MergedUrls.URL, url), + Filters.eq(MergedUrls.METHOD, method), + Filters.eq(MergedUrls.API_COLLECTION_ID, apiCollectionId) + ), Updates.combine( + Updates.set(MergedUrls.URL, url), + Updates.set(MergedUrls.METHOD, method), + Updates.set(MergedUrls.API_COLLECTION_ID, apiCollectionId) + )); + } catch (Exception e) { + loggerMaker.errorAndAddToDb("Error while saving merged url in DB: " + e.getMessage(), LogDb.DASHBOARD); + } + SampleData sampleData = SampleDataDao.instance.fetchSampleDataForApi(apiCollectionId, url, urlMethod); List samples = sampleData.getSamples(); diff --git a/apps/dashboard/src/main/java/com/akto/listener/InitializerListener.java b/apps/dashboard/src/main/java/com/akto/listener/InitializerListener.java index 1f4f191344..1d45358cf5 100644 --- a/apps/dashboard/src/main/java/com/akto/listener/InitializerListener.java +++ b/apps/dashboard/src/main/java/com/akto/listener/InitializerListener.java @@ -77,6 +77,7 @@ import com.akto.testing.HostDNSLookup; import com.akto.usage.UsageMetricHandler; import com.akto.testing.workflow_node_executor.Utils; +import com.akto.util.filter.DictionaryFilter; import com.akto.utils.jobs.JobUtils; import com.akto.utils.jobs.MatchingJob; import com.akto.util.AccountTask; @@ -1914,6 +1915,7 @@ public static boolean isNotKubernetes() { @Override public void contextInitialized(javax.servlet.ServletContextEvent sce) { setSubdomain(); + DictionaryFilter.readDictionaryBinary(); String https = System.getenv("AKTO_HTTPS_FLAG"); if (Objects.equals(https, "true")) { diff --git a/apps/mini-runtime/src/main/java/com/akto/hybrid_runtime/APICatalogSync.java b/apps/mini-runtime/src/main/java/com/akto/hybrid_runtime/APICatalogSync.java index b98d977dbc..f5a8cdba1d 100644 --- a/apps/mini-runtime/src/main/java/com/akto/hybrid_runtime/APICatalogSync.java +++ b/apps/mini-runtime/src/main/java/com/akto/hybrid_runtime/APICatalogSync.java @@ -4,13 +4,13 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Pattern; -import com.akto.DaoInit; import com.akto.dao.*; import com.akto.dao.context.Context; +import com.akto.dao.filter.MergedUrlsDao; import com.akto.dto.*; -import com.akto.dto.HttpResponseParams.Source; import com.akto.dto.bulk_updates.BulkUpdates; import com.akto.dto.bulk_updates.UpdatePayload; +import com.akto.dto.filter.MergedUrls; import com.akto.dto.traffic.Key; import com.akto.dto.traffic.SampleData; import com.akto.dto.traffic.TrafficInfo; @@ -19,36 +19,26 @@ import com.akto.dto.type.SingleTypeInfo.SubType; import com.akto.dto.type.SingleTypeInfo.SuperType; import com.akto.dto.type.URLMethods.Method; -import com.akto.hybrid_parsers.HttpCallParser; import com.akto.log.LoggerMaker; import com.akto.log.LoggerMaker.LogDb; import com.akto.data_actor.DataActor; import com.akto.data_actor.DataActorFactory; -import com.akto.hybrid_runtime.merge.MergeOnHostOnly; import com.akto.hybrid_runtime.policies.AktoPolicyNew; -import com.akto.task.Cluster; +import com.akto.util.filter.DictionaryFilter; import com.akto.types.CappedSet; import com.akto.util.JSONUtils; import com.akto.utils.RedactSampleData; import com.alibaba.fastjson2.JSON; import com.alibaba.fastjson2.JSONObject; -import com.mongodb.BasicDBObject; -import com.mongodb.ConnectionString; -import com.mongodb.bulk.BulkWriteResult; import com.mongodb.client.model.*; import com.mongodb.client.result.UpdateResult; import org.apache.commons.lang3.math.NumberUtils; -import org.bson.Document; import org.bson.conversions.Bson; import org.bson.json.JsonParseException; import org.bson.types.ObjectId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.*; -import java.util.concurrent.ConcurrentHashMap; -import java.util.regex.Pattern; - import static com.akto.dto.type.KeyTypes.patternToSubType; public class APICatalogSync { @@ -68,6 +58,7 @@ public class APICatalogSync { public int lastStiFetchTs = 0; private DataActor dataActor = DataActorFactory.fetchInstance(); + public static Set mergedUrls; public APICatalogSync(String userIdentifier,int thresh, boolean fetchAllSTI) { this(userIdentifier, thresh, fetchAllSTI, true); @@ -81,6 +72,7 @@ public APICatalogSync(String userIdentifier, int thresh, boolean fetchAllSTI, bo this.delta = new HashMap<>(); this.sensitiveParamInfoBooleanMap = new HashMap<>(); this.aktoPolicyNew = new AktoPolicyNew(); + this.mergedUrls = new HashSet<>(); if (buildFromDb) { buildFromDB(false, fetchAllSTI); AccountSettings accountSettings = dataActor.fetchAccountSettings(); @@ -325,6 +317,7 @@ private void tryMergingWithKnownStrictURLs( while (iterator.hasNext()) { Map.Entry entry = iterator.next(); URLStatic newUrl = entry.getKey(); + RequestTemplate newTemplate = entry.getValue(); String[] tokens = tokenize(newUrl.getUrl()); @@ -534,6 +527,7 @@ public static URLTemplate tryParamteresingUrl(URLStatic newUrl){ int start = newUrl.getUrl().startsWith("http") ? 3 : 0; for(int i = start; i < tokens.length; i ++) { String tempToken = tokens[i]; + if(DictionaryFilter.isEnglishWord(tempToken)) continue; if (NumberUtils.isParsable(tempToken)) { newTypes[i] = isNumber(tempToken) ? SuperType.INTEGER : SuperType.FLOAT; @@ -563,7 +557,21 @@ public static URLTemplate tryParamteresingUrl(URLStatic newUrl){ } if (allNull) return null; - return new URLTemplate(tokens, newTypes, newUrl.getMethod()); + + URLTemplate urlTemplate = new URLTemplate(tokens, newTypes, newUrl.getMethod()); + + try { + for(MergedUrls mergedUrl : mergedUrls) { + if(mergedUrl.getUrl().equals(urlTemplate.getTemplateString()) && + mergedUrl.getMethod().equals(urlTemplate.getMethod().name())) { + return null; + } + } + } catch(Exception e) { + loggerMaker.errorAndAddToDb("Error while creating a new URL object: " + e.getMessage(), LogDb.RUNTIME); + } + + return urlTemplate; } @@ -585,6 +593,7 @@ public static URLTemplate tryMergeUrls(URLStatic dbUrl, URLStatic newUrl) { for(int i = 0; i < newTokens.length; i ++) { String tempToken = newTokens[i]; String dbToken = dbTokens[i]; + if (DictionaryFilter.isEnglishWord(tempToken) || DictionaryFilter.isEnglishWord(dbToken)) continue; int minCount = dbUrl.getUrl().startsWith("http") && newUrl.getUrl().startsWith("http") ? 3 : 0; if (tempToken.equalsIgnoreCase(dbToken) || i < minCount) { @@ -618,7 +627,20 @@ public static URLTemplate tryMergeUrls(URLStatic dbUrl, URLStatic newUrl) { if (allNull) return null; if (templatizedStrTokens <= 1) { - return new URLTemplate(newTokens, newTypes, newUrl.getMethod()); + URLTemplate urlTemplate = new URLTemplate(newTokens, newTypes, newUrl.getMethod()); + + try { + for(MergedUrls mergedUrl : mergedUrls) { + if(mergedUrl.getUrl().equals(urlTemplate.getTemplateString()) && + mergedUrl.getMethod().equals(urlTemplate.getMethod().name())) { + return null; + } + } + } catch(Exception e) { + loggerMaker.errorAndAddToDb("Error while creating a new URL object: " + e.getMessage(), LogDb.RUNTIME); + } + + return urlTemplate; } return null; @@ -636,6 +658,7 @@ private void tryWithKnownURLTemplates( while (iterator.hasNext()) { Map.Entry entry = iterator.next(); URLStatic newUrl = entry.getKey(); + RequestTemplate newRequestTemplate = entry.getValue(); for (URLTemplate urlTemplate: dbCatalog.getTemplateURLToMethods().keySet()) { @@ -1093,6 +1116,9 @@ public void buildFromDB(boolean calcDiff, boolean fetchAllSTI) { } catch (Exception e) { loggerMaker.infoAndAddToDb("Error while clearing values in db: " + e.getMessage(), LogDb.RUNTIME); } + + mergedUrls = MergedUrlsDao.instance.getMergedUrls(); + aktoPolicyNew.buildFromDb(fetchAllSTI); } diff --git a/apps/mini-runtime/src/main/java/com/akto/hybrid_runtime/Main.java b/apps/mini-runtime/src/main/java/com/akto/hybrid_runtime/Main.java index 6fada3f0ac..5ecc98741d 100644 --- a/apps/mini-runtime/src/main/java/com/akto/hybrid_runtime/Main.java +++ b/apps/mini-runtime/src/main/java/com/akto/hybrid_runtime/Main.java @@ -23,6 +23,7 @@ import com.akto.data_actor.DataActorFactory; import com.akto.database_abstractor_authenticator.JwtAuthenticator; import com.akto.util.DashboardMode; +import com.akto.util.filter.DictionaryFilter; import com.google.gson.Gson; import com.mongodb.ConnectionString; import com.mongodb.client.model.Filters; @@ -151,6 +152,7 @@ public static void main(String[] args) { if (topicName == null) topicName = "akto.api.logs"; //DaoInit.init(new ConnectionString(mongoURI)); + DictionaryFilter.readDictionaryBinary(); loggerMaker.infoAndAddToDb("Runtime starting at " + Context.now() + "....", LogDb.RUNTIME); diff --git a/libs/dao/src/main/java/com/akto/dao/filter/MergedUrlsDao.java b/libs/dao/src/main/java/com/akto/dao/filter/MergedUrlsDao.java new file mode 100644 index 0000000000..028cdaf66f --- /dev/null +++ b/libs/dao/src/main/java/com/akto/dao/filter/MergedUrlsDao.java @@ -0,0 +1,36 @@ +package com.akto.dao.filter; + +import com.akto.dao.AccountsContextDao; +import com.akto.dto.filter.MergedUrls; +import com.mongodb.client.MongoCursor; + +import java.util.HashSet; +import java.util.Set; + +public class MergedUrlsDao extends AccountsContextDao { + + public static final MergedUrlsDao instance = new MergedUrlsDao(); + + @Override + public String getCollName() { + return "merged_urls"; + } + + public Set getMergedUrls() { + MongoCursor cursor = instance.getMCollection().find().cursor(); + + Set mergedUrls = new HashSet<>(); + + while(cursor.hasNext()) { + MergedUrls mergedUrlsObj = cursor.next(); + mergedUrls.add(mergedUrlsObj); + } + + return mergedUrls; + } + + @Override + public Class getClassT() { + return MergedUrls.class; + } +} diff --git a/libs/dao/src/main/java/com/akto/dto/filter/MergedUrls.java b/libs/dao/src/main/java/com/akto/dto/filter/MergedUrls.java new file mode 100644 index 0000000000..df94cfb5f8 --- /dev/null +++ b/libs/dao/src/main/java/com/akto/dto/filter/MergedUrls.java @@ -0,0 +1,56 @@ +package com.akto.dto.filter; + +import org.bson.types.ObjectId; + +public class MergedUrls { + private ObjectId id; + + public static final String URL = "url"; + private String url; + + public static final String METHOD = "method"; + private String method; + + public static final String API_COLLECTION_ID = "apiCollectionId"; + private int apiCollectionId; + + public MergedUrls() {} + + public MergedUrls(String url, String method, int apiCollectionId) { + this.url = url; + this.method = method; + this.apiCollectionId = apiCollectionId; + } + + public ObjectId getId() { + return id; + } + + public void setId(ObjectId id) { + this.id = id; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public String getMethod() { + return method; + } + + public void setMethod(String method) { + this.method = method; + } + + public int getApiCollectionId() { + return apiCollectionId; + } + + public void setApiCollectionId(int apiCollectionId) { + this.apiCollectionId = apiCollectionId; + } +} diff --git a/libs/dao/src/main/java/com/akto/dto/type/URLTemplate.java b/libs/dao/src/main/java/com/akto/dto/type/URLTemplate.java index e729995289..0430ccebf6 100644 --- a/libs/dao/src/main/java/com/akto/dto/type/URLTemplate.java +++ b/libs/dao/src/main/java/com/akto/dto/type/URLTemplate.java @@ -6,6 +6,7 @@ import com.akto.dto.type.SingleTypeInfo.SuperType; import com.akto.dto.type.URLMethods.Method; +import com.akto.util.filter.DictionaryFilter; import org.bson.codecs.pojo.annotations.BsonDiscriminator; import org.bson.codecs.pojo.annotations.BsonId; import org.apache.commons.lang3.math.NumberUtils; @@ -70,7 +71,7 @@ public boolean match(String[] url, Method urlMethod) { if (thisToken == null) { SuperType type = types[i]; - + if (DictionaryFilter.isEnglishWord(thatToken)) return false; switch(type) { case BOOLEAN: if (!"true".equals(thatToken.toLowerCase()) && !"false".equals(thatToken.toLowerCase())) return false; diff --git a/libs/dao/src/main/java/com/akto/util/filter/DictionaryFilter.java b/libs/dao/src/main/java/com/akto/util/filter/DictionaryFilter.java new file mode 100644 index 0000000000..1415357927 --- /dev/null +++ b/libs/dao/src/main/java/com/akto/util/filter/DictionaryFilter.java @@ -0,0 +1,55 @@ +package com.akto.util.filter; + +import com.google.common.hash.BloomFilter; +import com.google.common.hash.Funnels; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; + +public class DictionaryFilter { + private static final Logger logger = LoggerFactory.getLogger(DictionaryFilter.class); + public static BloomFilter dictFilter = null; + + public static void readDictionaryBinary() { + // English Word List Repo: https://github.com/dwyl/english-words + try (InputStream binary = DictionaryFilter.class.getResourceAsStream("/DictionaryBinary")) { + logger.info("reading dictionary binary"); + dictFilter = BloomFilter.readFrom(binary, Funnels.stringFunnel(StandardCharsets.UTF_8)); + } catch (Exception e) { + logger.error("Error while reading bloom filter binary: " + e.getMessage(), e); + } + } + + public static boolean isEnglishWord(String word) { + if(dictFilter == null || word.trim().isEmpty()) return false; + + String[] symbolWords = word.split("[-_.]"); + if(wordsChecker(symbolWords)) return true; + + boolean flag = true; + + for(String symbolWord : symbolWords) { + String[] camelCaseWords = StringUtils.splitByCharacterTypeCamelCase(symbolWord); + if(!wordsChecker(camelCaseWords)) { + flag = false; + break; + } + } + + if(flag) return true; + + return dictFilter.mightContain(word); + } + + private static boolean wordsChecker(String[] words) { + for(String seg : words) { + if(!seg.isEmpty() && !dictFilter.mightContain(seg.toUpperCase())) return false; + } + + return true; + } + +} diff --git a/libs/dao/src/main/java/com/akto/util/filter/DictionaryFilterCreator.java b/libs/dao/src/main/java/com/akto/util/filter/DictionaryFilterCreator.java new file mode 100644 index 0000000000..cc34f608a9 --- /dev/null +++ b/libs/dao/src/main/java/com/akto/util/filter/DictionaryFilterCreator.java @@ -0,0 +1,70 @@ +package com.akto.util.filter; + +import com.google.common.hash.BloomFilter; +import com.google.common.hash.Funnels; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.util.List; + +public class DictionaryFilterCreator { + + public static void main(String[] args) { + DictionaryFilterCreator.insertDictionary(); +// DictionaryFilterCreator.readDictionary(); + } + public static BloomFilter dictFilter = BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), 1_000_000, 0.001); + + public static void insertDictionary() { + /* + - English Word List Repo: https://github.com/dwyl/english-words/blob/master/words_alpha.txt + */ + try(InputStream inputStream = DictionaryFilterCreator.class.getResourceAsStream("/words_alpha.txt")) { + try(BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) { + + if(inputStream == null) { + System.err.println("File not found!"); + return; + } + + String line; + while((line = reader.readLine()) != null) { + if(line.length() > 2) dictFilter.put(line.toUpperCase()); + } + + insertWords(WordListProvider.getTwoLettersWords()); + insertWords(WordListProvider.getAdjectives()); + insertWords(WordListProvider.getAdverbs()); + insertWords(WordListProvider.getNouns()); + insertWords(WordListProvider.getVerbs()); + insertWords(WordListProvider.getArticles()); + insertWords(WordListProvider.getPronouns()); + + try (FileOutputStream fos = new FileOutputStream("libs/dao/src/main/resources/DictionaryBinary")) { + dictFilter.writeTo(fos); + } catch (IOException e) { + e.printStackTrace(); + } + } + } catch (Exception e) { + e.printStackTrace(); + } + } + + private static void insertWords(List wordList) { + for(String twoLetterWord : wordList) { + dictFilter.put(twoLetterWord.toUpperCase()); + } + } + + public static void readDictionary() { + try(InputStream fis = DictionaryFilterCreator.class.getResourceAsStream("/DictionaryBinary")) { + BloomFilter bloomFilter = BloomFilter.readFrom(fis, Funnels.stringFunnel(StandardCharsets.UTF_8)); + + System.out.println(bloomFilter.mightContain("aa".toUpperCase())); + + } catch (IOException e) { + e.printStackTrace(); + } + } +} diff --git a/libs/dao/src/main/java/com/akto/util/filter/WordListProvider.java b/libs/dao/src/main/java/com/akto/util/filter/WordListProvider.java new file mode 100644 index 0000000000..d23e3695e2 --- /dev/null +++ b/libs/dao/src/main/java/com/akto/util/filter/WordListProvider.java @@ -0,0 +1,64 @@ +package com.akto.util.filter; + +import java.util.Arrays; +import java.util.List; + +public class WordListProvider { + + // Method to return a list of two letters words + public static List getTwoLettersWords() { + return Arrays.asList( + "am", "an", "as", "at", "be", "by", "do", + "go", "he", "if", "in", "is", "it", "me", + "my", "no", "of", "on", "or", "so", "to", + "up", "us", "we", "ok" + ); + } + + // Method to return a list of noun words + public static List getNouns() { + return Arrays.asList( + "cat", "dog", "car", "tree", "house", + "computer", "city", "river", "bird", "mountain" + ); + } + + // Method to return a list of verb words + public static List getVerbs() { + return Arrays.asList( + "run", "jump", "swim", "read", "write", + "fly", "eat", "sleep", "walk", "drive" + ); + } + + // Method to return a list of adverb words + public static List getAdverbs() { + return Arrays.asList( + "quickly", "slowly", "carefully", "easily", "happily", + "sadly", "loudly", "silently", "brightly", "angrily" + ); + } + + // Method to return a list of adjective words + public static List getAdjectives() { + return Arrays.asList( + "happy", "sad", "fast", "slow", "bright", + "dark", "tall", "short", "new", "old" + ); + } + + // Method to return a list of article words + public static List getArticles() { + return Arrays.asList( + "a", "an", "the" + ); + } + + // Method to return a list of pronoun words + public static List getPronouns() { + return Arrays.asList( + "he", "she", "it", "they", "we", + "you", "I", "me", "us", "them" + ); + } +} \ No newline at end of file diff --git a/libs/dao/src/main/resources/DictionaryBinary b/libs/dao/src/main/resources/DictionaryBinary new file mode 100644 index 0000000000..b99825594d Binary files /dev/null and b/libs/dao/src/main/resources/DictionaryBinary differ diff --git a/libs/dao/src/test/java/com/akto/utils/filter/TestDictionaryFilter.java b/libs/dao/src/test/java/com/akto/utils/filter/TestDictionaryFilter.java new file mode 100644 index 0000000000..98b157bf50 --- /dev/null +++ b/libs/dao/src/test/java/com/akto/utils/filter/TestDictionaryFilter.java @@ -0,0 +1,155 @@ +package com.akto.utils.filter; + +import com.akto.util.filter.DictionaryFilter; +import org.junit.Before; +import org.junit.Test; + +import java.util.Arrays; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class TestDictionaryFilter { + @Before + public void initMain() { + DictionaryFilter.readDictionaryBinary(); + } + + + @Test + public void testValidEnglishWord() { + assertTrue(DictionaryFilter.isEnglishWord("demo")); + assertTrue(DictionaryFilter.isEnglishWord("cat")); + assertTrue(DictionaryFilter.isEnglishWord("example")); + } + + @Test + public void testInvalidEnglishWord() { + assertFalse(DictionaryFilter.isEnglishWord("xyzabc")); + assertFalse(DictionaryFilter.isEnglishWord("nonexistentword")); + } + + @Test + public void testHyphenatedWords() { + assertTrue(DictionaryFilter.isEnglishWord("well-known")); + assertFalse(DictionaryFilter.isEnglishWord("well-known-xyzabc")); + } + + @Test + public void testUnderscoreSeparatedWords() { + assertTrue(DictionaryFilter.isEnglishWord("black_white")); + assertTrue(DictionaryFilter.isEnglishWord("red_blue")); + assertFalse(DictionaryFilter.isEnglishWord("red_blue_xyzabc")); + } + + @Test + public void testDotSeparatedWords() { + assertTrue(DictionaryFilter.isEnglishWord("hello.world")); + assertTrue(DictionaryFilter.isEnglishWord("good.bye")); + assertFalse(DictionaryFilter.isEnglishWord("hello.world.xyzabc")); + } + + @Test + public void testTwoLetterWords() { + assertTrue(DictionaryFilter.isEnglishWord("is")); + assertTrue(DictionaryFilter.isEnglishWord("by")); + assertTrue(DictionaryFilter.isEnglishWord("of")); + assertFalse(DictionaryFilter.isEnglishWord("iz")); + } + + @Test + public void testEmptyString() { + assertFalse(DictionaryFilter.isEnglishWord("")); + } + + @Test + public void testMixedCaseWords() { + assertTrue(DictionaryFilter.isEnglishWord("Demo")); + assertTrue(DictionaryFilter.isEnglishWord("CaT")); + assertTrue(DictionaryFilter.isEnglishWord("YesExist")); + assertFalse(DictionaryFilter.isEnglishWord("NotExistz")); + assertFalse(DictionaryFilter.isEnglishWord("OSHE2CNS")); + } + + @Test + public void testEveryCase() { + assertTrue(DictionaryFilter.isEnglishWord("smallDog_Cute")); + assertTrue(DictionaryFilter.isEnglishWord("You.and-JohnHappy")); + assertTrue(DictionaryFilter.isEnglishWord("DEMO_goingWhere")); + assertTrue(DictionaryFilter.isEnglishWord("DEMO_goingWhereIS")); + assertFalse(DictionaryFilter.isEnglishWord("DEMOZ_goingHere")); + assertFalse(DictionaryFilter.isEnglishWord("DEMO_goingHere.iz")); + assertFalse(DictionaryFilter.isEnglishWord("You.and-XyzabcHappy")); + } + + @Test + public void testTwoLettersWords() { + for(String word : Arrays.asList( + "am", "an", "as", "at", "be", "by", "do", + "go", "he", "if", "in", "is", "it", "me", + "my", "no", "of", "on", "or", "so", "to", + "up", "us", "we" + )) { + assertTrue(DictionaryFilter.isEnglishWord(word)); + } + } + + @Test + public void testNouns() { + for(String word : Arrays.asList( + "cat", "dog", "car", "tree", "house", + "computer", "city", "river", "bird", "mountain" + )) { + assertTrue(DictionaryFilter.isEnglishWord(word)); + } + } + + @Test + public void testVerbs() { + for(String word : Arrays.asList( + "run", "jump", "swim", "read", "write", + "fly", "eat", "sleep", "walk", "drive" + )) { + assertTrue(DictionaryFilter.isEnglishWord(word)); + } + } + + @Test + public void testAdverbs() { + for(String word : Arrays.asList( + "quickly", "slowly", "carefully", "easily", "happily", + "sadly", "loudly", "silently", "brightly", "angrily" + )) { + assertTrue(DictionaryFilter.isEnglishWord(word)); + } + } + + @Test + public void testAdjectives() { + for(String word : Arrays.asList( + "happy", "sad", "fast", "slow", "bright", + "dark", "tall", "short", "new", "old" + )) { + assertTrue(DictionaryFilter.isEnglishWord(word)); + } + } + + @Test + public void testArticles() { + for(String word : Arrays.asList( + "a", "an", "the" + )) { + assertTrue(DictionaryFilter.isEnglishWord(word)); + } + } + + @Test + public void testPronouns() { + for(String word : Arrays.asList( + "he", "she", "it", "they", "we", + "you", "I", "me", "us", "them" + )) { + assertTrue(DictionaryFilter.isEnglishWord(word)); + } + } +}