Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/bloom filter for dict #1397

Merged
merged 16 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
db33344
feat: created bloom filter for english dictionary words
TangoBeee Aug 22, 2024
37425d6
Merge branch 'akto-api-security:master' into feature/bloom_filter_for…
TangoBeee Aug 22, 2024
fbc807d
fix: loading the dictionary binary while initializing context
TangoBeeAkto Aug 23, 2024
550559b
fix: added a camel case check for isEnglishWord and fixed some bugs
TangoBeeAkto Aug 23, 2024
afcc9bd
fix: loading dictionary binary in TestDBSync
TangoBeeAkto Aug 23, 2024
554c161
feat: added more unit tests and added two letters words in the dictio…
TangoBeeAkto Aug 26, 2024
48863a6
removed words_alpha.txt from the resource folder
TangoBeeAkto Aug 27, 2024
f763179
feat: created bloom filter for english dictionary words
TangoBeee Aug 22, 2024
3c8b8ca
fix: loading the dictionary binary while initializing context
TangoBeeAkto Aug 23, 2024
93e1ea4
fix: added a camel case check for isEnglishWord and fixed some bugs
TangoBeeAkto Aug 23, 2024
62e6067
fix: loading dictionary binary in TestDBSync
TangoBeeAkto Aug 23, 2024
c635919
feat: added more unit tests and added two letters words in the dictio…
TangoBeeAkto Aug 26, 2024
3764ffd
removed words_alpha.txt from the resource folder
TangoBeeAkto Aug 27, 2024
bba8511
Merge remote-tracking branch 'origin/feature/bloom_filter_for_dict' i…
TangoBeeAkto Aug 30, 2024
c7e85c1
feat: inserting demerged urls in db and stopping them from merging again
TangoBeeAkto Sep 2, 2024
2e1de40
Merge branch 'master' into feature/bloom_filter_for_dict
TangoBeeAkto Sep 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;

import com.akto.DaoInit;
import com.akto.dao.*;
import com.akto.dao.context.Context;
import com.akto.dao.filter.MergedUrlsDao;
import com.akto.dto.*;
import com.akto.dto.billing.SyncLimit;
import com.akto.dto.dependency_flow.DependencyFlow;
import com.akto.dto.filter.MergedUrls;
import com.akto.dao.monitoring.FilterYamlTemplateDao;
import com.akto.dao.runtime_filters.AdvancedTrafficFiltersDao;
import com.akto.dto.*;
Expand All @@ -25,6 +29,7 @@
import com.akto.dto.usage.MetricTypes;
import com.akto.log.LoggerMaker;
import com.akto.log.LoggerMaker.LogDb;
import com.akto.util.filter.DictionaryFilter;
import com.akto.runtime.merge.MergeOnHostOnly;
import com.akto.runtime.policies.AktoPolicyNew;
import com.akto.task.Cluster;
Expand All @@ -39,20 +44,17 @@
import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;
import com.mongodb.BasicDBObject;
import com.mongodb.ConnectionString;
import com.mongodb.bulk.BulkWriteResult;
import com.mongodb.client.model.*;
import com.mongodb.client.result.UpdateResult;
import org.apache.commons.lang3.math.NumberUtils;
import org.bson.Document;
import org.bson.conversions.Bson;
import org.bson.json.JsonParseException;
import org.bson.types.ObjectId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


import java.util.*;
import java.util.Map.Entry;

import static com.akto.dto.type.KeyTypes.patternToSubType;
Expand All @@ -72,6 +74,9 @@ public class APICatalogSync {
public Map<SensitiveParamInfo, Boolean> sensitiveParamInfoBooleanMap;
public static boolean mergeAsyncOutside = true;
public BloomFilter<CharSequence> existingAPIsInDb = BloomFilter.create(Funnels.stringFunnel(Charsets.UTF_8), 1_000_000, 0.001 );

public static Set<MergedUrls> mergedUrls;

public Map<String, FilterConfig> advancedFilterMap = new HashMap<>();

public APICatalogSync(String userIdentifier,int thresh, boolean fetchAllSTI) {
Expand All @@ -86,6 +91,7 @@ public APICatalogSync(String userIdentifier, int thresh, boolean fetchAllSTI, bo
this.delta = new HashMap<>();
this.sensitiveParamInfoBooleanMap = new HashMap<>();
this.aktoPolicyNew = new AktoPolicyNew();
mergedUrls = new HashSet<>();
if (buildFromDb) {
buildFromDB(false, fetchAllSTI);
AccountSettings accountSettings = AccountSettingsDao.instance.findOne(AccountSettingsDao.generateFilter());
Expand Down Expand Up @@ -720,6 +726,7 @@ public static URLTemplate tryParamteresingUrl(URLStatic newUrl){
int start = newUrl.getUrl().startsWith("http") ? 3 : 0;
for(int i = start; i < tokens.length; i ++) {
String tempToken = tokens[i];
if(DictionaryFilter.isEnglishWord(tempToken)) continue;

if (NumberUtils.isParsable(tempToken)) {
newTypes[i] = isNumber(tempToken) ? SuperType.INTEGER : SuperType.FLOAT;
Expand Down Expand Up @@ -749,7 +756,21 @@ public static URLTemplate tryParamteresingUrl(URLStatic newUrl){
}

if (allNull) return null;
return new URLTemplate(tokens, newTypes, newUrl.getMethod());

URLTemplate urlTemplate = new URLTemplate(tokens, newTypes, newUrl.getMethod());

try {
for(MergedUrls mergedUrl : mergedUrls) {
if(mergedUrl.getUrl().equals(urlTemplate.getTemplateString()) &&
mergedUrl.getMethod().equals(urlTemplate.getMethod().name())) {
return null;
}
}
} catch(Exception e) {
loggerMaker.errorAndAddToDb("Error while creating a new URL object: " + e.getMessage(), LogDb.RUNTIME);
}

return urlTemplate;
}


Expand All @@ -771,6 +792,7 @@ public static URLTemplate tryMergeUrls(URLStatic dbUrl, URLStatic newUrl) {
for(int i = 0; i < newTokens.length; i ++) {
String tempToken = newTokens[i];
String dbToken = dbTokens[i];
if (DictionaryFilter.isEnglishWord(tempToken) || DictionaryFilter.isEnglishWord(dbToken)) continue;

int minCount = dbUrl.getUrl().startsWith("http") && newUrl.getUrl().startsWith("http") ? 3 : 0;
if (tempToken.equalsIgnoreCase(dbToken) || i < minCount) {
Expand Down Expand Up @@ -804,7 +826,20 @@ public static URLTemplate tryMergeUrls(URLStatic dbUrl, URLStatic newUrl) {
if (allNull) return null;

if (templatizedStrTokens <= 1) {
return new URLTemplate(newTokens, newTypes, newUrl.getMethod());
URLTemplate urlTemplate = new URLTemplate(newTokens, newTypes, newUrl.getMethod());

try {
for(MergedUrls mergedUrl : mergedUrls) {
if(mergedUrl.getUrl().equals(urlTemplate.getTemplateString()) &&
mergedUrl.getMethod().equals(urlTemplate.getMethod().name())) {
return null;
}
}
} catch(Exception e) {
loggerMaker.errorAndAddToDb("Error while creating a new URL object: " + e.getMessage(), LogDb.RUNTIME);
}

return urlTemplate;
}

return null;
Expand Down Expand Up @@ -1558,6 +1593,8 @@ public void buildFromDB(boolean calcDiff, boolean fetchAllSTI) {
loggerMaker.errorAndAddToDb("Error while filling urls in apiCollection: " + e.getMessage(), LogDb.RUNTIME);
}

mergedUrls = MergedUrlsDao.instance.getMergedUrls();

loggerMaker.infoAndAddToDb("Building from db completed", LogDb.RUNTIME);
aktoPolicyNew.buildFromDb(fetchAllSTI);
}
Expand Down
4 changes: 3 additions & 1 deletion apps/api-runtime/src/main/java/com/akto/runtime/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import java.util.*;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;

import com.akto.DaoInit;
Expand All @@ -16,6 +15,7 @@
import com.akto.log.LoggerMaker;
import com.akto.log.LoggerMaker.LogDb;
import com.akto.parsers.HttpCallParser;
import com.akto.util.filter.DictionaryFilter;
import com.akto.runtime.utils.Utils;
import com.akto.util.AccountTask;
import com.akto.util.DashboardMode;
Expand Down Expand Up @@ -150,6 +150,8 @@ public static void main(String[] args) {
}
int maxPollRecordsConfig = Integer.parseInt(System.getenv("AKTO_KAFKA_MAX_POLL_RECORDS_CONFIG"));

DictionaryFilter.readDictionaryBinary();
TangoBeeAkto marked this conversation as resolved.
Show resolved Hide resolved

if (topicName == null) topicName = "akto.api.logs";

DaoInit.init(new ConnectionString(mongoURI));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import com.akto.runtime.APICatalogSync;
import com.akto.runtime.Main;
import com.akto.runtime.URLAggregator;
import com.akto.util.filter.DictionaryFilter;
import com.akto.runtime.utils.Utils;
import com.mongodb.BasicDBObject;
import com.mongodb.client.model.Filters;
Expand All @@ -43,6 +44,8 @@ public class TestDBSync extends MongoBasedTest {
// Stores the current value of currAccountId into Context.accountId, increments
// currAccountId so the next invocation uses a different id, and then calls
// DictionaryFilter.readDictionaryBinary().
// NOTE(review): presumably readDictionaryBinary() loads the English-word dictionary
// used by the URL-merging logic — confirm in DictionaryFilter. It is invoked on every
// call of this method; verify that repeated loading is cheap or idempotent.
public void changeAccountId() {
Context.accountId.set(currAccountId);
currAccountId += 1;
DictionaryFilter.readDictionaryBinary();

}

public void testInitializer(){
Expand Down Expand Up @@ -231,7 +234,7 @@ public void testInvalidMergeParameterizedURL() {
APICatalogSync.mergeUrlsAndSave(123, true, false, sync.existingAPIsInDb);
sync.buildFromDB(false, true);

assertEquals(0, sync.getDbState(123).getStrictURLToMethods().size());
assertEquals(1, sync.getDbState(123).getStrictURLToMethods().size());
assertEquals(1, sync.getDbState(123).getTemplateURLToMethods().size());


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import com.akto.dto.type.*;
import com.akto.runtime.APICatalogSync;
import com.akto.types.CappedSet;
import com.akto.util.filter.DictionaryFilter;
import com.akto.utils.RedactSampleData;
import com.google.api.client.util.Charsets;
import com.google.common.hash.BloomFilter;
Expand All @@ -25,6 +26,8 @@
import com.mongodb.client.model.Updates;
import org.bson.conversions.Bson;
import org.bson.types.ObjectId;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

import java.util.*;
Expand All @@ -36,6 +39,14 @@

public class TestMergingNew extends MongoBasedTest {

/**
 * JUnit {@code @Before} hook: calls {@code DictionaryFilter.readDictionaryBinary()}
 * ahead of every test in this class.
 * NOTE(review): presumably this loads the dictionary that
 * {@code DictionaryFilter.isEnglishWord} relies on — confirm in DictionaryFilter.
 */
@Before
public void initMain() {
DictionaryFilter.readDictionaryBinary();
}




public void testInitializer(){
Map<String, AktoDataType> aktoDataTypeMap = new HashMap<>();
aktoDataTypeMap.put("JWT", new AktoDataType(null, false, null, 0, new IgnoreData(new HashMap<>(), new HashSet<>()), false, true));
Expand Down Expand Up @@ -95,6 +106,82 @@ public void testMultipleIntegerMerging() {

}

/**
 * Sends six "/api/&lt;segment&gt;" URLs through the parser, syncs them to the DB, runs
 * {@code APICatalogSync.mergeUrlsAndSave} for id 123, rebuilds the state from the DB and
 * then expects exactly one template URL and three strict URLs for id 123.
 * NOTE(review): presumably "demo", "cat" and "tree" remain strict because they are
 * dictionary words while the ID-like segments collapse into one template — confirm
 * against DictionaryFilter.
 */
@Test
public void testStringMerging() {
    testInitializer();
    SingleTypeInfoDao.instance.getMCollection().drop();
    ApiCollectionsDao.instance.getMCollection().drop();
    HttpCallParser callParser = new HttpCallParser("userIdentifier", 1, 1, 1, true);

    final String prefix = "/api/";
    String[] lastSegments = {
        "demo",
        "cat",
        "OSHE2CNS",
        "2HOIWNJK",
        "31a1a7c5-b4e3-47f5-8579-f7fc044c6a98",
        "tree"
    };

    // Build one sample response per URL, in the same order as the segments above.
    List<HttpResponseParams> sampledResponses = new ArrayList<>();
    for (String segment : lastSegments) {
        sampledResponses.add(createSampleParams("user1", prefix + segment));
    }

    // Ingest, persist, merge, and reload the catalog from the DB.
    callParser.syncFunction(sampledResponses, false, true, null);
    callParser.apiCatalogSync.syncWithDB(false, true, SyncLimit.noLimit);
    APICatalogSync.mergeUrlsAndSave(123, true, false, callParser.apiCatalogSync.existingAPIsInDb);
    callParser.apiCatalogSync.buildFromDB(true, true);

    Map<URLTemplate, RequestTemplate> templates =
        callParser.apiCatalogSync.getDbState(123).getTemplateURLToMethods();
    Map<URLStatic, RequestTemplate> statics =
        callParser.apiCatalogSync.getDbState(123).getStrictURLToMethods();

    assertEquals(1, templates.size());
    assertEquals(3, statics.size());
}

/**
 * Builds "/link/&lt;word&gt;" sample requests from a fixed list of 50 words and feeds the
 * first 33 of them to the parser in three batches (23, then 5, then 5). After each batch
 * the number of static URLs reported by {@code getStaticURLsSize} must equal the number
 * of URLs sent so far; the second batch additionally runs
 * {@code APICatalogSync.mergeUrlsAndSave} and a rebuild from the DB before checking.
 */
@Test
public void testEnglishWordsUrlTestString() {
    testInitializer();
    SingleTypeInfoDao.instance.getMCollection().drop();
    ApiCollectionsDao.instance.getMCollection().drop();
    HttpCallParser callParser = new HttpCallParser("userIdentifier", 1, 1, 1, true);

    final String prefix = "/link/";
    String[] words = {
        "apple", "banana", "cat", "dog", "elephant", "flower", "guitar", "house",
        "island", "jungle", "kite", "lemon", "mountain", "night", "ocean", "piano",
        "queen", "river", "sun", "tree", "umbrella", "village", "whale", "xylophone",
        "yacht", "zebra", "bird", "clock", "desert", "engine", "forest", "garden",
        "honey", "igloo", "jacket", "kangaroo", "lamp", "mirror", "notebook", "orange",
        "pencil", "quilt", "rain", "star", "telephone", "uniform", "violin", "window",
        "yellow", "zipper"
    };

    // One sample response per word, created in list order.
    List<HttpResponseParams> sampledResponses = new ArrayList<>();
    for (String word : words) {
        sampledResponses.add(createSampleParams("user1", prefix + word));
    }

    // Batch 1: first 23 URLs, sync only.
    callParser.syncFunction(sampledResponses.subList(0, 23), false, true, null);
    callParser.apiCatalogSync.syncWithDB(false, true, SyncLimit.noLimit);
    assertEquals(23, getStaticURLsSize(callParser));

    // Batch 2: next 5 URLs, then an explicit merge pass and a reload from the DB.
    callParser.syncFunction(sampledResponses.subList(23, 28), false, true, null);
    callParser.apiCatalogSync.syncWithDB(false, true, SyncLimit.noLimit);
    APICatalogSync.mergeUrlsAndSave(123, true, false, callParser.apiCatalogSync.existingAPIsInDb);
    callParser.apiCatalogSync.buildFromDB(false, true);
    assertEquals(28, getStaticURLsSize(callParser));

    // Batch 3: next 5 URLs, sync only.
    callParser.syncFunction(sampledResponses.subList(28, 33), false, true, null);
    callParser.apiCatalogSync.syncWithDB(false, true, SyncLimit.noLimit);
    assertEquals(33, getStaticURLsSize(callParser));
}


public int getStaticURLsSize(HttpCallParser parser) {
Map<URLStatic, RequestTemplate> urlStaticMap = parser.apiCatalogSync.getDbState(123).getStrictURLToMethods();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import com.akto.action.UserAction;
import com.akto.dao.*;
import com.akto.dao.context.Context;
import com.akto.dao.filter.MergedUrlsDao;
import com.akto.dto.*;
import com.akto.dto.ApiInfo.ApiInfoKey;
import com.akto.dto.CodeAnalysisApiInfo.CodeAnalysisApiInfoKey;
import com.akto.dto.filter.MergedUrls;
import com.akto.dto.traffic.SampleData;
import com.akto.dto.type.*;
import com.akto.dto.type.URLMethods.Method;
Expand Down Expand Up @@ -723,6 +725,20 @@ public String deMergeApi() {
return ERROR.toUpperCase();
}

try {
MergedUrlsDao.instance.updateOne(Filters.and(
Filters.eq(MergedUrls.URL, url),
Filters.eq(MergedUrls.METHOD, method),
Filters.eq(MergedUrls.API_COLLECTION_ID, apiCollectionId)
), Updates.combine(
Updates.set(MergedUrls.URL, url),
Updates.set(MergedUrls.METHOD, method),
Updates.set(MergedUrls.API_COLLECTION_ID, apiCollectionId)
));
} catch (Exception e) {
loggerMaker.errorAndAddToDb("Error while saving merged url in DB: " + e.getMessage(), LogDb.DASHBOARD);
}


SampleData sampleData = SampleDataDao.instance.fetchSampleDataForApi(apiCollectionId, url, urlMethod);
List<String> samples = sampleData.getSamples();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
import com.akto.testing.HostDNSLookup;
import com.akto.usage.UsageMetricHandler;
import com.akto.testing.workflow_node_executor.Utils;
import com.akto.util.filter.DictionaryFilter;
import com.akto.utils.jobs.JobUtils;
import com.akto.utils.jobs.MatchingJob;
import com.akto.util.AccountTask;
Expand Down Expand Up @@ -1914,6 +1915,7 @@ public static boolean isNotKubernetes() {
@Override
public void contextInitialized(javax.servlet.ServletContextEvent sce) {
setSubdomain();
DictionaryFilter.readDictionaryBinary();

String https = System.getenv("AKTO_HTTPS_FLAG");
if (Objects.equals(https, "true")) {
Expand Down
Loading
Loading