Skip to content

Commit

Permalink
Merge pull request #24 from justynhunter/feature/allow-contentparser-set
Browse files: browse the repository at this point in the history
Adds methods to set the IContentParser
  • Loading branch information
pavlovtech authored Nov 21, 2023
2 parents c5d4b7c + 0f84f72 commit 988ea8c
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 9 deletions.
16 changes: 9 additions & 7 deletions WebReaper/Builders/ScraperEngineBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using WebReaper.Core.CookieStorage.Abstract;
using WebReaper.Core.LinkTracker.Abstract;
using WebReaper.Core.LinkTracker.Concrete;
using WebReaper.Core.Parser.Abstract;
using WebReaper.Core.Scheduler.Abstract;
using WebReaper.Core.Scheduler.Concrete;
using WebReaper.Domain;
Expand Down Expand Up @@ -35,9 +36,14 @@ public class ScraperEngineBuilder

private IScheduler Scheduler { get; set; } = new InMemoryScheduler();
private IScraperConfigStorage? ConfigStorage { get; set; } = new InMemoryScraperConfigStorage();

protected IProxyProvider? ProxyProvider { get; set; }

/// <summary>
/// Sets the content parser by forwarding <paramref name="contentParser"/> to the
/// underlying <c>SpiderBuilder</c>.
/// </summary>
/// <param name="contentParser">The parser instance handed to <c>SpiderBuilder.WithContentParser</c>.</param>
/// <returns>This builder, so further configuration calls can be chained.</returns>
public ScraperEngineBuilder WithContentParser(IContentParser contentParser)
{
    // The spider builder's own fluent return value is not needed here;
    // this method chains on the engine builder instead.
    _ = SpiderBuilder.WithContentParser(contentParser);

    return this;
}

public ScraperEngineBuilder AddSink(IScraperSink sink)
{
SpiderBuilder.AddSink(sink);
Expand Down Expand Up @@ -186,7 +192,6 @@ public ScraperEngineBuilder GetWithBrowser(
ConfigBuilder.GetWithBrowser(startUrls, actionBuilder?.Invoke(new PageActionBuilder()));
return this;
}

public ScraperEngineBuilder GetWithBrowser(params string[] startUrls)
{
ConfigBuilder.GetWithBrowser(startUrls);
Expand All @@ -201,7 +206,7 @@ public ScraperEngineBuilder Follow(string linkSelector)

public ScraperEngineBuilder FollowWithBrowser(
string linkSelector,
Func<PageActionBuilder,
Func<PageActionBuilder,
List<PageAction>>? actionBuilder = null)
{
ConfigBuilder.FollowWithBrowser(linkSelector, actionBuilder?.Invoke(new PageActionBuilder()));
Expand Down Expand Up @@ -278,7 +283,6 @@ public ScraperEngineBuilder WithMongoDbCookieStorage(string connectionString, st
logger);
return this;
}

public ScraperEngineBuilder WithFileCookieStorage(string fileName)
{
SpiderBuilder.WithFileCookieStorage(fileName);
Expand Down Expand Up @@ -335,12 +339,10 @@ public ScraperEngineBuilder WithParallelismDegree(int parallelismDegree)
/// <summary>
/// Builds the scraper configuration and the spider, hands the configuration to the
/// config storage via <c>CreateConfigAsync</c>, and returns a new <c>ScraperEngine</c>
/// wired with the configured parallelism degree, storage, scheduler, spider and logger.
/// </summary>
/// <returns>A task that completes with the constructed <c>ScraperEngine</c>.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when no config storage is set (<c>ConfigStorage</c> is <c>null</c>).
/// </exception>
public async Task<ScraperEngine> BuildAsync()
{
    // ConfigStorage is declared nullable. Narrow it once into a non-null local so
    // every later use is provably safe, and fail fast with a descriptive error
    // instead of a NullReferenceException after the builders have already run.
    var configStorage = ConfigStorage
        ?? throw new InvalidOperationException(
            "A scraper config storage must be set before building the scraper engine.");

    SpiderBuilder.WithConfigStorage(configStorage);

    var config = ConfigBuilder.Build();
    var spider = SpiderBuilder.Build();

    await configStorage.CreateConfigAsync(config);

    return new ScraperEngine(_parallelismDegree, configStorage, Scheduler, spider, Logger);
}
}
}
10 changes: 8 additions & 2 deletions WebReaper/Builders/SpiderBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ public class SpiderBuilder

/// <summary>
/// Event whose handlers receive a <c>ParsedData</c> instance.
/// Declared nullable because a field-like event holds <c>null</c> until a handler is attached.
/// </summary>
protected event Action<ParsedData>? ScrapedData;

Check warning on line 52 in WebReaper/Builders/SpiderBuilder.cs

View workflow job for this annotation

GitHub Actions / build

Non-nullable event 'ScrapedData' must contain a non-null value when exiting constructor. Consider declaring the event as nullable.

/// <summary>
/// Stores <paramref name="contentParser"/> in this builder's <c>ContentParser</c> member.
/// </summary>
/// <param name="contentParser">The parser instance to assign.</param>
/// <returns>This builder, so further configuration calls can be chained.</returns>
public SpiderBuilder WithContentParser(IContentParser contentParser)
{
    ContentParser = contentParser;

    return this;
}

public SpiderBuilder WithLogger(ILogger logger)
{
Logger = logger;
Expand Down Expand Up @@ -166,7 +172,7 @@ public SpiderBuilder WithRedisCookieStorage(string connectionString, string redi
CookieStorage = new RedisCookieStorage(connectionString, redisKey, Logger);
return this;
}

public SpiderBuilder WithFileCookieStorage(string fileName)
{
CookieStorage = new FileCookieStorage(fileName, Logger);
Expand Down Expand Up @@ -235,4 +241,4 @@ public ISpider Build()

return spider;
}
}
}

0 comments on commit 988ea8c

Please sign in to comment.