-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add the possibility to transform the URL in "OpenLink".
- Loading branch information
David Robinson committed on Sep 26, 2024
1 parent: eb5d18e — commit: 59357ac
Showing 5 changed files with 37 additions and 13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,29 @@ | ||
using CocoCrawler.Builders; | ||
using AngleSharp.Dom; | ||
using CocoCrawler.Builders; | ||
using CocoCrawler.Job.PageBrowserActions; | ||
|
||
namespace CocoCrawler.Job.PageTasks; | ||
|
||
public class CrawlPageOpenLinksTask(string paginationSelector, PageCrawlJobBuilder builder, PageActions? pageActions = null) : IPageCrawlTask | ||
public class CrawlPageOpenLinksTask : IPageCrawlTask | ||
{ | ||
public string OpenLinksSelector { get; init; } = paginationSelector; | ||
public PageActions? PageActions { get; init; } = pageActions; | ||
public PageCrawlJobBuilder JobBuilder { get; init; } = builder; | ||
public string OpenLinksSelector { get; init; } | ||
public PageActions? PageActions { get; init; } | ||
public Func<IElement, string?>? LinkProcessor { get; } | ||
public PageCrawlJobBuilder JobBuilder { get; init; } | ||
|
||
public CrawlPageOpenLinksTask(string linksSelector, PageCrawlJobBuilder builder, PageActions? pageActions = null) | ||
{ | ||
OpenLinksSelector = linksSelector; | ||
PageActions = pageActions; | ||
JobBuilder = builder; | ||
} | ||
|
||
public CrawlPageOpenLinksTask(string linksSelector, PageCrawlJobBuilder builder, PageActions? pageActions = null, Func<IElement, string?>? linkProcessor = null) | ||
{ | ||
OpenLinksSelector = linksSelector; | ||
PageActions = pageActions; | ||
LinkProcessor = linkProcessor; | ||
JobBuilder = builder; | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,13 @@ | ||
using CocoCrawler.Job.PageTasks; | ||
using AngleSharp.Dom; | ||
using CocoCrawler.Job.PageTasks; | ||
using Newtonsoft.Json.Linq; | ||
|
||
namespace CocoCrawler.Parser; | ||
|
||
public interface IParser | ||
{ | ||
Task Init(string html); | ||
string[] ParseForLinks(string linksSelector); | ||
string[] ParseForLinks(string linksSelector, Func<IElement, string?>? linkProcessor = null); | ||
JArray ExtractList(CrawlPageExtractListTask scrapeList); | ||
JObject ExtractObject(CrawlPageExtractObjectTask task); | ||
} |