Skip to content

Commit

Permalink
Added caching and retry policy (#2)
Browse files Browse the repository at this point in the history
* Added caches

* Refactors to move to sqlite

* Remove unused storage api

* Remove obsolete comment

* Added density to version
  • Loading branch information
PeterDowdy authored Dec 2, 2021
1 parent aceb079 commit d1770a6
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 33 deletions.
30 changes: 16 additions & 14 deletions RezoningScraper/API.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using Spectre.Console;
using System.Net.Http.Headers;
using System.Text.Json;
using Polly;

namespace RezoningScraper;

Expand All @@ -12,10 +13,13 @@ public static class API
/// Get all projects from the ShapeYourCity API.
/// </summary>
/// <returns>An async enumerable of projects (because the API is paginated)</returns>
public static async IAsyncEnumerable<Project> GetAllProjects(string jwt)
public static async IAsyncEnumerable<Project> GetAllProjects(string jwt, bool useCache = false)
{
IAsyncPolicy<Projects> cachePolicy = useCache
? Policy.CacheAsync<Projects>(new CacheManager<Projects>(), TimeSpan.FromHours(1))
: Policy.NoOpAsync<Projects>();
var client = new HttpClient();
string startUrl = $"https://shapeyourcity.ca/api/v2/projects?per_page={ResultsPerPage}";
HttpClient client = new();

string? next = startUrl;

Expand All @@ -24,24 +28,22 @@ public static async IAsyncEnumerable<Project> GetAllProjects(string jwt)
// loop over result pages
while (next != null)
{
HttpRequestMessage message = new(HttpMethod.Get, next);
HttpRequestMessage message = new(HttpMethod.Get, next);
message.Headers.Authorization = new AuthenticationHeaderValue("Bearer", jwt);
HttpResponseMessage response = await client.SendAsync(message);
string responseContent = await response.Content.ReadAsStringAsync();

var deserialized = JsonSerializer.Deserialize<Projects>(responseContent);
var data = deserialized?.data;

if (data != null)
var deserializedResponse = await cachePolicy.ExecuteAsync(async context =>
{
var response = await Policy.Handle<Exception>().RetryAsync(3).ExecuteAsync(async () => await client.SendAsync(message));
return JsonSerializer.Deserialize<Projects>(await response.Content.ReadAsStringAsync()) ?? new Projects();
}, new Context(next));
if (deserializedResponse?.data is not null)
{
AnsiConsole.WriteLine($"Retrieved page {++pageCount} ({data.Count()} items)");
foreach (var item in data)
AnsiConsole.WriteLine($"Retrieved page {++pageCount} ({deserializedResponse.data.Count()} items)");
foreach (var item in deserializedResponse?.data ?? Enumerable.Empty<Project>())
{
yield return item;
}
next = deserializedResponse?.links?.next;
}

next = deserialized?.links?.next;
}
}
}
75 changes: 75 additions & 0 deletions RezoningScraper/CacheManager.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
using Polly.Caching;
using System.Text.Json;
using System.Security.Cryptography;
using System.Text;
using static Spectre.Console.AnsiConsole;
using Dapper;

namespace RezoningScraper
{
/// <summary>
/// Polly async cache provider that stores results as JSON text in the <c>Cache</c> table
/// of the local SQLite database.
/// </summary>
/// <typeparam name="TResult">Type of the cached value; it is round-tripped through System.Text.Json.</typeparam>
public class CacheManager<TResult> : IAsyncCacheProvider<TResult>
{
    /// <summary>File name of the SQLite database that contains the <c>Cache</c> table.</summary>
    private const string DatabaseFileName = "RezoningScraper.db";

    /// <summary>
    /// Inserts or replaces the cache row for <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Cache key supplied by the Polly execution context; it is stored hashed.</param>
    /// <param name="value">Value to serialize to JSON and store.</param>
    /// <param name="ttl">Time-to-live; the row's expiration is the current UTC time plus <c>ttl.Timespan</c>.</param>
    /// <param name="ct">Token forwarded to the database command.</param>
    /// <param name="continueOnCapturedContext">Whether awaits resume on the captured synchronization context.</param>
    public async Task PutAsync(string key, TResult value, Ttl ttl, CancellationToken ct, bool continueOnCapturedContext)
    {
        using var dbConnection = DbHelper.CreateOrOpenFileDb(DatabaseFileName);

        // The payload is built entirely in memory, so the synchronous serializer is sufficient.
        string json = JsonSerializer.Serialize(value);
        long expiration = (DateTimeOffset.UtcNow + ttl.Timespan).ToUnixTimeMilliseconds();

        using var tran = dbConnection.BeginTransaction();

        // Attach the transaction explicitly so the write is part of it; if the command throws,
        // disposing the uncommitted transaction rolls the write back.
        await dbConnection.ExecuteAsync(new CommandDefinition(
            "REPLACE INTO Cache(Key, Expiration, Value) VALUES(@Key, @Expiration,@Value)",
            new
            {
                Key = HashKey(key),
                Expiration = expiration,
                Value = json
            },
            transaction: tran,
            cancellationToken: ct)).ConfigureAwait(continueOnCapturedContext);

        tran.Commit();
        WriteLine($"Completed put cache for {key}");
    }

    /// <summary>
    /// Looks up the unexpired cache row for <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Cache key supplied by the Polly execution context; it is looked up hashed.</param>
    /// <param name="ct">Token forwarded to the database command.</param>
    /// <param name="continueOnCapturedContext">Whether awaits resume on the captured synchronization context.</param>
    /// <returns>
    /// <c>(true, value)</c> when an unexpired row exists and deserializes; otherwise
    /// <c>(false, default)</c>, including when the stored JSON cannot be deserialized.
    /// </returns>
    public async Task<(bool, TResult)> TryGetAsync(string key, CancellationToken ct, bool continueOnCapturedContext)
    {
        using var dbConnection = DbHelper.CreateOrOpenFileDb(DatabaseFileName);

        var cachedValue = await dbConnection.ExecuteScalarAsync<string>(new CommandDefinition(
            "SELECT Value FROM Cache WHERE Key = @Key AND Expiration > @Expiration",
            new
            {
                Key = HashKey(key),
                Expiration = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()
            },
            cancellationToken: ct)).ConfigureAwait(continueOnCapturedContext);

        if (string.IsNullOrEmpty(cachedValue))
        {
            WriteLine($"No value in cache for {key}");
            // The value half of the tuple is meaningless on a miss.
            return (false, default!);
        }

        try
        {
            var model = JsonSerializer.Deserialize<TResult>(cachedValue);
            WriteLine($"Found value in cache for {key}");
            // A stored JSON null deserializes to null and is still reported as a hit.
            return (true, model!);
        }
        catch (Exception ex) when (ex is JsonException or NotSupportedException)
        {
            // An unreadable entry is treated as a miss; unrelated failures are no longer swallowed.
            WriteLine($"Error on get cache for {key}: {ex.Message}");
            return (false, default!);
        }
    }

    /// <summary>
    /// Maps a cache key to the upper-case hexadecimal SHA-256 digest of its UTF-8 bytes,
    /// which is the form stored in the <c>Key</c> column.
    /// </summary>
    private static string HashKey(string key) =>
        Convert.ToHexString(SHA256.HashData(Encoding.UTF8.GetBytes(key)));
}
}
10 changes: 9 additions & 1 deletion RezoningScraper/DbHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,15 @@ CREATE TABLE IF NOT EXISTS
TokenCache(
Expiration INTEGER NOT NULL,
Token TEXT NOT NULL
);";
);
CREATE TABLE IF NOT EXISTS
Cache(
Key TEXT PRIMARY KEY,
Expiration INTEGER NOT NULL,
Value TEXT NOT NULL
);
";
conn.Execute(sql);
}

Expand Down
9 changes: 5 additions & 4 deletions RezoningScraper/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,17 @@ static async Task<int> Main(string[] args)
new Option<string?>("--slack-webhook-url",
getDefaultValue: () => "",
description: "A Slack Incoming Webhook URL. If specified, RezoningScraper will post info about new+modified rezonings to this address."),
new Option<bool>("--use-cache", description: "Use cached json queries, as long as they are fresh enough."),
new Option<bool>("--save-to-db",
getDefaultValue: () => true,
description: "Whether to save the API results to database."),
description: "Whether to save the API results to database.")
};

rootCommand.Handler = CommandHandler.Create<string, bool>(RunScraper);
return await rootCommand.InvokeAsync(args);
}

static async Task RunScraper(string slackWebhookUrl, bool saveToDb)
static async Task RunScraper(string slackWebhookUrl, bool useCache, bool saveToDb)
{
MarkupLine($"[green]Welcome to RezoningScraper v{Assembly.GetExecutingAssembly().GetName().Version}[/]");
if(string.IsNullOrWhiteSpace(slackWebhookUrl)) { WriteLine($"Slack URI not specified; will not publish updates to Slack."); }
Expand All @@ -45,12 +46,12 @@ await AnsiConsole.Status().StartAsync("Opening DB...", async ctx =>
db.InitializeSchemaIfNeeded();

ctx.Status = "Loading token...";
var token = await TokenHelper.GetTokenFromDbOrWebsite(db);
var token = await TokenHelper.GetTokenFromDbOrWebsite(db, useCache);

ctx.Status = "Querying API...";
WriteLine("Starting API query...");
var stopwatch = Stopwatch.StartNew();
var latestProjects = await API.GetAllProjects(token.JWT).ToListAsync();
var latestProjects = await API.GetAllProjects(token.JWT, useCache).ToListAsync();
MarkupLine($"API query finished: retrieved {latestProjects.Count} projects in [yellow]{stopwatch.ElapsedMilliseconds}ms[/]");

ctx.Status = "Comparing against projects in local database...";
Expand Down
3 changes: 2 additions & 1 deletion RezoningScraper/RezoningScraper.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<Version>2.1.0.0</Version>
<Version>2.2.0.0</Version>

</PropertyGroup>

Expand All @@ -17,6 +17,7 @@
<PackageReference Include="AngleSharp" Version="0.16.1" />
<PackageReference Include="Dapper" Version="2.0.123" />
<PackageReference Include="Microsoft.Data.Sqlite" Version="6.0.0" />
<PackageReference Include="Polly" Version="7.2.2" />
<PackageReference Include="Spectre.Console" Version="0.42.0" />
<PackageReference Include="System.CommandLine" Version="2.0.0-beta1.21308.1" />
<PackageReference Include="System.IdentityModel.Tokens.Jwt" Version="6.14.1" />
Expand Down
32 changes: 19 additions & 13 deletions RezoningScraper/TokenHelper.cs
Original file line number Diff line number Diff line change
@@ -1,42 +1,48 @@
using AngleSharp.Html.Parser;
using Microsoft.Data.Sqlite;
using Polly;
using System.IdentityModel.Tokens.Jwt;
using System.Net;
using System.Text.Json.Nodes;
using static Spectre.Console.AnsiConsole;

namespace RezoningScraper;

internal static class TokenHelper
{
internal static async Task<Token> GetTokenFromDbOrWebsite(SqliteConnection db)
internal static async Task<Token> GetTokenFromDbOrWebsite(SqliteConnection db, bool useCache)
{
Token? tokenFromDb = db.GetToken();

if (tokenFromDb != null && tokenFromDb.Expiration > DateTimeOffset.UtcNow.AddMinutes(1))
{
WriteLine($"Loaded API token from database. Cached token will expire on {tokenFromDb.Expiration}");
return tokenFromDb;
}
else
{
// TODO: add retries, this page seems unreliable

IAsyncPolicy<Token> cachePolicy = useCache
? Policy.CacheAsync<Token>(new CacheManager<Token>(), TimeSpan.FromMinutes(1))
: Policy.NoOpAsync<Token>();
WriteLine("Getting latest anonymous user token from shapeyourcity.ca");
var client = new HttpClient() { Timeout = TimeSpan.FromSeconds(20) };
var htmlToParse = await client.GetStringAsync("https://shapeyourcity.ca/embeds/projectfinder");

string jwt = ExtractTokenFromHtml(htmlToParse);
return await cachePolicy.ExecuteAsync(async context =>
{
var htmlToParse = await Policy.Handle<Exception>()
.RetryAsync(3)
.ExecuteAsync(async () => await client.GetStringAsync("https://shapeyourcity.ca/embeds/projectfinder"));
string jwt = ExtractTokenFromHtml(htmlToParse);

DateTimeOffset expiration = GetExpirationFromEncodedJWT(jwt);
DateTimeOffset expiration = GetExpirationFromEncodedJWT(jwt);

WriteLine($"Retrieved JWT with expiration date {expiration}");
WriteLine($"Retrieved JWT with expiration date {expiration}");

var newToken = new Token(expiration, jwt);
var newToken = new Token(expiration, jwt);

db.SetToken(newToken);
WriteLine($"Cached JWT in local database");
db.SetToken(newToken);
WriteLine($"Cached JWT in local database");

return newToken;
return newToken;
}, new Context("https://shapeyourcity.ca/embeds/projectfinder"));
}
}

Expand Down

0 comments on commit d1770a6

Please sign in to comment.