Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve filenames for downloaded assets #1232

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 84 additions & 19 deletions DiscordChatExporter.Core/Exporting/ExportAssetDownloader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
Expand Down Expand Up @@ -92,34 +93,46 @@ await using (var output = File.Create(filePath))

internal partial class ExportAssetDownloader
{
private static string GetUrlHash(string url)
private const String CHARSET = "0123456789bcdfghjklmnpqrstvwxyz_";

private static String Base32(byte[] data)
{
// Remove signature parameters from Discord CDN URLs to normalize them
static string NormalizeUrl(string url)
{
var uri = new Uri(url);
if (!string.Equals(uri.Host, "cdn.discordapp.com", StringComparison.OrdinalIgnoreCase))
return url;
var newString = new StringBuilder();
uint accum = 0;
uint bits = 0;

var query = HttpUtility.ParseQueryString(uri.Query);
query.Remove("ex");
query.Remove("is");
query.Remove("hm");
foreach (byte b in data)
{
accum <<= 8;
accum |= b;
bits += 8;

return uri.GetLeftPart(UriPartial.Path) + query;
while (bits > 5)
{
char ch = CHARSET[(int)(accum & 0x1F)];
accum >>= 5;
bits -= 5;
newString.Append(ch);
}
}
if (bits != 0)
{
char ch = CHARSET[(int)(accum & 0x1F)];
newString.Append(ch);
}

return SHA256
.HashData(Encoding.UTF8.GetBytes(NormalizeUrl(url)))
.ToHex()
// 5 chars ought to be enough for anybody
.Truncate(5);
return newString.ToString();
}

private static string GetFileNameFromUrl(string url)
private static string GetUrlHash(string url)
{
var urlHash = GetUrlHash(url);
var hash = SHA256.HashData(Encoding.UTF8.GetBytes(url));
// 12 characters of base32 contains about as much entropy as a Discord snowflake
return Base32(hash).Truncate(12);
}

private static string AddHashToUrl(string url, string urlHash)
{
// Try to extract the file name from URL
var fileName = Regex.Match(url, @".+/([^?]*)").Groups[1].Value;

Expand All @@ -143,4 +156,56 @@ private static string GetFileNameFromUrl(string url)
fileNameWithoutExtension.Truncate(42) + '-' + urlHash + fileExtension
);
}

private static string GetFileNameFromUrl(string url)
{
var uri = new Uri(url);

if (string.Equals(uri.Host, "cdn.discordapp.com"))
{
string[] split = uri.AbsolutePath.Split("/");

// Attachments
if (uri.AbsolutePath.StartsWith("/attachments/") && split.Length == 5)
{
// use the attachment snowflake for attachments
if (ulong.TryParse(split[3], out var snowflake))
return AddHashToUrl(url, snowflake.ToString());
}

// Emojis
if (
uri.AbsolutePath.StartsWith("/emojis/")
&& split.Length == 3
&& split[2].Contains(".")
)
{
var nameSplit = split[2].Split(".", 2);
if (ulong.TryParse(nameSplit[0], out var snowflake))
return $"emoji-discord-{snowflake}.{nameSplit[1]}";
}

// Avatars
if (uri.AbsolutePath.StartsWith("/avatars/") && split.Length == 4)
{
return $"avatar-{split[2]}-{GetUrlHash(url)}.{split[3].Split(".").Last()}";
}
}

if (string.Equals(uri.Host, "cdn.jsdelivr.net"))
{
string[] split = uri.AbsolutePath.Split("/");

// twemoji
if (
uri.AbsolutePath.StartsWith("/gh/twitter/twemoji@latest/assets/svg/")
&& split.Length == 7
)
{
return $"emoji-twemoji-{split[6]}";
}
}

return AddHashToUrl(url, GetUrlHash(url));
}
}