Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"migrate:download:rest": "node scripts/export-wp-rest.mjs",
"migrate:convert:wxr": "node scripts/convert-wxr-to-content.mjs",
"migrate:convert:rest": "node scripts/convert-rest-to-content.mjs",
"migrate:authors:map": "node scripts/build-author-post-paths.mjs",
"migrate:related:legacy": "node scripts/extract-legacy-related-posts.mjs",
"migrate:media:manifest": "node scripts/build-media-manifest.mjs",
"migrate:media:manifest:site": "node scripts/build-wp-content-manifest.mjs",
Expand Down
186 changes: 186 additions & 0 deletions scripts/build-author-post-paths.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#!/usr/bin/env node
import fs from 'node:fs/promises';
import path from 'node:path';
import process from 'node:process';
import matter from 'gray-matter';

// Resolve the CLI options (`--out`, `--postsDir`, `--wxrDir`) to absolute paths,
// using the default location whenever an option was not supplied.
const args = parseArgs(process.argv.slice(2));
const toAbsolute = (value, fallback) => path.resolve(value ?? fallback);
const outPath = toAbsolute(args.out, 'src/data/author-post-paths.json');
const postsDir = toAbsolute(args.postsDir, 'src/content/posts');
const wxrDir = toAbsolute(args.wxrDir, 'data/raw');

// WXR `dc:creator` value -> author slug used as a key of the generated JSON.
// Key order matters: it determines the order of the slugs in the output file.
const AUTHOR_ALIASES = {
  'Tiffany': 'tiffany',
  'alan@rentmoreweeks.com': 'alan',
  'Our Discount Desk': 'our-discount-desk',
  'Our Travel Reporter': 'our-travel-reporter',
};

// Exported route -> route of the local post that stands in for it when the
// exported route itself has no local counterpart.
const ROUTE_ALIASES = new Map(
  Object.entries({
    '/where-am-i-24-2/': '/where-am-i-24/',
  })
);

const localPosts = await loadLocalPosts(postsDir);
const authorPathMap = await buildAuthorPathMap(wxrDir, localPosts);

// Write the map as pretty-printed JSON with a trailing newline.
await fs.mkdir(path.dirname(outPath), { recursive: true });
const serialized = JSON.stringify(authorPathMap, null, 2);
await fs.writeFile(outPath, serialized + '\n');

// Summarise how many routes were collected per author slug.
console.log(`Author post paths written: ${outPath}`);
Object.entries(authorPathMap).forEach(([slug, routes]) => {
  console.log(`- ${slug}: ${routes.length}`);
});

/**
 * Parse command-line arguments into a plain options object.
 *
 * Supported forms:
 *   --name=value   -> { name: 'value' }  (everything after the first '=' is the value)
 *   --name value   -> { name: 'value' }
 *   --name         -> { name: true }     (when followed by another option or nothing)
 *
 * Arguments that do not start with `--` and are not consumed as a value are ignored.
 *
 * @param {string[]} argv Arguments, typically `process.argv.slice(2)`.
 * @returns {Record<string, string | true>} Parsed options keyed by option name.
 */
function parseArgs(argv) {
  const out = {};
  for (let i = 0; i < argv.length; i += 1) {
    const arg = argv[i];
    if (!arg.startsWith('--')) continue;

    // Split on the FIRST '=' only, so values that themselves contain '='
    // (for example `--out=dir/a=b.json`) are kept intact.
    const separator = arg.indexOf('=');
    if (separator !== -1) {
      out[arg.slice(2, separator)] = arg.slice(separator + 1);
      continue;
    }

    const name = arg.slice(2);
    const next = argv[i + 1];
    if (!next || next.startsWith('--')) {
      // No value follows: treat the option as a boolean flag.
      out[name] = true;
    } else {
      out[name] = next;
      i += 1; // The following argument was consumed as this option's value.
    }
  }
  return out;
}

/**
 * Index the Markdown posts found directly inside `rootDir`.
 *
 * Only files ending in `.md` are read. A post is indexed when its front matter
 * has `status === 'publish'`, `draft` is not `true`, and both its `path` and
 * `date` normalise to non-empty / non-zero values. When two posts share a key,
 * the one read later replaces the earlier one.
 *
 * @param {string} rootDir Directory containing the Markdown posts.
 * @returns {Promise<{ byWordpressId: Map<string, object>, byRoute: Map<string, object> }>}
 *   Lookup tables of `{ wordpressId, route, date }` records.
 */
async function loadLocalPosts(rootDir) {
  const byWordpressId = new Map();
  const byRoute = new Map();

  const entries = await fs.readdir(rootDir);
  for (const entry of entries) {
    if (!entry.endsWith('.md')) continue;

    const source = await fs.readFile(path.join(rootDir, entry), 'utf8');
    const frontmatter = matter(source).data;

    const isPublished = frontmatter.status === 'publish' && frontmatter.draft !== true;
    if (!isPublished) continue;

    const wordpressId = String(frontmatter.wordpressId ?? '').trim();
    const route = normalizeRoutePath(frontmatter.path);
    const date = toTimestamp(frontmatter.date);
    // A post without a usable route or date cannot be linked or ordered.
    if (!(route && date)) continue;

    const record = { wordpressId, route, date };
    if (wordpressId) {
      byWordpressId.set(wordpressId, record);
    }
    byRoute.set(route, record);
  }

  return { byWordpressId, byRoute };
}

/**
 * Build the `author slug -> ordered list of post routes` map from WXR exports.
 *
 * Reads every `wordpress-export-posts-YYYY.xml` file in `wxrDir` (in sorted
 * filename order) and, for each published item of type `post` whose
 * `dc:creator` is a key of `AUTHOR_ALIASES`, resolves the matching local post:
 *   1. by WordPress id,
 *   2. otherwise by the item's normalised link,
 *   3. otherwise through `ROUTE_ALIASES`.
 * Items that cannot be resolved are skipped. When only the alias matched, the
 * item's own (source) route is recorded; otherwise the local post's route is.
 *
 * @param {string} wxrDir Directory containing the WXR export files.
 * @param {{ byWordpressId: Map<string, object>, byRoute: Map<string, object> }} localPosts
 *   Lookup tables as returned by `loadLocalPosts`.
 * @returns {Promise<Record<string, string[]>>} Routes per author slug, newest
 *   first, ties broken by `localeCompare` on the route.
 */
async function buildAuthorPathMap(wxrDir, localPosts) {
  const files = (await fs.readdir(wxrDir))
    .filter((entry) => /^wordpress-export-posts-\d{4}\.xml$/.test(entry))
    .map((entry) => path.join(wxrDir, entry))
    .sort();

  // One route -> timestamp map per known slug, so authors without posts still
  // appear in the output with an empty list.
  const pathsBySlug = new Map(Object.values(AUTHOR_ALIASES).map((slug) => [slug, new Map()]));

  for (const filePath of files) {
    const raw = await fs.readFile(filePath, 'utf8');
    for (const item of iterateItems(raw)) {
      // Own-property check: a creator named e.g. "constructor" or "toString"
      // would otherwise resolve to an inherited Object.prototype member, pass
      // the truthiness test below and crash on `pathsBySlug.get(slug).set`.
      if (!Object.prototype.hasOwnProperty.call(AUTHOR_ALIASES, item.creator)) continue;
      const slug = AUTHOR_ALIASES[item.creator];
      if (!slug) continue;
      if (item.postType !== 'post' || item.status !== 'publish') continue;

      const sourceRoute = normalizeRoutePath(item.link);
      const preferred = localPosts.byWordpressId.get(item.wordpressId);
      const fallback = localPosts.byRoute.get(sourceRoute);
      const aliasTarget = ROUTE_ALIASES.get(sourceRoute) ?? '';
      const aliasResolved = aliasTarget ? localPosts.byRoute.get(aliasTarget) : null;

      const direct = preferred ?? fallback;
      const resolved = direct ?? aliasResolved;
      if (!resolved) continue;

      // A direct match records the local post's route. An alias-only match
      // keeps the source route while borrowing the aliased post's date.
      const recordedRoute = direct ? resolved.route : sourceRoute;

      pathsBySlug.get(slug).set(recordedRoute, resolved.date);
    }
  }

  return Object.fromEntries(
    [...pathsBySlug.entries()].map(([slug, routes]) => {
      const orderedRoutes = [...routes.entries()]
        .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
        .map(([route]) => route);
      return [slug, orderedRoutes];
    })
  );
}

/**
 * Lazily yield one summary object per `<item>…</item>` element in a WXR document.
 *
 * Fields are pulled out with regular expressions rather than an XML parser:
 * `creator`, `status` and `postType` are read from CDATA-wrapped tags,
 * `wordpressId` and `link` from plain tags; `creator` and `link` are
 * entity-decoded.
 *
 * @param {string} xml Raw WXR file contents.
 * @returns {Generator<{ creator: string, status: string, postType: string, wordpressId: string, link: string }>}
 */
function *iterateItems(xml) {
  for (const [, body] of xml.matchAll(/<item>([\s\S]*?)<\/item>/g)) {
    const creator = decodeXml(extractCdata(body, 'dc:creator'));
    const status = extractCdata(body, 'wp:status');
    const postType = extractCdata(body, 'wp:post_type');
    const wordpressId = extractTag(body, 'wp:post_id');
    const link = decodeXml(extractTag(body, 'link'));

    yield { creator, status, postType, wordpressId, link };
  }
}

/**
 * Return the trimmed CDATA payload of the first `<tagName><![CDATA[…]]></tagName>`
 * element in `source` (tag name matched case-insensitively), or `''` when
 * there is no such element.
 *
 * @param {string} source XML fragment to search.
 * @param {string} tagName Element name, e.g. `dc:creator`.
 * @returns {string}
 */
function extractCdata(source, tagName) {
  const tag = escapeRegExp(tagName);
  const pattern = new RegExp(`<${tag}><!\\[CDATA\\[([\\s\\S]*?)\\]\\]><\\/${tag}>`, 'i');
  const found = pattern.exec(source);
  if (found === null) return '';
  return found[1].trim();
}

/**
 * Return the trimmed text between the first `<tagName>` and `</tagName>` in
 * `source` (tag name matched case-insensitively), or `''` when the element is
 * absent. The opening tag must carry no attributes.
 *
 * @param {string} source XML fragment to search.
 * @param {string} tagName Element name, e.g. `wp:post_id`.
 * @returns {string}
 */
function extractTag(source, tagName) {
  const tag = escapeRegExp(tagName);
  const pattern = new RegExp(`<${tag}>([\\s\\S]*?)<\\/${tag}>`, 'i');
  const found = pattern.exec(source);
  if (found === null) return '';
  return found[1].trim();
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

/**
 * Backslash-escape every regular-expression metacharacter in `value` so the
 * result can be embedded in a `RegExp` source and match the text literally.
 *
 * @param {string} value Text to escape.
 * @returns {string}
 */
function escapeRegExp(value) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metacharacters, (character) => `\\${character}`);
}

/**
 * Decode XML character and entity references in `value`.
 *
 * Handles the five predefined XML entities (`&amp;`, `&lt;`, `&gt;`, `&quot;`,
 * `&apos;`) and any decimal (`&#8217;`) or hexadecimal (`&#x2019;`) character
 * reference. Unknown entities and references to invalid code points are left
 * untouched.
 *
 * Decoding happens in a single pass, so each reference is unescaped exactly
 * once: `&#038;amp;` and `&amp;amp;` both become the literal text `&amp;`
 * instead of being collapsed to `&` by a later replacement step.
 *
 * @param {unknown} value Text to decode; falsy values yield `''`.
 * @returns {string}
 */
function decodeXml(value) {
  const namedEntities = { amp: '&', quot: '"', lt: '<', gt: '>', apos: "'" };

  return String(value || '').replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|quot|lt|gt|apos));/g,
    (match, decimal, hexadecimal, name) => {
      if (name !== undefined) return namedEntities[name];

      const codePoint =
        decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hexadecimal, 16);
      // Reject NUL, values beyond the Unicode range and lone surrogates;
      // String.fromCodePoint would throw or produce unusable output for them.
      const isValid =
        codePoint > 0 &&
        codePoint <= 0x10ffff &&
        !(codePoint >= 0xd800 && codePoint <= 0xdfff);
      return isValid ? String.fromCodePoint(codePoint) : match;
    }
  );
}

/**
 * Normalise a URL or path into a site-relative route of the form `/segment/`.
 *
 * The first `http(s)://blog.hichee.com` occurrence is removed, U+FFFC object
 * replacement characters (literal or percent-encoded as `%EF%BF%BC`) are
 * stripped, surrounding whitespace is trimmed, and a leading and trailing
 * slash are added when missing. Empty or falsy input yields `''`.
 *
 * @param {unknown} value URL or path to normalise.
 * @returns {string}
 */
function normalizeRoutePath(value) {
  let route = String(value || '');
  route = route.replace(/https?:\/\/blog\.hichee\.com/i, '');
  route = route.replace(/%ef%bf%bc/gi, '');
  route = route.replace(/\uFFFC/g, '');
  route = route.trim();

  if (route === '') return '';

  if (!route.startsWith('/')) route = `/${route}`;
  if (!route.endsWith('/')) route = `${route}/`;
  return route;
}

function toTimestamp(value) {
if (value instanceof Date) return value.getTime();
const parsed = Date.parse(String(value || ''));
return Number.isFinite(parsed) ? parsed : 0;
Comment on lines +176 to +188

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Entity ordering can cause double-decode for &#038;amp;

&#038; is the numeric character reference for &, identical to &amp;. Replacing it first converts &#038;amp; → &amp; → &, which double-decodes a literal &amp; entity. WordPress WXR files can contain doubly-escaped content (e.g. creator display names with ampersands). Merging both patterns into one avoids this:

.replace(/&amp;|&#038;/g, '&')

}
86 changes: 86 additions & 0 deletions src/components/PaginationNav.astro
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
---
/** Props accepted by the pagination navigation component. */
interface Props {
/** URL of the first page; later pages are linked as `${basePath}page/<n>/`, so it is expected to end with a slash. */
basePath: string;
/** Page number being rendered; it is shown as the non-link `aria-current="page"` entry. */
currentPage: number;
/** Total number of pages; the navigation is rendered only when this is greater than 1. */
totalPages: number;
}

const { basePath, currentPage, totalPages } = Astro.props;

// With a single page there is nothing to navigate: no items, no prev/next links.
const isPaginated = totalPages > 1;
const items = isPaginated ? buildItems(currentPage, totalPages) : [];
// `null` means "do not render the link" (first page has no previous, last page has no next).
const previousHref = isPaginated && currentPage > 1 ? pageHref(basePath, currentPage - 1) : null;
const nextHref = isPaginated && currentPage < totalPages ? pageHref(basePath, currentPage + 1) : null;

/**
 * Build the ordered list of pagination entries for `current` out of `total` pages.
 *
 * Always includes the first page, the last page and the current page with its
 * direct neighbours; near either end the window is widened to the first or
 * last three pages. Wherever two consecutive entries are more than one page
 * apart, an `ellipsis` entry is inserted between them.
 *
 * @param current Page number being rendered.
 * @param total Total number of pages.
 * @returns Entries of shape `{ type: 'page', key, page }` or `{ type: 'ellipsis', key }`.
 */
function buildItems(current: number, total: number) {
  const candidates = [1, total, current - 1, current, current + 1];
  if (current <= 3) candidates.push(2, 3);
  if (current >= total - 2) candidates.push(total - 1, total - 2);

  // Deduplicate, drop out-of-range pages, then sort ascending.
  const ordered = [...new Set(candidates)]
    .filter((candidate) => candidate >= 1 && candidate <= total)
    .sort((left, right) => left - right);

  const items = [];
  for (let index = 0; index < ordered.length; index += 1) {
    const page = ordered[index];
    const hasGapBefore = index > 0 && page - ordered[index - 1] > 1;
    if (hasGapBefore) {
      items.push({ type: 'ellipsis', key: `ellipsis-${ordered[index - 1]}-${page}` });
    }
    items.push({ type: 'page', key: `page-${page}`, page });
  }
  return items;
}

/**
 * Return the URL for page `page` of the listing rooted at `rootPath`.
 *
 * Page 1 (or anything lower) is the root path itself; later pages are
 * `${rootPath}page/<n>/`. No slash is inserted between `rootPath` and `page/`.
 *
 * @param rootPath URL of the first page.
 * @param page Page number to link to.
 */
function pageHref(rootPath: string, page: number) {
  return page <= 1 ? rootPath : `${rootPath}page/${page}/`;
}
---

{/*
Pagination markup, rendered only when there is more than one page.
Order of children: optional "Previous" link, one entry per item from `items`
(ellipsis placeholder, current page as a non-link span, or a link to that page),
then optional "Next" link.
NOTE(review): the ellipsis span has no text content in this view; confirm the
glyph is supplied elsewhere (e.g. by CSS), otherwise the gap renders empty.
*/}
{
totalPages > 1 && (
<nav class="pagination-nav" aria-label="Author pages">
<div class="pagination-nav__list">
{
previousHref && (
<a class="page-numbers page-numbers--nav" href={previousHref} rel="prev">
Previous
</a>
)
}

{
items.map((item) =>
item.type === 'ellipsis' ? (
<span class="page-numbers page-numbers--ellipsis" aria-hidden="true">
</span>
) : item.page === currentPage ? (
<span class="page-numbers current" aria-current="page">
{item.page}
</span>
) : (
<a class="page-numbers" href={pageHref(basePath, item.page)}>
{item.page}
</a>
)
)
}

{
nextHref && (
<a class="page-numbers page-numbers--nav" href={nextHref} rel="next">
Next
</a>
)
}
</div>
</nav>
)
}
Loading
Loading