Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

活动列表的 OOP 重构 #4

Open
TechQuery opened this issue May 4, 2023 · 2 comments
Open

活动列表的 OOP 重构 #4

TechQuery opened this issue May 4, 2023 · 2 comments
Assignees

Comments

@TechQuery
Copy link
Member

TechQuery commented May 4, 2023

目标模块

/**
 * Crawl the huodongxing.com event list (tag `IT互联网`, all cities), page by page.
 *
 * Pagination stops at the first page for which `eventList` yields nothing.
 *
 * @param all - when truthy the `status` query parameter is sent empty,
 *              otherwise it is sent as `'1'`.
 * @returns an async generator of `Event` records whose `start` / `end` have
 *          been passed through `makeDate`.
 */
export async function* HuoDongXing(all?: boolean) {
  for (let page = 1; ; page++) {
    // The flag must be re-initialised for every page. When it lived in the
    // `for` initializer it stayed `false` after the first non-empty page, so
    // the `break` below could never fire and the loop ran forever.
    let empty = true;

    const pageURL =
      'https://www.huodongxing.com/eventlist?' +
      new URLSearchParams({
        orderby: 'n',
        status: all ? '' : '1',
        tag: 'IT互联网',
        city: '全部',
        page: page + ''
      });

    // The trailing arguments are CSS selectors handed to `eventList`
    // (presumably title / date / address / banner / link — confirm against
    // `eventList`'s signature).
    for await (const item of eventList(
      pageURL,
      '.search-tab-content-list .search-tab-content-item',
      '.item-title',
      '.item-data',
      '.item-dress',
      '.item-logo',
      '.item-title'
    )) {
      empty = false;

      // `item.start` is split on '-' into a start part and an end part.
      // NOTE(review): `end` is `undefined` when the text contains no '-';
      // confirm that `makeDate` tolerates that.
      const [start, end] = (item.start as string).split('-');

      yield {
        ...item,
        start: makeDate(start),
        end: makeDate(end)
      } as Event;
    }

    if (empty) break;
  }
}
/**
 * Crawl the segmentfault.com event list, page by page.
 *
 * Pagination stops at the first page for which `eventList` yields nothing.
 * Unless `all` is truthy, the generator also ends at the first event whose
 * start date is earlier than the moment the current page was requested.
 *
 * @param all - when truthy, events that have already started are yielded too.
 * @returns an async generator of `Event` records with `end` set to `null`.
 */
export async function* SegmentFault(all?: boolean) {
  for (let page = 1; ; page++) {
    // The flag must be re-initialised for every page. When it lived in the
    // `for` initializer it stayed `false` after the first non-empty page, so
    // with `all` set the loop had no way to terminate.
    let empty = true;

    const pageURL = 'https://segmentfault.com/events?page=' + page,
      now = new Date();

    for await (const item of eventList(
      pageURL,
      '.all-event-list .widget-event',
      '.title',
      '.widget-event__meta > :first-child',
      '.widget-event__meta > :last-child',
      '.widget-event__banner',
      '.title > a'
    )) {
      empty = false;

      // The first three characters of the scraped text are dropped before
      // parsing (presumably a label prefix — confirm against the page markup).
      const start = makeDate((item.start as string).slice(3));

      // NOTE(review): returning here assumes the list is ordered so that every
      // later event is also in the past — confirm.
      if (!all && now > start) return;

      yield {
        ...item,
        start,
        end: null,
        address: (item.address as string).slice(3)
      } as Event;
    }

    if (empty) break;
  }
}
/**
 * Crawl the JueJin event API (`getEventList`, ordered by `startTime`), page by page.
 *
 * Pagination stops when a page's `d` array is missing or has no first element.
 * Unless `all` is truthy, the generator ends at the first event whose start
 * date is earlier than the moment the current page was requested.
 *
 * @param all - when truthy, events that have already started are yielded too.
 * @returns an async generator of `Event` records built from the API rows.
 */
export async function* JueJin(all?: boolean) {
  let page = 1;

  while (true) {
    const query = new URLSearchParams({
      src: 'web',
      orderType: 'startTime',
      pageNum: page + ''
    });
    const URI =
      'https://event-storage-api-ms.juejin.im/v2/getEventList?' + query;
    const now = new Date();

    const response = await fetch(URI);
    const { d: list } = await response.json();

    if (!list?.[0]) break;

    // Each successfully loaded page address is written to stderr.
    console.warn(URI);

    for (const row of list) {
      const start = makeDate(row.startTime);
      const end = makeDate(row.endTime);

      if (!all && now > start) return;

      yield {
        title: row.title,
        start,
        end,
        address: row.city,
        tags: row.tagInfo.map((tag: { title: string }) => tag.title),
        summary: row.content,
        link: new URL(row.eventUrl),
        banner: new URL(row.screenshot)
      } as Event;
    }

    page++;
  }
}
/**
 * Crawl the bagevent.com event list.
 *
 * The list page is loaded once to recover the `param` object declared by an
 * inline `<script>` in its `<head>`. Its `paramMap`, `imgDomain` and
 * `mainDomain` fields then drive paged requests to `loadSearchEventList.do`.
 *
 * Pagination stops when a page's `list` is missing or has no first element.
 * Unless `all` is truthy, the generator ends at the first event whose start
 * date is earlier than the moment the current page was requested.
 *
 * @param all - when truthy, events that have already started are yielded too.
 * @returns an async generator of `Event` records (no `end` field is set).
 * @throws Error when no inline script declaring `var param = {...}` is found.
 */
export async function* BaiGe(all?: boolean) {
  const {
    window: {
      document: { head }
    }
  } = await JSDOM.fromURL(
    'https://www.bagevent.com/eventlist.html?f=1&tag=17&r=orderByNew'
  );

  const paramScript = [
    ...head.querySelectorAll<HTMLScriptElement>('script:not(:empty)')
  ].find(code => /var param = \{[\s\S]+\}/.test(code.text));

  // Fail with an explicit message instead of an opaque
  // "Cannot read properties of undefined" when the page layout changes.
  if (!paramScript)
    throw new Error(
      'BaiGe: no inline <script> declaring `var param = {...}` was found in <head>'
    );

  // SECURITY NOTE(review): this evaluates script text downloaded from a remote
  // site with full process privileges. Consider extracting the object literal
  // and parsing it as data, or running it in a sandbox.
  const { paramMap, imgDomain, mainDomain } = new Function(`${paramScript.text}
return param;`)();

  for (let page = 1; ; page++) {
    paramMap.pagingPage = page;

    const URI = `${mainDomain}/load/loadSearchEventList.do?${new URLSearchParams(
        paramMap
      )}`,
      now = new Date();

    const { list } = (
      await (await fetch(URI)).json()
    ).resultObject.valueList;

    if (!list?.[0]) break;

    // Each successfully loaded page address is written to stderr.
    console.warn(URI);

    for (const {
      event_name,
      start_time,
      address,
      logo,
      event_id
    } of list) {
      const start = makeDate(start_time);

      if (!all && now > start) return;

      yield {
        title: event_name,
        start,
        address,
        banner: imgDomain + logo,
        link: new URL(mainDomain + '/event/' + event_id)
      } as Event;
    }
  }
}
/**
 * Crawl the oschina.net event list through its `get_more_event_list` AJAX
 * endpoint, page by page.
 *
 * Each page is requested with a POST whose body carries `tab`, `time` and `p`.
 * The returned text is parsed with `JSDOM` and handed to `eventList`.
 * Pagination stops at the first page for which `eventList` yields nothing.
 * Unless `all` is truthy, the generator also ends at the first event whose
 * start date is earlier than the moment the current page was requested.
 *
 * @param all - when truthy, events that have already started are yielded too.
 * @returns an async generator of `Event` records with `end` set to `null`.
 */
export async function* OSChina(all?: boolean) {
  for (let page = 1; ; page++) {
    // The flag must be re-initialised for every page. When it lived in the
    // `for` initializer it stayed `false` after the first non-empty page, so
    // with `all` set the loop had no way to terminate.
    let empty = true;

    const body = new URLSearchParams({
        tab: 'latest',
        time: 'all',
        p: page + ''
      }),
      endpoint = 'https://www.oschina.net/action/ajax/get_more_event_list',
      now = new Date();

    const data = await (
      await fetch(endpoint, { method: 'POST', body })
    ).text();

    for await (const item of eventList(
      // The POST parameters are appended to the document URL given to JSDOM
      // (presumably so relative links resolve and pages stay distinguishable —
      // confirm against `eventList`).
      new JSDOM(data, { url: endpoint + '?' + body }),
      '.event-item',
      '.summary',
      '.when-where > label:first-of-type',
      '.when-where > label:last-of-type',
      '.item-banner img',
      '.item-banner > a'
    )) {
      empty = false;

      const start = makeDate(item.start as string);

      // NOTE(review): returning here assumes the list is ordered so that every
      // later event is also in the past — confirm.
      if (!all && now > start) return;

      yield { ...item, start, end: null } as Event;
    }

    if (empty) break;
  }
}

继承基类

/**
 * Abstract base for crawlers that collect items of type `T` from a list
 * address and persist them as a YAML file.
 */
export abstract class DataCrawler<T> {
  // Narrows `this.constructor` so the static members below are reachable
  // from instances.
  declare ['constructor']: typeof DataCrawler;

  static baseURI = '';
  static schema: URLPattern;

  /** Yield every item found at the list address `URI`. */
  abstract getList(URI: string): AsyncGenerator<T>;

  /** Resolve the single item found at the address `URI`. */
  abstract getItem(URI: string): Promise<T>;

  /** Serialise the collected items with `stringify`. */
  makeYAML(list: T[]) {
    const serialized = stringify(list);

    return serialized;
  }

  /**
   * Collect everything `getList(URI)` yields, logging each item, then write
   * the `makeYAML` output as `index.yml` under `<cwd>/temp/<URI pathname>`.
   *
   * @param URI - list address; must be parsable by `new URL()`.
   * @returns the collected items, the target folder, and whatever `saveFile`
   *          resolved to.
   */
  @logTime
  async saveList(URI: string) {
    const workingDirectory = process.cwd();
    const { pathname } = new URL(URI);
    const folder = join(workingDirectory, 'temp', pathname);

    const list: T[] = [];

    for await (const entry of this.getList(URI)) {
      console.log(entry);
      list.push(entry);
    }

    const content = this.makeYAML(list);
    const file = await saveFile(content, folder, 'index.yml');

    return { list, folder, file };
  }
}

参考实现

/**
 * Crawler base for conference agendas. In addition to the YAML written by
 * `DataCrawler.saveList`, it exports CSV files and downloads mentor avatars.
 *
 * `mentors` and `forums` start empty; nothing in this class fills them, so
 * subclasses are presumably expected to populate them while crawling.
 */
export abstract class AgendaCrawler<
  A extends Agenda = Agenda,
  M extends Mentor = Mentor,
  F extends Forum = Forum
> extends DataCrawler<A> {
  mentors: M[] = [];
  forums: F[] = [];

  /** Serialise mentors, forums and agendas together into one YAML document. */
  override makeYAML(agendas: A[]) {
    return stringify({
      mentors: this.mentors,
      forums: this.forums,
      agendas
    });
  }

  /**
   * Build the three row sets used for CSV export.
   *
   * Each agenda's `mentor` and `forum` objects are replaced by their `name`
   * (`undefined` when the object is absent) so the rows stay flat.
   */
  makeCSV(agendas: A[]) {
    const { mentors, forums } = this;

    return {
      mentors,
      forums,
      agendas: agendas.map(({ mentor, forum, ...agenda }) => ({
        ...agenda,
        mentor: mentor?.name,
        forum: forum?.name
      }))
    };
  }

  /**
   * Write one `<name>.csv` file into `folder` per key returned by `makeCSV`
   * (`mentors`, `forums`, `agendas`), sequentially.
   */
  @logTime
  async saveCSV(agendas: A[], folder: string) {
    for (const [name, rows] of Object.entries(this.makeCSV(agendas)))
      await saveFile(stringifyCSV(rows), folder, `${name}.csv`);
  }

  /**
   * Download the avatar of every mentor that has one and save it via
   * `saveFile(buffer, folder, 'image', <name>.<ext>)`. `<name>` comes from
   * the avatar path and `<ext>` from the detected file type.
   *
   * Avatars whose type cannot be detected are skipped with a warning, so one
   * bad download does not abort the remaining ones.
   */
  @logTime
  async saveImagesTo(folder: string) {
    for (const { avatar } of this.mentors) {
      if (!avatar) continue;

      const response = await fetch(avatar);
      const buffer = Buffer.from(await response.arrayBuffer());

      // Presumably `file-type`'s `fromBuffer`, which resolves to `undefined`
      // for unrecognised content; destructuring it unguarded would throw.
      const fileType = await fromBuffer(buffer);

      if (!fileType) {
        console.warn(`Skipped avatar with undetectable file type: ${avatar}`);
        continue;
      }
      await saveFile(
        buffer,
        folder,
        'image',
        `${parse(avatar).name}.${fileType.ext}`
      );
    }
  }

  /**
   * Run the base `saveList`, then export the CSV files and the mentor avatars
   * into the same folder.
   *
   * @returns the same `{ list, folder, file }` the base implementation produced.
   */
  override async saveList(URI: string) {
    const { list, folder, file } = await super.saveList(URI);

    await this.saveCSV(list, folder);
    await this.saveImagesTo(folder);

    return { list, folder, file };
  }
}

Upvote & Fund

  • We're using Polar.sh so you can upvote and help fund this issue.
  • We receive the funding once the issue is completed & confirmed by you.
  • Thank you in advance for helping prioritize & fund our backlog.
Fund with Polar
@PapayaHUANG
Copy link

我认领!!!!!

@TechQuery
Copy link
Member Author

我认领!!!!!

❤直接 fork + pull request,你不在组织里没法 assign。

@TechQuery TechQuery self-assigned this Aug 17, 2023
@polar-sh polar-sh bot added the Fund label Sep 7, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants