Skip to content

Commit

Permalink
Implement generation of authors gexf in fetch-authors.ts
Browse files Browse the repository at this point in the history
This adds the authors of originals.yaml to authors.json and creates
authors-network.gexf.
  • Loading branch information
cmil committed Jan 14, 2025
1 parent c948d3c commit 1c51f0b
Show file tree
Hide file tree
Showing 3 changed files with 3,876 additions and 8 deletions.
143 changes: 135 additions & 8 deletions fetch-authors.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import axios from 'axios';
import {loadAll, CORE_SCHEMA} from 'js-yaml';
import {readFileSync, writeFileSync} from 'fs';
import {Play} from './src/types';
import {DOMParser, XMLSerializer} from '@xmldom/xmldom';
import {Author, OriginalPlay, Play} from './src/types';

import authorData from './src/authors.json';

interface Author {
interface AuthorRecord {
name: string;
gender?: string;
gnd?: string;
Expand All @@ -22,14 +23,29 @@ interface Author {
};
ambiguous?: boolean;
}
/**
 * One author in the authors network. Instances are stored in a lookup keyed by
 * the author's Wikidata ID.
 */
interface Node {
// Author name as given in the play or original record; '?' when that is empty
name: string;
// Author name taken from the fetched author record
fullname?: string;
// NOTE(review): never assigned by the code visible in this file; the GEXF
// export reads the gender from the fetched author records instead
gender?: string;
}

const authors: {[index: string]: Author} = {...authorData};
/**
 * A weighted link of the authors network, pointing from the author of a play
 * to the author of an original that play is based on.
 */
interface Edge {
// Wikidata ID of the author of the play that is based on an original
source: string;
// Wikidata ID of the author of the original
target: string;
// Number of times this (play author, original author) pair was encountered
weight: number;
}

// Shallow copy of the author records imported from src/authors.json;
// fetchAuthors looks records up in it by Wikidata ID
const authors: {[index: string]: AuthorRecord} = {...authorData};

/**
 * Reads a multi-document YAML file and returns its documents, parsed with the
 * core schema.
 */
function readYamlDocuments<T>(path: string): T[] {
  const source = readFileSync(path, 'utf8');
  return loadAll(source, null, {schema: CORE_SCHEMA}) as T[];
}

// Both collections start out empty. A read or parse failure is only logged,
// so whatever has not been loaded by then simply stays empty.
let data: Play[] = [];
let originals: OriginalPlay[] = [];
try {
  data = readYamlDocuments<Play>('./data.yaml');
  originals = readYamlDocuments<OriginalPlay>('./originals.yaml');
} catch (error) {
  console.log(error);
}
Expand All @@ -40,17 +56,26 @@ const plays = data.map((p: Play) => {
return play;
});

// Normalize every original so that it only carries an `authors` list: a single
// `author` becomes a one-element list, a missing list becomes an empty one.
for (const original of originals) {
  if (original.author) {
    original.authors = [original.author];
  } else {
    original.authors = original.authors || [];
  }
  delete original.author;
}

// Command-line arguments passed to the script, i.e. everything after the
// runtime executable and the script path
const argv = process.argv.slice(2);

const authorIds = plays
.map((p) => p.authors.filter((a) => a.wikidata).map((a) => a.wikidata))
.flat()
// Wikidata IDs of all authors of plays and of originals. Each ID is listed
// once, in order of first appearance; the Set replaces a quadratic
// indexOf-based de-duplication and keeps that order.
const authorIds = Array.from(
  new Set([
    ...plays.flatMap((play) =>
      play.authors
        .filter((author) => author.wikidata)
        .map((author) => author.wikidata)
    ),
    ...originals.flatMap((original) =>
      original.authors
        ?.filter((author) => author.wikidata)
        .map((author) => author.wikidata)
    ),
  ])
);

// URL of the Wikidata SPARQL query service
const endpoint = 'https://query.wikidata.org/sparql';

async function fetchAuthors() {
const results: {[index: string]: unknown} = {};
const results: {[index: string]: AuthorRecord} = {};
for (let i = 0; i < authorIds.length; i++) {
const id = authorIds[i];
const author = authors[id as string];
Expand Down Expand Up @@ -89,7 +114,7 @@ WHERE {
}
const a = sparqlResults[0];

const newAuthor: Author = {
const newAuthor: AuthorRecord = {
name: a.authorLabel.value,
gnd: a.gnd?.value,
image: a.img?.value,
Expand Down Expand Up @@ -128,6 +153,108 @@ WHERE {
}

writeFileSync('./src/authors.json', JSON.stringify(results, null, 2));

// Assemble authors network
// Index of the originals by their slug, used to resolve `basedOn` references
const map: Record<string, OriginalPlay> = {};
for (const original of originals) {
  map[original.slug as string] = original;
}

// Network graph: nodes are keyed by an author's Wikidata ID, edges by
// `${source}|${target}`.
const nodes: {[id: string]: Node} = {};
const edges: {[id: string]: Edge} = {};

// Registers an author as a node unless one already exists for that ID. An
// author may have no fetched record, so the lookup is guarded, and the full
// name falls back to the author's own name (never to another author's).
const addNode = (id: string, name?: string) => {
  if (nodes[id]) return;
  nodes[id] = {
    name: name || '?',
    fullname: results[id]?.name || name,
  };
};

plays
  .filter((p) => p.basedOn)
  .map((p: Play) => {
    // Resolve slug references to the original records; references without a
    // matching original are kept as plain strings
    const basedOn = p.basedOn?.map((ref) => map[ref as string] || ref) || [];
    const play = {...p};
    if (basedOn.length > 0) play.basedOn = basedOn;
    return play;
  })
  .forEach((p) => {
    // Collect the authors of all resolved originals this play is based on
    const originalAuthors: Author[] = [];
    p.basedOn?.forEach((o) => {
      if (typeof o !== 'string') {
        // NOTE(review): this re-appends an original that was looked up from
        // `originals` in the first place; nothing visible here reads the list
        // afterwards, so this looks unnecessary. Confirm before removing.
        originals.push(o);
        o.authors?.forEach((a) => originalAuthors.push(a));
      }
    });

    // Link every identified play author to every identified original author
    p.authors?.forEach(({name, wikidata}) => {
      if (!wikidata) return;
      addNode(wikidata, name);

      originalAuthors.forEach((o) => {
        if (!o.wikidata) return;
        addNode(o.wikidata, o.name);

        const edgeId = `${wikidata}|${o.wikidata}`;
        if (edges[edgeId]) {
          edges[edgeId].weight++;
        } else {
          edges[edgeId] = {
            source: wikidata,
            target: o.wikidata,
            weight: 1,
          };
        }
      });
    });
  });

// console.log({nodes, edges});

// Skeleton of the GEXF document, stamped with the current time; the empty
// <nodes/> and <edges/> elements are filled in below.
const lastModified = new Date().toISOString();
const skeleton = `<?xml version="1.0" encoding="UTF-8"?>
<gexf xmlns="http://gexf.net/1.2" version="1.2">
<meta lastmodifieddate="${lastModified}">
<creator>einakter.dracor.org</creator>
<description>Einakter authors network</description>
</meta>
<graph mode="static" defaultedgetype="directed">
<nodes/>
<edges/>
</graph>
</gexf>`;

const parser = new DOMParser();
const doc = parser.parseFromString(skeleton, 'text/xml');

// Write one <node> element per author into <nodes>.
const nodesNode = doc.getElementsByTagName('nodes')[0];

// GEXF data attributes have to be declared before an <attvalue> can refer to
// them, so declare the "gender" node attribute right in front of <nodes>.
const attributesNode = doc.createElement('attributes');
attributesNode.setAttribute('class', 'node');
const genderAttribute = doc.createElement('attribute');
genderAttribute.setAttribute('id', 'gender');
genderAttribute.setAttribute('title', 'gender');
genderAttribute.setAttribute('type', 'string');
attributesNode.appendChild(genderAttribute);
nodesNode.parentNode?.insertBefore(attributesNode, nodesNode);

Object.entries(nodes).forEach(([id, node]) => {
  const elem = doc.createElement('node');
  elem.setAttribute('id', id);
  elem.setAttribute('label', node.fullname || node.name);

  const attvalue = doc.createElement('attvalue');
  attvalue.setAttribute('for', 'gender');
  // The author may have no fetched record; fall back to an empty value
  attvalue.setAttribute('value', results[id]?.gender || '');

  // appendChild returns the appended child, not the parent, so the wrapper
  // has to be attached in separate steps to end up between node and attvalue
  const attvalues = doc.createElement('attvalues');
  attvalues.appendChild(attvalue);
  elem.appendChild(attvalues);

  nodesNode.appendChild(elem);
});
// Write one <edge> element per collected edge into <edges>.
const edgesNode = doc.getElementsByTagName('edges')[0];
for (const [edgeId, {source, target, weight}] of Object.entries(edges)) {
  const edgeElem = doc.createElement('edge');
  edgeElem.setAttribute('id', edgeId);
  edgeElem.setAttribute('source', source);
  edgeElem.setAttribute('target', target);
  edgeElem.setAttribute('weight', weight.toString());
  edgesNode.appendChild(edgeElem);
}

// Serialize the finished document and write it to the network file
writeFileSync(
  './src/authors-network.gexf',
  new XMLSerializer().serializeToString(doc)
);
}

// Run the script. A failure is reported and reflected in the exit code instead
// of being left as an unhandled promise rejection.
fetchAuthors().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
Loading

0 comments on commit 1c51f0b

Please sign in to comment.