diff --git a/data/README.md b/data/README.md index caf08f3..aca5253 100644 --- a/data/README.md +++ b/data/README.md @@ -13,7 +13,7 @@ manually. Latest corpus stats: - 7318 total lines -- 5331 generated usernames +- 5331 unique usernames - 100 unique memes Movie titles used: diff --git a/data/generate-corpus.js b/data/generate-corpus.js index 13256be..0692a27 100755 --- a/data/generate-corpus.js +++ b/data/generate-corpus.js @@ -132,9 +132,12 @@ fs.writeFileSync('corpus.json', JSON.stringify({ dialogs, usernames })); console.log('> corpus generated successfully'); console.log(` -> latest corpus stats: -> ${numLines} total lines -> ${usernames.length} generated usernames -> -> movie titles used: -> ${filteredMovies.map((movie) => movie.title).join('\n> ')}`); +Latest corpus stats: + +- ${numLines} total lines +- ${usernames.length} unique usernames +- 100 unique memes + +Movie titles used: + +- ${filteredMovies.map((movie) => movie.title).join('\n- ')}`);