diff --git a/index.js b/index.js
index d5158b4..50b3b49 100644
--- a/index.js
+++ b/index.js
@@ -17,6 +17,7 @@ const fs = require('fs')
         "allPackagesOutput"  : "/path/to/allpackages.json"
       , "repositoriesOutput" : "/path/to/repositories.json"
       , "githubOutput"       : "/path/to/githubusers.json"
+      , "aussieOutput"       : "/path/to/aussieOutput.json"
       , "githubAuthToken"    : "yourgithubauthtoken"
     }
 
diff --git a/load-npm-data.js b/load-npm-data.js
index 8610d8e..e7e8e88 100644
--- a/load-npm-data.js
+++ b/load-npm-data.js
@@ -1,7 +1,9 @@
-// if your GitHub location field matches this then we'll guess you're Aussie
+// matches a github.com repository URL, capturing the owner and repository segments
 const GITHUB_REPO_REGEX = /github.com[:\/]([\.\-\w]+)\/([^$\/\.]+)/
-
-const npm = require('npm')
+    , NPM_ALL_PACKAGES_URL = 'https://skimdb.npmjs.com/registry/_all_docs'
+    , NPM_SINGLE_PACKAGE_URL = 'https://registry.npmjs.org/{packageId}/latest'
+    , request = require('request').defaults({json:true})
+    , async = require('async')
 
 function matchGitHubRepo (npmPackage, repo) {
   var match = repo
@@ -15,34 +17,75 @@ function matchGitHubRepo (npmPackage, repo) {
   }
 }
 
-// load the list of all npm libs with 'repo' pointing to GitHub
-function loadNpmData (callback) {
+// Fetch the 'latest' document of one registry listing row and, when it has a
+// usable maintainers list, record it in allPackages (plus its GitHub repo, if
+// any, in repositories). Never calls back with an error, so a single bad
+// package cannot abort the whole run.
+function getPackageData (repositories, allPackages, packageData, callback) {
+  // a row without an id cannot be looked up
+  if (!packageData || !packageData.id) {
+    return callback()
+  }
+
+  // encode the id so that a '/' in it (e.g. a scoped package name) stays inside
+  // one path segment; a leading '@' is kept literal
+  var packageId = encodeURIComponent(packageData.id).replace(/^%40/, '@')
+
+  request(NPM_SINGLE_PACKAGE_URL.replace('{packageId}', packageId), function (err, response, data) {
+    if (err) {
+      // log and continue; usually just a timeout, possibly needs retry logic
+      console.log('error getting data for package: ' + packageData.id, err.message)
+      return callback()
+    }
+
+    // an empty body or a bad maintainers property: there are MANY of these,
+    // just skip them for much speed increase
+    if (!data || !Array.isArray(data.maintainers)) {
+      return callback()
+    }
+
+    var repo = matchGitHubRepo(data.name, data.repository)
+
+    if (repo) {
+      repositories.push(repo)
+    }
+
+    allPackages.push({
+        name        : data.name
+      , maintainers : data.maintainers.map(function (m) { return m && m.name })
+      , githubUser  : repo ? repo.githubUser : null
+      , githubRepo  : repo ? repo.githubRepo : null
+      , description : data.description
+    })
+
+    callback()
+  })
+}
+
+// Load the registry's package listing, then fetch the details of every listed
+// package, 10 requests at a time.
+// Calls back with (err, { repositories: [...], allPackages: [...] })
+function getAllPackages (callback) {
   var repositories = []
     , allPackages  = []
 
-  npm.load(function (err) {
-    if (err) return callback(err)
-
-    npm.registry.get('/-/all', function (err, data) {
-      if (err) return callback(err)
+  // https://github.com/npm/npm-registry-couchapp/issues/162
+  request(NPM_ALL_PACKAGES_URL, function (err, response, body) {
+    if (err) {
+      return callback(err)
+    }
 
-      Object.keys(data).forEach(function (k) {
-        var repo = matchGitHubRepo(data[k].name, data[k].repository)
-        if (repo)
-          repositories.push(repo)
+    // a response without a rows array is treated as an empty listing
+    var rows = body && Array.isArray(body.rows) ? body.rows : []
 
-        allPackages.push({
-            name        : data[k].name
-          , maintainers : (data[k].maintainers || []).map(function (m) { return m.name })
-          , githubUser  : repo ? repo.githubUser : null
-          , githubRepo  : repo ? repo.githubRepo : null
-          , description : data[k].description
-        })
-      })
+    async.eachLimit(rows, 10, getPackageData.bind(null, repositories, allPackages), function (err) {
+      if (err) {
+        return callback(err)
+      }
 
       callback(null, { repositories: repositories, allPackages: allPackages })
     })
   })
 }
 
-module.exports = loadNpmData
\ No newline at end of file
+module.exports = getAllPackages
diff --git a/package.json b/package.json
index ac44b1a..8d59053 100644
--- a/package.json
+++ b/package.json
@@ -7,10 +7,9 @@
   "author": "",
   "license": "MIT",
   "dependencies": {
-    "npm": "~1.2.18",
-    "async": "~0.2.7",
-    "request": "~2.20.0",
-    "function-rate-limit": "0.0.1"
+    "async": "~1.5.0",
+    "function-rate-limit": "0.0.1",
+    "request": "~2.67.0"
   },
   "private": true
-}
\ No newline at end of file
+}