-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
GithubCrawl - works without persistence
- it's working with in-memory representations - persistence is disabled - Sqlite persistence is possible, but the serializers are not working yet - see lostisland/sawyer#53
- Loading branch information
1 parent
67bf7ba
commit 1cf8654
Showing
18 changed files
with
464 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,3 +12,7 @@ | |
|
||
# disk db | ||
github_crawl.db | ||
|
||
# logs | ||
log/sql.log | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,10 @@ | ||
#!/usr/bin/env ruby | ||
|
||
require "bundler/setup" | ||
require "github_crawl" | ||
require 'bundler/setup' | ||
require 'github_crawl' | ||
|
||
# You can add fixtures and/or initialization code here to make experimenting | ||
# with your gem easier. You can also use a different console, if you like. | ||
|
||
require "pry" | ||
require 'pry' | ||
Pry.start |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
#!/usr/bin/env ruby | ||
|
||
require 'bundler/setup' | ||
require 'github_crawl' | ||
require 'highline' | ||
require 'set' | ||
|
||
# --- | ||
# Command Line Prompts and Configuration | ||
|
||
cli = HighLine.new

# Repository to crawl: taken from GITHUB_REPO, otherwise prompted for.
repo_name = ENV['GITHUB_REPO'] || cli.ask('github repo in the form "{owner}/{repo}": ')
# A blank answer (or a blank GITHUB_REPO) is an empty String rather than nil,
# so `||=` would never apply the default; test for blankness instead.
repo_name = 'kubernetes/kubernetes' if repo_name.to_s.strip.empty?

# Credentials: taken from GITHUB_USER / GITHUB_PASS, otherwise prompted for.
github_user = ENV['GITHUB_USER'] || cli.ask('github user: ')
github_pass = ENV['GITHUB_PASS'] || cli.ask('github pass: ') { |q| q.echo = '*' }

# Only configure a login when both values were actually supplied. Blank
# answers are empty Strings, not nil, so a nil check alone would hand empty
# credentials to Octokit; leaving Octokit unconfigured is presumably the
# intent for a user who just presses Enter at both prompts.
credentials_given = [github_user, github_pass].none? { |value| value.to_s.empty? }
if credentials_given
  Octokit.configure do |c|
    c.login = github_user
    c.password = github_pass
  end
  # # TODO: try to use an auth-token
  # auth = Octokit.create_authorization(:scopes => ["user"], :note => "GithubCrawl")
  # Octokit.bearer_token = auth[:token]
end

Octokit.auto_paginate = true
|
||
|
||
# --- | ||
# Github Crawling by repo | ||
# | ||
# TODO: try to use https://developer.github.com/v3/#conditional-requests | ||
|
||
# Maps a repository "name" (the short name, not the "full_name") to the Set
# of contributor logins that were seen for it during the crawl.
repos = {}

begin
  repo = GithubCrawl::Repo.new(full_name: repo_name)
  contributors = repo.contributors
  # Seed the table with the starting repository and all of its contributors.
  repos[repo.name] = Set.new(contributors.map(&:login))

  contributors.each do |contributor|
    GithubCrawl.check_rate_limit
    contributor.repos.each do |owned_repo|
      # Q: a user could fork a repository without ever contributing to it; so
      # should this also verify that the user is a contributor to that
      # repository?
      key = owned_repo.name
      (repos[key] ||= Set.new) << contributor.login
    end
  end
rescue StandardError => e
  # Best effort: print the failure and carry on with whatever was collected.
  puts e.message
end
|
||
# --- | ||
# Report the most popular repositories among the contributors | ||
|
||
# Sort the repos by the number of users who list them among their
# repositories, most popular first. Each entry is a [name, logins] pair.
repos_sorted = repos.sort_by { |_name, logins| -logins.size }

# Report the contributor count and the name for the top 10 repos.
# (`slice(0, 9)` yields only nine entries, so `first(10)` is used instead.)
repos_sorted.first(10).each { |name, logins| puts "#{logins.size}: #{name}" }

# It's interesting to pause here to inspect all the data. For example:
#   repos.length
#   repos.values.map(&:length).uniq
#   repos_sorted.first(10).each { |name, logins| puts "#{logins.size}: #{name}\n\t#{logins.sort}" }

# Ctrl-D or exit! to quit
require 'pry'
binding.pry
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,21 @@ | ||
require 'daybreak' | ||
require 'octokit' | ||
|
||
require 'github_crawl/version' | ||
# Github data | ||
require 'octokit' | ||
require 'github_crawl/repo' | ||
require 'github_crawl/user' | ||
|
||
# Local persistence | ||
require 'sequel' | ||
require 'sqlite3' | ||
require 'github_crawl/sawyer_serializer' | ||
require 'github_crawl/sql_db' | ||
require 'github_crawl/sql_base' | ||
require 'github_crawl/sql_repos' | ||
require 'github_crawl/sql_users' | ||
|
||
# Serializers | ||
require 'json' | ||
require 'yaml' | ||
|
||
require 'github_crawl/version' | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,18 @@ | ||
require 'boot' | ||
|
||
module GithubCrawl | ||
DB = Daybreak::DB.new 'github_crawl.db' | ||
DB = SqlDb.new | ||
|
||
# Warn on stdout when the GitHub API rate limit is nearly used up.
#
# Reads the rate-limit headers of the most recent Octokit response and prints
# two WARNING lines when 100 or fewer requests remain. Does nothing when no
# response is available yet, or when the response carries no
# `x-ratelimit-remaining` header.
#
# @return [void]
def self.check_rate_limit
  response = Octokit.last_response
  return if response.nil?

  headers = response.headers
  remaining_header = headers['x-ratelimit-remaining']
  # A missing header would otherwise read as 0 (nil.to_i) and produce a
  # bogus warning with a 1970 reset time.
  return if remaining_header.nil?

  rate_remaining = remaining_header.to_i
  return if rate_remaining > 100

  rate_limit = headers['x-ratelimit-limit'].to_i # hits per hour
  rate_reset = headers['x-ratelimit-reset'].to_i # passed to Time.at below
  puts "WARNING: rate limit (#{rate_limit}) remainder: #{rate_remaining}"
  puts "WARNING: rate limit (#{rate_limit}) resets at #{Time.at(rate_reset)}"
end
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
module GithubCrawl
  # Mixin that converts Sawyer resources to and from Marshal strings.
  module SawyerSerializer
    # Dump a resource to a binary string.
    #
    # The resource's own `marshal_dump` state is what gets serialized. (A
    # JSON encoding of attrs / rels / fields was sketched here earlier but is
    # not used.)
    #
    # @param [Sawyer::Resource] sawyer_resource
    # @return [String] serialized data
    def serialize(sawyer_resource)
      state = sawyer_resource.marshal_dump
      Marshal.dump(state)
    end

    # Rebuild a resource from a string produced by #serialize.
    #
    # A fresh agent pointed at https://api.github.com/ is created for the
    # restored resource.
    #
    # NOTE(review): Marshal.restore can instantiate arbitrary objects; only
    # feed it data this application wrote itself.
    #
    # @param [String] dumped serialized data
    # @return [Sawyer::Resource] sawyer_resource
    def deserialize(dumped)
      parser = Sawyer::LinkParsers::Simple.new
      api_agent = Sawyer::Agent.new('https://api.github.com/', links_parser: parser)
      Sawyer::Resource.new(api_agent).tap do |restored|
        restored.marshal_load(Marshal.restore(dumped))
      end
    end
  end
end
Oops, something went wrong.