-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
393 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,21 @@ | ||
#gitlab代码行数统计 | ||
# GitLab代码行数统计 | ||
|
||
## 环境依赖 | ||
* python3 | ||
|
||
## 获取token | ||
1. 登录gitlab, 点击右上角个人信息Preferences; | ||
2. 左侧选择`Access Tokens`, 输入token名称,选择权限范围(勾选api)和过期时间; | ||
3. 点击`Create personal access token`,即可生成个人账号的token(填入`config.py`中的`git_token`)。 | ||
|
||
## 统计代码量 | ||
##### Usage | ||
`config.py`文件包含统计脚本需要用到的配置信息 | ||
`run.py`脚本为代码量统计脚本,使用语法: | ||
``` | ||
python3 run.py | ||
``` | ||
|
||
## 致谢 | ||
gitlab_code_statistics初版参考了git-status项目。 | ||
感谢git-status项目原作者EightDoor,项目地址 :https://gitee.com/EightDoor/git-status |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
|
||
""" | ||
File name : config.py | ||
Author : miaoyc | ||
Create date : 2021/9/28 1:38 下午 | ||
Description : 配置模板 | ||
""" | ||
|
||
import datetime | ||
|
||
|
||
# Root URL of the GitLab server. run.py appends "/api/v4/..." to it, so it
# should not end with a slash.
git_root_url = ""
# Personal access token used to authenticate against the GitLab API.
git_token = ""
# Directory the CSV reports are written to.
export_path = "./dist"
# Start of the reporting period, as a "YYYY-MM-DD" string.
t_from = ""
# End of the reporting period, as a "YYYY-MM-DD" string.
t_end = ""


def _parse_day(value, setting_name):
    """Parse a "YYYY-MM-DD" setting into a datetime.

    :param value: the raw string taken from this file
    :param setting_name: name of the setting, used only in the error message
    :return: a naive ``datetime.datetime`` at midnight of that day
    :raises ValueError: if the value is blank or not in YYYY-MM-DD format
    """
    try:
        return datetime.datetime.strptime(value, '%Y-%m-%d')
    except ValueError as err:
        raise ValueError(
            "config.%s must be a date in YYYY-MM-DD format, got %r"
            % (setting_name, value)) from err


# Start of the reporting period as a datetime object.
date_from = _parse_day(t_from, 't_from')
# End of the reporting period as a datetime object.
date_end = _parse_day(t_end, 't_end')

# Repositories to include in the report. run.py compares each entry with the
# "name_with_namespace" field returned by the GitLab projects API.
valid_project = [
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,342 @@ | ||
#!/usr/bin/env python | ||
# coding=utf-8 | ||
|
||
""" | ||
File name : run.py | ||
Author : miaoyc | ||
Create date : 2021/9/28 1:38 下午 | ||
Description : | ||
""" | ||
|
||
import copy
import csv
import datetime
import json
import os
import threading

import requests

import config as config
|
||
# Re-entrant lock guarding the state shared between the worker threads.
lock = threading.RLock()

# Author e-mails to watch for: get_commits prints a diagnostic line whenever a
# commit's author e-mail is a key of this dict.
user_unknown = {}
# Maps a raw author e-mail to the e-mail it should be reported under.
user_email_alias_mapping = {}
# Maps a (post-alias) author e-mail to the display name to report.
user_email_name_mapping = {}
# Not referenced by the code visible in this file.
name_with_namespace_list = []
|
||
|
||
class GitlabApiCountTrueLeTrue:
    """Worker that gathers per-author line statistics from a GitLab server.

    ``get_projects`` is the entry point: it lists the projects, starts one
    thread per selected project (``get_branches``), and finally writes a
    grand-total CSV. Each thread walks the project's branches
    (``get_commits``) and writes a per-project CSV.
    """

    # Ids of every commit counted so far, used for de-duplication: the same
    # commit can be reachable from several branches (e.g. after a merge).
    # NOTE: class-level, so it is shared by all instances.
    total_commit_map = {}

    # Final result set: project id -> ProjectInfo with ``commit_map`` filled.
    # NOTE: class-level, so it is shared by all instances.
    totalMap = {}

    # GitLab caps ``per_page`` at 100; larger values are not honoured.
    _PER_PAGE = 100
    # Seconds to wait on a single HTTP request before giving up.
    _TIMEOUT = 60

    @staticmethod
    def _api_url(path):
        """Return the absolute (token-free) API URL for a relative path."""
        return '%s/api/v4/%s' % (config.git_root_url, path)

    @classmethod
    def _fetch_all(cls, path, params=None):
        """Fetch every page of a list endpoint.

        The token is sent in the ``PRIVATE-TOKEN`` header instead of the
        query string so that it never ends up in printed URLs.

        :param path: API path relative to ``/api/v4/``
        :param params: extra query parameters (values are URL-encoded)
        :return: list of all items received; collection stops (keeping what
                 was already fetched) on a network error, a non-200 status
                 or a payload that is not a JSON list
        """
        url = cls._api_url(path)
        headers = {'PRIVATE-TOKEN': config.git_token}
        query = dict(params or {})
        query['per_page'] = cls._PER_PAGE
        items = []
        page = 1
        while True:
            query['page'] = page
            try:
                response = requests.get(
                    url, headers=headers, params=query, timeout=cls._TIMEOUT)
            except requests.RequestException as exc:
                print("request failed: %s page=%d (%s)" % (url, page, exc))
                break
            # Covers rate limiting ("Retry later") as well as 4xx/5xx errors.
            if response.status_code != 200:
                print("unexpected HTTP %d: %s page=%d" %
                      (response.status_code, url, page))
                break
            try:
                batch = response.json()
            except ValueError:
                print("invalid JSON: %s page=%d" % (url, page))
                break
            # Error payloads are JSON objects, never lists.
            if not isinstance(batch, list) or not batch:
                break
            items.extend(batch)
            # GitLab leaves X-Next-Page empty on the last page.
            if not response.headers.get('X-Next-Page'):
                break
            page += 1
        return items

    @staticmethod
    def _parse_activity_time(value):
        """Parse a GitLab timestamp into a naive datetime.

        Only the leading ``YYYY-MM-DDTHH:MM:SS`` part is used, so fractional
        seconds and any zone suffix (``Z``, ``+08:00``, ``-05:00``) are
        ignored rather than rejected.
        """
        return datetime.datetime.strptime(value[:19], '%Y-%m-%dT%H:%M:%S')

    @staticmethod
    def _branches_to_scan(branches):
        """Return the branch names whose commits should be collected.

        The default branch and ``master`` (when present) always come first,
        regardless of their ``merged`` flag; every other branch is included
        only if it has not been merged yet.
        """
        primary = [
            branch['name'] for branch in branches
            if branch.get('name')
            and (branch.get('default') or branch['name'] == 'master')
        ]
        others = [
            branch['name'] for branch in branches
            if branch.get('name')
            and not branch.get('merged')
            and branch['name'] not in primary
        ]
        return primary + others

    @staticmethod
    def _accumulate(totals, detail):
        """Add one commit-detail record to ``totals`` (keyed by author e-mail).

        The first record of an author is stored as a shallow copy so that the
        caller's object is never modified by later additions.
        """
        existing = totals.get(detail.author_email)
        if existing is None:
            totals[detail.author_email] = copy.copy(detail)
        else:
            existing.total += detail.total
            existing.additions += detail.additions
            existing.deletions += detail.deletions

    def get_projects(self):
        """Collect statistics for all configured projects and write reports.

        Projects are selected when their ``name_with_namespace`` is listed in
        ``config.valid_project``, they have a default branch, and their last
        activity is not more than one day older than ``config.date_from``.

        :return: None
        """
        wanted = set(config.valid_project)
        threads = []
        listing = self._fetch_all('projects', {'order_by': 'last_activity_at'})
        for project in listing:
            if project["name_with_namespace"] not in wanted:
                continue
            if project.get('default_branch') is None:
                continue
            last_active = self._parse_activity_time(project['last_activity_at'])
            # Skip projects whose last activity predates the reporting period.
            if (config.date_from - last_active).days > 1:
                continue
            project_info = ProjectInfo()
            project_info.project_id = project['id']
            project_info.name = project['name']
            project_info.project_desc = project['description']
            project_info.project_url = project['web_url']
            project_info.path = project['path']
            # One worker thread per project.
            threads.append(threading.Thread(
                target=self.get_branches, args=(project['id'], project_info)))

        for worker in threads:
            worker.start()
        for worker in threads:
            worker.join()

        final_commit_map = {}
        for project in self.totalMap.values():
            for detail in project.commit_map.values():
                self._accumulate(final_commit_map, detail)
        write_to_csv(
            "%s/GitStatic_%s/%s_%s.csv" % (
                config.export_path, config.t_from, 'total', config.t_from),
            final_commit_map, "extra")
        return

    def get_branches(self, project_id, project_info):
        """Aggregate the commits of all relevant branches of one project.

        The per-author result is stored in ``project_info.commit_map``,
        registered in ``totalMap`` and written to the project's CSV file.
        Nothing is stored or written when no commit was found.

        :param project_id: numeric GitLab project id
        :param project_info: ProjectInfo describing the project
        :return: None
        """
        print("进入线程:%d,项目id%d,%s" %
              (threading.get_ident(), project_id, project_info.project_url))
        path = 'projects/%s/repository/branches' % project_id
        print("start get branch list %d,url=%s" %
              (project_id, self._api_url(path)))

        branches = self._fetch_all(path)
        if not branches:
            return

        final_commit_map = {}
        for branch_name in self._branches_to_scan(branches):
            detail_map = self.get_commits(
                project_id, project_info.project_url, branch_name)
            print("get commits finish project_id=%d branch %s" %
                  (project_id, branch_name))
            if not detail_map:
                continue
            for detail in detail_map.values():
                self._accumulate(final_commit_map, detail)

        print("all branch commits finish %d " % project_id)

        if not final_commit_map:
            return

        project_info.commit_map = final_commit_map
        # totalMap is written to by every worker thread.
        with lock:
            self.totalMap[project_info.project_id] = project_info
        write_to_csv(
            "%s/GitStatic_%s/project/%s_%d.csv" % (
                config.export_path, config.t_from,
                project_info.path, project_info.project_id),
            final_commit_map, project_info.project_url)

    def get_commits(self, project_id, project_url, branch_name):
        """Collect per-author statistics for one branch of one project.

        Only commits between ``config.date_from`` and ``config.date_end``
        are requested. Commits already counted elsewhere, commits for which
        ``get_commit_detail`` returns None, and commits changing more than
        5000 lines are skipped.

        :param project_id: numeric GitLab project id
        :param project_url: web URL of the project (used for diagnostics only)
        :param branch_name: branch to read the commits from
        :return: dict author e-mail -> commit details, or None when the
                 branch yielded no commits
        """
        since_date = config.date_from.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
        until_date = config.date_end.strftime('%Y-%m-%dT%H:%M:%S.%fZ')

        commits = self._fetch_all(
            'projects/%s/repository/commits' % project_id,
            {'ref_name': branch_name, 'since': since_date, 'until': until_date})
        if not commits:
            return None
        print('start get_commits,projectID=%d,branch=%s' % (project_id, branch_name))

        detail_map = {}
        for commit in commits:
            commit_id = commit.get('id')
            if commit_id is None:
                continue
            # De-duplicate atomically: several threads share this map.
            with lock:
                if commit_id in self.total_commit_map:
                    continue
                self.total_commit_map[commit_id] = commit_id
            detail = get_commit_detail(project_id, commit_id)
            if detail is None:
                continue
            if detail.total > 5000:
                # A single commit of more than 5000 lines is likely generated
                # code (scaffolding and the like); leave it out.
                continue
            # Unrelated to the main flow: helps tracking down commits made
            # with a non-standard author account.
            if detail.author_email in user_unknown:
                print("email %s projectid= %d,branchname,%s,url=%s" % (
                    detail.author_email, project_id, branch_name, project_url))
            self._accumulate(detail_map, detail)
        return detail_map
|
||
|
||
def get_commit_detail(project_id, commit_id):
    """Fetch the line statistics of a single commit.

    The token is sent in the ``PRIVATE-TOKEN`` header so it does not appear
    in the URL. Author e-mail and name are normalised through
    ``user_email_alias_mapping`` and ``user_email_name_mapping``.

    :param project_id: numeric GitLab project id
    :param commit_id: id (SHA) of the commit
    :return: a CommitDetails object, or None when the request fails, the
             response is not a commit object, the commit has no stats, or
             its title contains "Merge branch"
    """
    url = '%s/api/v4/projects/%s/repository/commits/%s' % (
        config.git_root_url, project_id, commit_id)
    try:
        response = requests.get(
            url, headers={'PRIVATE-TOKEN': config.git_token}, timeout=60)
    except requests.RequestException as exc:
        print("request failed: %s (%s)" % (url, exc))
        return None

    # Covers rate limiting ("Retry later") as well as 4xx/5xx errors.
    if response.status_code != 200:
        return None
    try:
        payload = response.json()
    except ValueError:
        return None
    if not isinstance(payload, dict):
        return None

    # Merge commits are recognised by their title and left out.
    if 'Merge branch' in (payload.get('title') or ''):
        return None
    stats = payload.get('stats')
    if stats is None:
        return None

    author_name = payload.get('author_name')
    author_email = payload.get('author_email')
    # Fold alias addresses into the canonical one first, then look up the
    # display name under the canonical address.
    alias = user_email_alias_mapping.get(author_email)
    if alias is not None:
        author_email = alias
    display_name = user_email_name_mapping.get(author_email)
    if display_name is not None:
        author_name = display_name

    details = CommitDetails()
    details.additions = stats['additions']
    details.deletions = stats['deletions']
    details.total = stats['total']
    details.author_email = author_email
    details.author_name = author_name
    return details
|
||
|
||
def make_dir_safe(file_path):
    """Create the directory that ``file_path`` lives in, if it is missing.

    A path ending in a separator is treated as a directory path and is
    created itself; otherwise everything up to the last separator is
    created. A bare file name without any directory part is a no-op.

    ``exist_ok=True`` makes the call safe when several worker threads create
    the same directory at the same time.

    :param file_path: path of a file, or of a directory ending in "/"
    :return: None
    """
    folder_path = os.path.dirname(file_path)
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)
|
||
|
||
def write_to_csv(file_path, final_commit_map, extra):
    """Write the per-author statistics to a CSV file.

    The file is written as UTF-8 (the header is Chinese text) through the
    ``csv`` module, so values containing commas or quotes are quoted instead
    of shifting the columns. Missing directories are created first.

    :param file_path: destination file path
    :param final_commit_map: dict whose values expose author_email,
                             author_name, total, additions and deletions
    :param extra: text used as the title of a sixth header column; data rows
                  have no value for that column
    :return: None
    """
    make_dir_safe(file_path)
    print(file_path)
    # newline='' lets the csv writer control line endings ("\n" as before).
    with open(file_path, 'w', encoding='utf-8', newline='') as out:
        writer = csv.writer(out, lineterminator='\n')
        writer.writerow(
            ["提交人邮箱", "提交人姓名", "总行数", "增加行数", "删除行数", extra])
        for value in final_commit_map.values():
            writer.writerow([
                value.author_email, value.author_name,
                value.total, value.additions, value.deletions])
|
||
|
||
class CommitDetails(json.JSONEncoder):
    """Plain record holding the line statistics attributed to one author."""

    # Display name of the author.
    author_name = None
    # E-mail address of the author; used as the aggregation key.
    author_email = None
    # Number of added lines.
    additions = 0
    # Number of deleted lines.
    deletions = 0
    # Total number of changed lines.
    total = 0
|
||
|
||
class ProjectInfo(json.JSONEncoder):
    """Plain record describing one GitLab project and its statistics."""

    # Numeric GitLab project id.
    project_id = None
    # Project description text.
    project_desc = None
    # Web URL of the project.
    project_url = None
    # URL path segment of the project; used in the report file name.
    path = None
    # Project name.
    name = None
    # Author e-mail -> CommitDetails, filled in once the project is processed.
    commit_map = None
|
||
|
||
# Script entry point: collect the statistics and write all reports.
if __name__ == '__main__':
    gitlab4 = GitlabApiCountTrueLeTrue()
    gitlab4.get_projects()