Skip to content

Commit

Permalink
====init====
Browse files Browse the repository at this point in the history
  • Loading branch information
miaoyc666 committed Sep 28, 2021
1 parent 214fdaa commit 1f00558
Show file tree
Hide file tree
Showing 3 changed files with 393 additions and 1 deletion.
21 changes: 20 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,21 @@
#gitlab代码行数统计
# GitLab代码行数统计

## 环境依赖
* python3

## 获取token
1. 登录gitlab, 点击右上角个人信息Preferences;
2. 左侧选择`Access Tokens`, 输入token名称,选择权限范围(勾选api)和过期时间;
3. 点击`Create personal access token`,即可生成个人账号的token。

## 统计代码量
##### Usage
`config.py`文件包含统计脚本需要用到的配置信息
`run.py`脚本为代码量统计脚本,使用语法:
```
python3 run.py
```

## 致谢
gitlab_code_statistics初版参考了git-status项目。
感谢git-status项目原作者EightDoor,项目地址 :https://gitee.com/EightDoor/git-status
31 changes: 31 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
File name : config.py
Author : miaoyc
Create date : 2021/9/28 1:38 下午
Description : 配置模板
"""

import datetime


# Base URL of the GitLab server; run.py prepends it to every "/api/v4/..." path.
git_root_url = ""
# Personal access token sent with every API request.
git_token = ""
# Directory under which the generated CSV reports are written.
export_path = "./dist"
# Start date of the statistics window, as a 'YYYY-MM-DD' string.
t_from = ""
# End date of the statistics window, as a 'YYYY-MM-DD' string.
t_end = ""
# Start of the window as a datetime object, parsed from t_from.
# NOTE: this is a template -- strptime raises ValueError at import time while
# t_from is still the empty string, so fill in both dates before running.
date_from = datetime.datetime.strptime(t_from, '%Y-%m-%d')
# End of the window as a datetime object, parsed from t_end.
date_end = datetime.datetime.strptime(t_end, '%Y-%m-%d')

# Repositories to include; run.py compares each project's
# "name_with_namespace" value against the entries of this list.
valid_project = [
]
342 changes: 342 additions & 0 deletions src/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,342 @@
#!/usr/bin/env python
# coding=utf-8

"""
File name : run.py
Author : miaoyc
Create date : 2021/9/28 1:38 下午
Description :
"""

import requests
import os
import json
import threading
import datetime

import config as config

"""一个线程锁"""
# Re-entrant lock guarding state that the worker threads share.
lock = threading.RLock()

# Author e-mails to report when seen in a commit (looked up by membership only).
user_unknown = dict()
# Alias e-mail -> canonical e-mail, applied when a commit is read.
user_email_alias_mapping = dict()
# Canonical e-mail -> display name override, applied when a commit is read.
user_email_name_mapping = dict()
# Project names with namespace; declared here, not referenced by the visible code.
name_with_namespace_list = list()


class GitlabApiCountTrueLeTrue:
    """
    Worker that collects per-author line-change statistics from GitLab.

    ``get_projects`` is the entry point: it lists the projects, starts one
    thread per selected project (``get_branches``), and finally writes a
    grand-total CSV. ``get_branches`` walks the branches of one project and
    ``get_commits`` walks the commits of one branch.

    Both dictionaries below are class attributes, so they are shared by every
    instance and by every worker thread.
    """

    # Ids of commits that were already counted. Used for de-duplication,
    # because the same commit can be reachable from several branches.
    total_commit_map = {}

    # Final result set: project id -> ProjectInfo with ``commit_map`` filled.
    totalMap = {}

    # GitLab caps ``per_page``; asking for more does not return more rows,
    # so every list endpoint is paged explicitly with this size.
    _PAGE_SIZE = 100

    # Seconds to wait for a single HTTP request before giving up.
    _REQUEST_TIMEOUT = 30

    def _fetch_all(self, path, params=None):
        """
        Fetch every page of a GitLab list endpoint.

        The token travels in the ``PRIVATE-TOKEN`` header rather than in the
        query string, so URLs that get printed never contain it.

        :param path: endpoint path relative to ``/api/v4/``
        :param params: extra query parameters (encoded by ``requests``)
        :return: list of decoded items; ``None`` when the request failed
                 before anything could be read. If a later page fails, the
                 items read so far are returned.
        """
        url = '%s/api/v4/%s' % (config.git_root_url, path)
        query = dict(params or {})
        query['per_page'] = self._PAGE_SIZE
        headers = {'PRIVATE-TOKEN': config.git_token}

        items = []
        page = 1
        while True:
            query['page'] = page
            batch = None
            status = None
            try:
                response = requests.get(
                    url, params=query, headers=headers,
                    timeout=self._REQUEST_TIMEOUT)
            except requests.RequestException as err:
                status = err
            else:
                status = response.status_code
                # Rate-limited answers carry the plain body b'Retry later\n'.
                if status == 200 and response.content != b'Retry later\n':
                    try:
                        batch = response.json()
                    except ValueError:
                        batch = None

            # Error answers are JSON objects (or not JSON at all), never lists.
            if not isinstance(batch, list):
                print("Exception request: {0} page={1} status={2}".format(
                    url, page, status))
                return items or None

            items.extend(batch)
            # GitLab leaves X-Next-Page empty on the last page.
            if not batch or not response.headers.get('X-Next-Page'):
                return items
            page += 1

    @staticmethod
    def _parse_activity_time(stamp):
        """
        Parse a GitLab timestamp into a naive datetime.

        Only the leading ``YYYY-MM-DDTHH:MM:SS`` part is used, so values
        ending in ``Z``, ``+08:00`` or ``-05:00`` (with or without fractional
        seconds) are all accepted. The offset itself is ignored and
        sub-second precision is dropped; callers compare whole days.

        :param stamp: timestamp string as returned by the API
        :return: ``datetime.datetime`` without tzinfo
        """
        return datetime.datetime.strptime(stamp[:19], '%Y-%m-%dT%H:%M:%S')

    @staticmethod
    def _merge_by_email(detail_maps):
        """
        Sum several ``{key: detail}`` maps into one map keyed by author e-mail.

        The first detail seen for an e-mail becomes the accumulator and is
        updated in place with the counters of later ones.

        :param detail_maps: iterable of dicts whose values expose
                            ``author_email``, ``total``, ``additions`` and
                            ``deletions``
        :return: dict of author e-mail -> accumulated detail
        """
        merged = {}
        for detail_map in detail_maps:
            for detail in detail_map.values():
                existing = merged.get(detail.author_email)
                if existing is None:
                    merged[detail.author_email] = detail
                else:
                    existing.total += detail.total
                    existing.additions += detail.additions
                    existing.deletions += detail.deletions
        return merged

    def get_projects(self):
        """
        Process every configured project and write the overall report.

        :return: None
        """
        threads = []
        projects = self._fetch_all(
            'projects', {'order_by': 'last_activity_at'}) or []
        for project in projects:
            # Only repositories listed in the configuration are counted.
            if project["name_with_namespace"] not in config.valid_project:
                continue
            default_branch = project.get('default_branch')
            if default_branch is None:
                continue
            last_active = self._parse_activity_time(project['last_activity_at'])
            # Skip projects whose last activity lies two or more whole days
            # before the start of the statistics window.
            if (config.date_from - last_active).days > 1:
                continue

            project_info = ProjectInfo()
            project_info.project_id = project['id']
            project_info.name = project['name']
            project_info.project_desc = project['description']
            project_info.project_url = project['web_url']
            project_info.path = project['path']
            # Remembered so get_branches need not assume 'master'.
            project_info.default_branch = default_branch

            # One worker thread per project.
            threads.append(threading.Thread(
                target=self.get_branches, args=(project['id'], project_info)))

        for worker in threads:
            worker.start()
        for worker in threads:
            worker.join()

        final_commit_map = self._merge_by_email(
            project.commit_map for project in self.totalMap.values())
        write_to_csv("%s/GitStatic_%s/%s_%s.csv" % (config.export_path, config.t_from, 'total', config.t_from),
                     final_commit_map, "extra")
        return

    def get_branches(self, project_id, project_info):
        """
        Collect the commits of every unmerged branch of one project, sum them
        per author e-mail, store the result in ``totalMap`` and write the
        per-project CSV.

        :param project_id: numeric GitLab project id
        :param project_info: ProjectInfo describing the project
        :return: None
        """
        print("进入线程:%d,项目id%d,%s" %
              (threading.get_ident(), project_id, project_info.project_url))
        path = 'projects/%s/repository/branches' % project_id
        url = '%s/api/v4/%s' % (config.git_root_url, path)
        print("start get branch list %d,url=%s" % (project_id, url))

        branches = self._fetch_all(path)
        if branches is None:
            print("Exception branch: {0}->{1}".format(project_info.project_url, url))
            return
        if not branches:
            return

        # Branch name -> {author e-mail: detail} for that branch.
        branch_map = {}

        # Read the default branch first; fall back to 'master' when the
        # caller did not record one on project_info.
        default_branch = getattr(project_info, 'default_branch', None) or 'master'
        detail_map = self.get_commits(
            project_id, project_info.project_url, default_branch)
        print("get commits finish project_id=%d branch %s" %
              (project_id, default_branch))
        if detail_map:
            branch_map[default_branch] = detail_map

        for branch in branches:
            branch_name = branch['name']
            if branch_name is None:
                continue
            # Branches that are already merged are not processed again.
            if branch['merged']:
                continue
            # Already read successfully above; its commits are de-duplicated
            # anyway, so a second pass would only cost requests.
            if branch_name in branch_map:
                continue
            detail_map = self.get_commits(
                project_id, project_info.project_url, branch_name)
            if not detail_map:
                continue
            branch_map[branch_name] = detail_map
            print("get commits finish project_id=%d branch %s" %
                  (project_id, branch_name))

        print("all branch commits finish %d " % project_id)

        final_commit_map = self._merge_by_email(branch_map.values())
        if not final_commit_map:
            return

        project_info.commit_map = final_commit_map
        # totalMap is written by every worker thread.
        with lock:
            self.totalMap[project_info.project_id] = project_info

        write_to_csv(
            "%s/GitStatic_%s/project/%s_%d.csv" % (
                config.export_path, config.t_from, project_info.path, project_info.project_id),
            final_commit_map, project_info.project_url)

    def get_commits(self, project_id, project_url, branch_name):
        """
        Summarise the commits of one branch inside the configured time window.

        :param project_id: numeric GitLab project id
        :param project_url: web URL of the project (used in log output only)
        :param branch_name: branch (ref) whose commits are read
        :return: dict of author e-mail -> accumulated detail, or None when
                 the branch has no commits in the window or the request failed
        """
        since_date = config.date_from.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
        until_date = config.date_end.strftime('%Y-%m-%dT%H:%M:%S.%fZ')

        commits = self._fetch_all(
            'projects/%s/repository/commits' % project_id,
            {'ref_name': branch_name, 'since': since_date, 'until': until_date})
        if not commits:
            return
        print('start get_commits,projectID=%d,branch=%s' % (project_id, branch_name))

        detail_map = {}
        for commit in commits:
            commit_id = commit['id']
            if commit_id is None:
                continue
            # De-duplicate across branches and threads; the check and the
            # insert must happen under the same lock to be race-free.
            with lock:
                if commit_id in self.total_commit_map:
                    continue
                self.total_commit_map[commit_id] = commit_id

            detail = get_commit_detail(project_id, commit_id)
            if detail is None:
                continue
            if detail.total > 5000:
                # More than 5000 changed lines in one commit is probably
                # generated code (scaffolding and the like); ignore it.
                continue
            # Not part of the main flow: report commits from authors listed
            # in user_unknown so irregular accounts can be tracked down.
            if detail.author_email in user_unknown:
                print("email %s projectid= %d,branchname,%s,url=%s" % (
                    detail.author_email, project_id, branch_name, project_url))

            # Accumulate per author e-mail.
            exist_detail = detail_map.get(detail.author_email)
            if exist_detail is None:
                detail_map[detail.author_email] = detail
            else:
                exist_detail.total += detail.total
                exist_detail.additions += detail.additions
                exist_detail.deletions += detail.deletions
        return detail_map


def get_commit_detail(project_id, commit_id):
    """
    Fetch one commit and return its line statistics.

    The token is sent in the ``PRIVATE-TOKEN`` header so that it never
    appears in a URL, and the request is bounded by a timeout.

    :param project_id: project id
    :param commit_id: id (SHA) of the commit
    :return: a CommitDetails object, or None when the request failed, was
             rate limited, the commit title contains 'Merge branch', or the
             response carries no stats
    """
    url = '%s/api/v4/projects/%s/repository/commits/%s' \
          % (config.git_root_url, project_id, commit_id)
    try:
        response = requests.get(
            url, headers={'PRIVATE-TOKEN': config.git_token}, timeout=30)
    except requests.RequestException as err:
        print("Exception commit: {0}->{1}".format(url, err))
        return None

    # Rate-limited answers carry the plain body b'Retry later\n'.
    if response.status_code != 200 or response.content == b'Retry later\n':
        return None

    try:
        payload = response.json()
    except ValueError:
        return None
    # A commit is a JSON object that names its author; anything else is an
    # error payload and is skipped instead of raising KeyError.
    if not isinstance(payload, dict) or 'author_email' not in payload:
        return None

    # Merge commits are skipped, recognised by their title.
    if 'Merge branch' in (payload.get('title') or ''):
        return None
    stats = payload.get('stats')
    if stats is None:
        return None

    author_email = payload['author_email']
    author_name = payload.get('author_name')

    # Map alias e-mails to the canonical one, then optionally override the
    # display name for that canonical e-mail.
    canonical_email = user_email_alias_mapping.get(author_email)
    if canonical_email is not None:
        author_email = canonical_email
    name_override = user_email_name_mapping.get(author_email)
    if name_override is not None:
        author_name = name_override

    details = CommitDetails()
    details.additions = stats['additions']
    details.deletions = stats['deletions']
    details.total = stats['total']
    details.author_email = author_email
    details.author_name = author_name
    return details


def make_dir_safe(file_path):
    """
    Utility: make sure the directory needed for ``file_path`` exists.

    A path ending in "/" is treated as a directory; anything else is treated
    as a file whose parent directory is created. A bare file name (no
    directory part) needs nothing and is a no-op. Creation is safe when
    several threads ask for the same directory at once.

    :param file_path: path of a file, or of a directory ending in "/"
    :return: None
    """
    if file_path.endswith("/"):
        folder_path = file_path
    else:
        folder_path = os.path.dirname(file_path)
    # An empty string means "current directory": nothing to create, and
    # os.makedirs('') would raise FileNotFoundError.
    if folder_path:
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(folder_path, exist_ok=True)


def write_to_csv(file_path, final_commit_map, extra):
    """
    Utility: write the per-author statistics to a CSV file.

    The file is written as UTF-8 regardless of the platform default, and
    fields are quoted by the ``csv`` module when needed, so a comma inside an
    author name no longer shifts the columns.

    :param file_path: destination file; missing directories are created
    :param final_commit_map: mapping whose values expose ``author_email``,
                             ``author_name``, ``total``, ``additions`` and
                             ``deletions``
    :param extra: text placed in the sixth header cell (data rows have five
                  cells)
    :return: None
    """
    # Local import: the file's import block does not provide csv.
    import csv

    make_dir_safe(file_path)
    print(file_path)
    # newline='' lets the csv writer control line endings itself.
    with open(file_path, 'w', encoding='utf-8', newline='') as out:
        writer = csv.writer(out, lineterminator='\n')
        writer.writerow(
            ["提交人邮箱", "提交人姓名", "总行数", "增加行数", "删除行数", extra])
        for value in final_commit_map.values():
            writer.writerow([
                value.author_email, value.author_name,
                value.total, value.additions, value.deletions])


class CommitDetails(json.JSONEncoder):
    """
    Record holding the statistics of a single commit (or of several commits
    summed together for one author).

    NOTE(review): the class derives from json.JSONEncoder but overrides
    nothing from it here.
    """
    # Author identity; unset until a commit has been read.
    author_name = author_email = None
    # Line counters, zero until filled in.
    additions = deletions = total = 0


class ProjectInfo(json.JSONEncoder):
    """
    Record describing one project and, once computed, its statistics.

    NOTE(review): the class derives from json.JSONEncoder but overrides
    nothing from it here.
    """
    # Descriptive fields; all unset until the caller assigns them.
    project_id = project_desc = project_url = None
    path = name = None
    # Per-author statistics of the project; unset until they are computed.
    commit_map = None


if __name__ == '__main__':
    # Script entry point: build the worker and run the full statistics pass.
    collector = GitlabApiCountTrueLeTrue()
    collector.get_projects()

0 comments on commit 1f00558

Please sign in to comment.