forked from larsxschneider/git-repo-analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgit-find-large-files
executable file
·66 lines (56 loc) · 1.83 KB
/
git-find-large-files
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env bash
#
# Print the largest files in a Git repository. The script must be called
# from the root of the Git repository. You can pass a threshold to print
# only files greater than a certain size (compressed size in Git database,
# default is 500kb).
#
# Files that have a large compressed size should usually be stored in
# Git LFS [2].
#
# Based on script from Antony Stubbs [1] and improved with ideas from Peff.
#
# [1] http://stubbisms.wordpress.com/2009/07/10/git-script-to-show-largest-pack-objects-and-trim-your-waist-line/
# [2] https://git-lfs.github.com/
#
# Usage:
# git-find-large-files [size threshold in KB]
#
# Print a diagnostic message to stderr.
warn() {
  printf '%s\n' "$*" >&2
}

#######################################
# Print every Git object whose compressed (on-disk) size is greater than
# a threshold, largest first, as an aligned table with the columns
#   <size in KB>  <Present|Changed|Deleted>  <path>
# Must be run from the root of the repository, because the "Changed"
# check tests the object's path relative to the current directory.
# Arguments:
#   $1 - optional size threshold in KB (integer, default 500)
# Outputs:
#   the table on stdout; diagnostics on stderr
# Returns:
#   0 on success, 2 for a non-integer threshold, otherwise the exit
#   status of the failing git command
#######################################
main() {
  local min_kb=${1:-500}
  local int_re='^-?[0-9]+$'
  local tab=$'\t'
  local objects reachable='' tip_objects='' lookups_loaded=0
  local bytes sha kb entry location status rows=''

  if ! [[ $min_kb =~ $int_re ]]; then
    warn "git-find-large-files: size threshold must be an integer number of KB, got '$min_kb'"
    return 2
  fi

  # List all objects with their on-disk size, largest first. git and sort
  # run as separate steps so that a git failure (for example when not
  # inside a repository) is detected instead of being hidden by the pipe.
  objects=$(
    git cat-file \
      --batch-all-objects \
      --batch-check='%(objectsize:disk) %(objectname)'
  ) || return
  objects=$(sort -nr <<<"$objects")

  # The object list is read on fd 3 so that commands inside the loop
  # cannot accidentally consume it through stdin.
  while IFS=' ' read -r -u 3 bytes sha; do
    [[ -n $sha ]] || continue

    # Compressed size in whole kilobytes (integer division).
    kb=$((bytes / 1024))
    # The list is sorted, so the first object at or below the threshold
    # ends the scan.
    if [ "$kb" -le "$min_kb" ]; then
      break
    fi

    # The rev-list output does not change between iterations: load it
    # once, and only when at least one object exceeds the threshold.
    if ((lookups_loaded == 0)); then
      reachable=$(git rev-list --all --objects) || return
      # NOTE(review): --all --max-count=1 selects the newest commit
      # reachable from any ref, which is not necessarily HEAD. Kept
      # as-is; confirm this is the intended "head revision".
      tip_objects=$(git rev-list --all --objects --max-count=1) || return
      lookups_loaded=1
    fi

    # Find the object's location in the repository tree. The match is
    # anchored on the hash so that a path containing it cannot match.
    # Commits are listed without a path and keep their hash as location;
    # unreachable objects have no entry and get an empty location.
    entry=$(grep -m 1 -E "^${sha}( |\$)" <<<"$reachable")
    location=${entry#"$sha "}

    if grep -q -E "^${sha}( |\$)" <<<"$tip_objects"; then
      # Object is in the head revision
      status="Present"
    elif [[ -n $location && -e $location ]]; then
      # Object's path still exists in the working tree
      status="Changed"
    else
      # Neither the object nor its path is in the head revision
      status="Deleted"
    fi

    # Tab-separated so that commas in file names do not split columns.
    rows+="${kb}${tab}${status}${tab}${location}"$'\n'
  done 3<<<"$objects"

  [[ -n $rows ]] || return 0
  # printf '%s' emits the paths verbatim (no backslash interpretation).
  printf '%s' "$rows" | column -t -s "$tab"
}

main "$@"