Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ go run ./cmd/ghactivities \
--until End date in ISO8601 format (default: now)
--visibility Repository visibility: public, private, all (default: public)
--max-length-size Max output file size such as 1B, 2K, 2M (default: 1M)
--max-tokens Max output file tokens from rendered JSON (default: 0, disabled)
--order Event order: asc, desc (default: asc)
--help Show help
```
Expand All @@ -95,12 +96,13 @@ Notes:
- `--since` and `--until` must be valid RFC3339 / ISO8601 timestamps such as `2026-03-15T12:00:00Z`.
- `--visibility private` limits results to repositories GitHub reports as private.
- `--order asc` returns oldest-first output; `--order desc` returns newest-first output.
- `--max-tokens` counts tokens from the formatted JSON content using `github.com/tiktoken-go/tokenizer` with the `o200k_base` encoding.

## Output files and splitting

By default, `ghactivities` writes a formatted JSON array to `./ghactivities.json`.

If the rendered JSON exceeds `--max-length-size`, `ghactivities` automatically splits the result into numbered files that keep the same base name and extension:
If the rendered JSON exceeds `--max-length-size` or `--max-tokens`, `ghactivities` automatically splits the result into numbered files that keep the same base name and extension:

- `./ghactivities_1.json`
- `./ghactivities_2.json`
Expand All @@ -111,11 +113,16 @@ For example:
```bash
go run ./cmd/ghactivities \
--output ./exports/activity.json \
--max-length-size 256K
--max-length-size 256K \
--max-tokens 20000
```

This produces either `./exports/activity.json` or, when splitting is needed, files like `./exports/activity_1.json`, `./exports/activity_2.json`, and so on.

Splitting keeps the existing event order intact and fills files in sequence. When both limits are set, `ghactivities` uses whichever limit is reached first for each chunk.

If a single rendered event already exceeds `--max-length-size`, it is still written as its own numbered file because it cannot be split further. If a single rendered event exceeds `--max-tokens`, `ghactivities` returns an error instead of silently writing a file above the requested token cap.

## Development setup

This repository uses `mise` for contributor setup and toolchain management.
Expand Down
2 changes: 1 addition & 1 deletion cmd/ghactivities/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func run(
}

sorted := util.SortEvents(events, options.Order)
files, err := output.WriteEventsToFiles(sorted, options.Output, options.MaxLengthSize)
files, err := output.WriteEventsToFiles(sorted, options.Output, options.MaxLengthSize, options.MaxTokens)
if err != nil {
return err
}
Expand Down
4 changes: 4 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
module github.com/dyoshikawa/ghactivities

go 1.25.0

require github.com/tiktoken-go/tokenizer v0.7.0

require github.com/dlclark/regexp2 v1.11.5 // indirect
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/tiktoken-go/tokenizer v0.7.0 h1:VMu6MPT0bXFDHr7UPh9uii7CNItVt3X9K90omxL54vw=
github.com/tiktoken-go/tokenizer v0.7.0/go.mod h1:6UCYI/DtOallbmL7sSy30p6YQv60qNyU/4aVigPOx6w=
9 changes: 9 additions & 0 deletions internal/cli/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ type Options struct {
Until time.Time
Visibility string
MaxLengthSize int
MaxTokens int
Order string
}

Expand All @@ -34,6 +35,7 @@ func ParseArgs(argv []string, now func() time.Time) (Options, error) {
until := fs.String("until", defaults.Until.Format(time.RFC3339Nano), "End date in ISO8601 format")
visibility := fs.String("visibility", defaults.Visibility, "Repository visibility")
maxLengthSize := fs.String("max-length-size", fmt.Sprintf("%dB", defaults.MaxLengthSize), "Max output file size")
maxTokens := fs.Int("max-tokens", defaults.MaxTokens, "Max output file tokens")
order := fs.String("order", defaults.Order, "Event order")
help := fs.Bool("help", false, "Show help")

Expand Down Expand Up @@ -71,13 +73,18 @@ func ParseArgs(argv []string, now func() time.Time) (Options, error) {
return Options{}, fmt.Errorf("invalid value for --max-length-size: %w", err)
}

if *maxTokens < 0 {
return Options{}, fmt.Errorf("invalid value for --max-tokens: must be 0 or greater")
}

return Options{
GitHubToken: *githubToken,
Output: *output,
Since: parsedSince,
Until: parsedUntil,
Visibility: *visibility,
MaxLengthSize: parsedSize,
MaxTokens: *maxTokens,
Order: *order,
}, nil
}
Expand All @@ -92,6 +99,7 @@ Options:
--until End date in ISO8601 format (default: now)
--visibility Repository visibility: public, private, all (default: public)
--max-length-size Max output file size: e.g. 1B, 2K, 2M (default: 1M)
--max-tokens Max output file tokens from rendered JSON (default: 0, disabled)
--order Event order: asc, desc (default: asc)
--help Show this help message
`, "\n")
Expand All @@ -105,6 +113,7 @@ func defaultOptions(now func() time.Time) Options {
Until: current,
Visibility: "public",
MaxLengthSize: 1024 * 1024,
MaxTokens: 0,
Order: "asc",
}
}
Expand Down
8 changes: 8 additions & 0 deletions internal/cli/options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ func TestParseArgsParsesAllOptions(t *testing.T) {
"--until", "2024-06-01T00:00:00Z",
"--visibility", "all",
"--max-length-size", "2M",
"--max-tokens", "1234",
"--order", "desc",
}, now)
if err != nil {
Expand All @@ -40,6 +41,9 @@ func TestParseArgsParsesAllOptions(t *testing.T) {
if result.MaxLengthSize != 2*1024*1024 {
t.Fatalf("MaxLengthSize = %d, want %d", result.MaxLengthSize, 2*1024*1024)
}
if result.MaxTokens != 1234 {
t.Fatalf("MaxTokens = %d, want %d", result.MaxTokens, 1234)
}
if result.Order != "desc" {
t.Fatalf("Order = %q, want %q", result.Order, "desc")
}
Expand All @@ -61,6 +65,9 @@ func TestParseArgsUsesDefaults(t *testing.T) {
if result.MaxLengthSize != 1024*1024 {
t.Fatalf("MaxLengthSize = %d, want %d", result.MaxLengthSize, 1024*1024)
}
if result.MaxTokens != 0 {
t.Fatalf("MaxTokens = %d, want %d", result.MaxTokens, 0)
}
if result.Order != "asc" {
t.Fatalf("Order = %q, want %q", result.Order, "asc")
}
Expand All @@ -83,6 +90,7 @@ func TestParseArgsRejectsInvalidValues(t *testing.T) {
{name: "order", argv: []string{"--order", "random"}},
{name: "since", argv: []string{"--since", "not-a-date"}},
{name: "size", argv: []string{"--max-length-size", "abc"}},
{name: "tokens", argv: []string{"--max-tokens", "-1"}},
}

for _, tt := range tests {
Expand Down
108 changes: 103 additions & 5 deletions internal/output/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,42 @@ import (
"strings"

"github.com/dyoshikawa/ghactivities/internal/events"
"github.com/tiktoken-go/tokenizer"
)

func WriteEventsToFiles(items []events.Event, output string, maxLengthSize int) ([]string, error) {
// splitConstraints bundles the per-file output limits consulted when deciding
// whether rendered JSON fits in one file or must be split across several.
type splitConstraints struct {
	// maxLengthSize is the maximum rendered size in bytes for a single file.
	maxLengthSize int
	// maxTokens is the maximum token count per file; 0 or less disables
	// token limiting entirely.
	maxTokens int
	// countTokens counts tokens in rendered JSON content. It is nil when
	// maxTokens <= 0 and must not be called in that case.
	countTokens func([]byte) (int, error)
}

func WriteEventsToFiles(items []events.Event, output string, maxLengthSize int, maxTokens int) ([]string, error) {
constraints, err := newSplitConstraints(maxLengthSize, maxTokens)
if err != nil {
return nil, err
}

content, err := marshalEvents(items)
if err != nil {
return nil, err
}

if len(content) <= maxLengthSize {
fits, err := constraints.fits(content)
if err != nil {
return nil, err
}

if fits {
if err := os.WriteFile(output, content, 0o644); err != nil {
return nil, fmt.Errorf("write output file: %w", err)
}
return []string{output}, nil
}

return splitAndWriteFiles(items, output, maxLengthSize)
return splitAndWriteFiles(items, output, constraints)
}

func splitAndWriteFiles(items []events.Event, output string, maxLengthSize int) ([]string, error) {
func splitAndWriteFiles(items []events.Event, output string, constraints splitConstraints) ([]string, error) {
dir := filepath.Dir(output)
ext := filepath.Ext(output)
base := strings.TrimSuffix(filepath.Base(output), ext)
Expand All @@ -43,11 +60,22 @@ func splitAndWriteFiles(items []events.Event, output string, maxLengthSize int)
return nil, err
}

if len(content) <= maxLengthSize {
fits, err := constraints.fits(content)
if err != nil {
return nil, err
}

if fits {
continue
}

if len(chunk) == 1 {
if exceedsTokens, err := constraints.exceedsTokens(content); err != nil {
return nil, err
} else if exceedsTokens {
return nil, fmt.Errorf("single event exceeds --max-tokens limit after JSON rendering")
}

path := numberedPath(dir, base, ext, fileIndex)
if err := os.WriteFile(path, content, 0o644); err != nil {
return nil, fmt.Errorf("write output file: %w", err)
Expand All @@ -63,6 +91,9 @@ func splitAndWriteFiles(items []events.Event, output string, maxLengthSize int)
if err != nil {
return nil, err
}
if err := constraints.validateChunk(previousContent); err != nil {
return nil, err
}

path := numberedPath(dir, base, ext, fileIndex)
if err := os.WriteFile(path, previousContent, 0o644); err != nil {
Expand All @@ -78,6 +109,9 @@ func splitAndWriteFiles(items []events.Event, output string, maxLengthSize int)
if err != nil {
return nil, err
}
if err := constraints.validateChunk(content); err != nil {
return nil, err
}
path := numberedPath(dir, base, ext, fileIndex)
if err := os.WriteFile(path, content, 0o644); err != nil {
return nil, fmt.Errorf("write output file: %w", err)
Expand All @@ -88,6 +122,70 @@ func splitAndWriteFiles(items []events.Event, output string, maxLengthSize int)
return files, nil
}

// newSplitConstraints builds the output limits used by WriteEventsToFiles.
// The tokenizer is loaded only when a positive maxTokens is supplied, so
// runs without a token limit never pay the tokenizer start-up cost.
func newSplitConstraints(maxLengthSize int, maxTokens int) (splitConstraints, error) {
	limits := splitConstraints{
		maxLengthSize: maxLengthSize,
		maxTokens:     maxTokens,
	}

	if maxTokens <= 0 {
		// Token limiting disabled: countTokens stays nil and is never called.
		return limits, nil
	}

	codec, err := tokenizer.Get(tokenizer.O200kBase)
	if err != nil {
		return splitConstraints{}, fmt.Errorf("load tokenizer: %w", err)
	}

	limits.countTokens = func(rendered []byte) (int, error) {
		n, err := codec.Count(string(rendered))
		if err != nil {
			return 0, fmt.Errorf("count rendered JSON tokens: %w", err)
		}
		return n, nil
	}

	return limits, nil
}

// fits reports whether content is within the byte-size limit and, when token
// limiting is enabled, within the token limit as well.
func (constraints splitConstraints) fits(content []byte) (bool, error) {
	// The byte check is free; do it first so the tokenizer only runs on
	// content that already satisfies the size limit.
	if len(content) > constraints.maxLengthSize {
		return false, nil
	}

	over, err := constraints.exceedsTokens(content)
	if err != nil {
		return false, err
	}
	return !over, nil
}

// exceedsTokens reports whether content's token count is above maxTokens.
// When token limiting is disabled (maxTokens <= 0) it reports false without
// invoking the tokenizer.
func (constraints splitConstraints) exceedsTokens(content []byte) (bool, error) {
	if constraints.maxTokens <= 0 {
		return false, nil
	}

	count, err := constraints.countTokens(content)
	if err != nil {
		return false, err
	}
	return count > constraints.maxTokens, nil
}

// validateChunk returns an error when a rendered chunk that cannot be split
// any further still exceeds the token limit, rather than silently writing a
// file above the requested cap.
func (constraints splitConstraints) validateChunk(content []byte) error {
	over, err := constraints.exceedsTokens(content)
	if err != nil {
		return err
	}
	if !over {
		return nil
	}
	return fmt.Errorf("single event exceeds --max-tokens limit after JSON rendering")
}

// numberedPath builds the path of the index-th split file, keeping the
// original base name and extension, e.g. dir/base_3.ext.
func numberedPath(dir string, base string, ext string, index int) string {
	filename := fmt.Sprintf("%s_%d%s", base, index, ext)
	return filepath.Join(dir, filename)
}
Expand Down
Loading
Loading