Skip to content

Commit 0bf599e

Browse files
authored
Fix GitHub files URL parsing (#896)
* Fix GitHub files URL parsing
* changeset
* update comments
* Add test + more secure parsing
1 parent 619c144 commit 0bf599e

File tree

5 files changed

+248
-48
lines changed

5 files changed

+248
-48
lines changed

.changeset/six-buckets-pull.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@gitbook/integration-github-files': patch
3+
---
4+
5+
Fix GitHub files URL parsing

integrations/github-files/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
"typecheck": "tsc --noEmit",
1717
"check": "gitbook check",
1818
"publish-integrations": "dotenv gitbook publish .",
19-
"publish-integrations-staging": "gitbook publish ."
19+
"publish-integrations-staging": "gitbook publish .",
20+
"test": "bun test"
2021
}
2122
}

integrations/github-files/src/github.ts

Lines changed: 53 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,66 @@
11
import { ExposableError } from '@gitbook/runtime';
2-
import { GithubInstallationConfiguration, GithubRuntimeContext } from './types';
2+
import type { GithubInstallationConfiguration, GithubRuntimeContext } from './types';
33

44
export interface GithubProps {
55
url: string;
66
}
77

8-
const splitGithubUrl = (url: string) => {
9-
const permalinkRegex =
10-
/^https?:\/\/github\.com\/([\w-]+)\/([\w-]+)\/blob\/([a-f0-9]+)\/(.+?)#(.+)$/;
11-
const wholeFileRegex = /^https?:\/\/github\.com\/([\w-]+)\/([\w-]+)\/blob\/([\w.-]+)\/(.+)$/;
12-
const multipleLineRegex = /^L\d+-L\d+$/;
8+
/**
9+
* Extract the parts from a GitHub URL
10+
*/
11+
export const splitGithubUrl = (url: string) => {
12+
if (!url) {
13+
return undefined;
14+
}
1315

14-
let orgName = '';
15-
let repoName = '';
16-
let ref = '';
17-
let fileName = '';
18-
let lines: number[] = [];
16+
let urlObject: URL;
17+
try {
18+
urlObject = new URL(url);
19+
} catch {
20+
return undefined;
21+
}
1922

20-
if (url.match(permalinkRegex)) {
21-
const match = url.match(permalinkRegex);
22-
if (!match) {
23-
return;
24-
}
23+
// Check if the URL is a valid GitHub URL
24+
if (urlObject.hostname !== 'github.com') {
25+
return undefined;
26+
}
2527

26-
orgName = match[1];
27-
repoName = match[2];
28-
ref = match[3];
29-
fileName = match[4];
30-
const hash = match[5];
31-
32-
if (hash !== '') {
33-
if (url.match(permalinkRegex)) {
34-
if (hash.match(multipleLineRegex)) {
35-
lines = hash.replace(/L/g, '').split('-').map(Number);
36-
} else {
37-
const singleLineNumberArray: number[] = [];
38-
const parsedInt = parseInt(hash.replace(/L/g, ''), 10);
39-
singleLineNumberArray.push(parsedInt);
40-
singleLineNumberArray.push(parsedInt);
41-
lines = singleLineNumberArray;
42-
}
43-
}
44-
}
45-
} else if (url.match(wholeFileRegex)) {
46-
const match = url.match(wholeFileRegex);
47-
if (!match) {
48-
return;
49-
}
28+
const baseRegex = /([\w-]+)\/([a-zA-Z0-9._-]+)\/blob\/(.+)$/;
29+
// Keep the hash part of the URL for lines detection
30+
const path = `${urlObject.pathname}${urlObject.hash}`;
31+
const baseMatch = path.match(baseRegex);
5032

51-
orgName = match[1];
52-
repoName = match[2];
53-
ref = match[3];
54-
fileName = match[4];
33+
if (!baseMatch) {
34+
return undefined;
5535
}
36+
37+
const orgName = baseMatch[1];
38+
const repoName = baseMatch[2];
39+
const restOfPath = baseMatch[3];
40+
41+
let lines: number[] = [];
42+
let pathWithoutLines = restOfPath;
43+
44+
// Get the lines from the URL
45+
const lineNumberRegex = /#L(\d+)(?:-L(\d+))?$/;
46+
const lineMatch = restOfPath.match(lineNumberRegex);
47+
48+
if (lineMatch) {
49+
const startLine = Number.parseInt(lineMatch[1], 10);
50+
const endLine = lineMatch[2] ? Number.parseInt(lineMatch[2], 10) : startLine;
51+
lines = [startLine, endLine];
52+
pathWithoutLines = restOfPath.replace(lineNumberRegex, '');
53+
}
54+
55+
// Split the remaining path to separate ref from file path
56+
const pathParts = pathWithoutLines.split('/');
57+
58+
// The first part is always the ref (branch/tag)
59+
const ref = pathParts[0];
60+
61+
// Everything after the first part is the file path
62+
const fileName = pathParts.slice(1).join('/');
63+
5664
return {
5765
orgName,
5866
repoName,
@@ -89,9 +97,8 @@ const getGithubApiResponse = async (
8997
if (!res.ok) {
9098
if (res.status === 403 || res.status === 404) {
9199
return false;
92-
} else {
93-
throw new Error(`Response status from ${baseURL}: ${res.status}`);
94100
}
101+
throw new Error(`Response status from ${baseURL}: ${res.status}`);
95102
}
96103

97104
const body = await res.text();
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
import { describe, it, expect } from 'bun:test';
2+
3+
import { splitGithubUrl } from '../src/github';
4+
5+
describe('splitGithubUrl', () => {
6+
describe('valid GitHub URLs', () => {
7+
it('should split a basic GitHub URL', () => {
8+
const url = 'https://github.com/gitbookio/gitbook/blob/master/README.md';
9+
const result = splitGithubUrl(url);
10+
expect(result).toEqual({
11+
orgName: 'gitbookio',
12+
repoName: 'gitbook',
13+
fileName: 'README.md',
14+
ref: 'master',
15+
lines: [],
16+
});
17+
});
18+
19+
it('should split GitHub URL with line range', () => {
20+
const url = 'https://github.com/gitbookio/gitbook/blob/master/README.md#L1-L2';
21+
const result = splitGithubUrl(url);
22+
expect(result).toEqual({
23+
orgName: 'gitbookio',
24+
repoName: 'gitbook',
25+
fileName: 'README.md',
26+
ref: 'master',
27+
lines: [1, 2],
28+
});
29+
});
30+
31+
it('should split GitHub URL with single line', () => {
32+
const url = 'https://github.com/gitbookio/gitbook/blob/master/README.md#L1';
33+
const result = splitGithubUrl(url);
34+
expect(result).toEqual({
35+
orgName: 'gitbookio',
36+
repoName: 'gitbook',
37+
fileName: 'README.md',
38+
ref: 'master',
39+
lines: [1, 1],
40+
});
41+
});
42+
43+
it('should handle repo names with dots', () => {
44+
const url = 'https://github.com/vercel/next.js/blob/canary/package.json';
45+
const result = splitGithubUrl(url);
46+
expect(result).toEqual({
47+
orgName: 'vercel',
48+
repoName: 'next.js',
49+
fileName: 'package.json',
50+
ref: 'canary',
51+
lines: [],
52+
});
53+
});
54+
55+
it('should handle repo names with hyphens and underscores', () => {
56+
const url = 'https://github.com/vercel/next-learn/blob/main/package.json';
57+
const result = splitGithubUrl(url);
58+
expect(result).toEqual({
59+
orgName: 'vercel',
60+
repoName: 'next-learn',
61+
fileName: 'package.json',
62+
ref: 'main',
63+
lines: [],
64+
});
65+
});
66+
67+
it('should handle nested file paths', () => {
68+
const url = 'https://github.com/facebook/react/blob/main/src/index.js';
69+
const result = splitGithubUrl(url);
70+
expect(result).toEqual({
71+
orgName: 'facebook',
72+
repoName: 'react',
73+
fileName: 'src/index.js',
74+
ref: 'main',
75+
lines: [],
76+
});
77+
});
78+
79+
it('should handle deeply nested file paths', () => {
80+
const url =
81+
'https://github.com/microsoft/vscode/blob/main/src/vs/workbench/contrib/terminal/browser/terminal.ts';
82+
const result = splitGithubUrl(url);
83+
expect(result).toEqual({
84+
orgName: 'microsoft',
85+
repoName: 'vscode',
86+
fileName: 'src/vs/workbench/contrib/terminal/browser/terminal.ts',
87+
ref: 'main',
88+
lines: [],
89+
});
90+
});
91+
92+
it('should handle URLs with query parameters', () => {
93+
const url = 'https://github.com/gitbookio/gitbook/blob/master/README.md?query=test';
94+
const result = splitGithubUrl(url);
95+
expect(result).toEqual({
96+
orgName: 'gitbookio',
97+
repoName: 'gitbook',
98+
fileName: 'README.md',
99+
ref: 'master',
100+
lines: [],
101+
});
102+
});
103+
104+
it('should handle URLs with both query parameters and line numbers', () => {
105+
const url =
106+
'https://github.com/gitbookio/gitbook/blob/master/README.md?query=test#L5-L10';
107+
const result = splitGithubUrl(url);
108+
expect(result).toEqual({
109+
orgName: 'gitbookio',
110+
repoName: 'gitbook',
111+
fileName: 'README.md',
112+
ref: 'master',
113+
lines: [5, 10],
114+
});
115+
});
116+
117+
it('should handle HTTP URLs', () => {
118+
const url = 'http://github.com/gitbookio/gitbook/blob/master/README.md';
119+
const result = splitGithubUrl(url);
120+
expect(result).toEqual({
121+
orgName: 'gitbookio',
122+
repoName: 'gitbook',
123+
fileName: 'README.md',
124+
ref: 'master',
125+
lines: [],
126+
});
127+
});
128+
129+
it('should handle organization names with hyphens', () => {
130+
const url = 'https://github.com/microsoft-docs/azure-docs/blob/main/README.md';
131+
const result = splitGithubUrl(url);
132+
expect(result).toEqual({
133+
orgName: 'microsoft-docs',
134+
repoName: 'azure-docs',
135+
fileName: 'README.md',
136+
ref: 'main',
137+
lines: [],
138+
});
139+
});
140+
});
141+
142+
describe('invalid GitHub URLs', () => {
143+
it('should return undefined for non-GitHub URLs', () => {
144+
const url = 'https://gitlab.com/gitbookio/gitbook/blob/master/README.md';
145+
const result = splitGithubUrl(url);
146+
expect(result).toBeUndefined();
147+
});
148+
149+
it('should return undefined for malformed GitHub URLs', () => {
150+
const url = 'https://github.com/gitbookio/gitbook/README.md';
151+
const result = splitGithubUrl(url);
152+
expect(result).toBeUndefined();
153+
});
154+
155+
it('should return undefined for GitHub URLs without blob', () => {
156+
const url = 'https://github.com/gitbookio/gitbook/tree/master/README.md';
157+
const result = splitGithubUrl(url);
158+
expect(result).toBeUndefined();
159+
});
160+
161+
it('should return undefined for empty string', () => {
162+
const result = splitGithubUrl('');
163+
expect(result).toBeUndefined();
164+
});
165+
166+
it('should return undefined for malformed URLs', () => {
167+
const testCases = [
168+
'not-a-url',
169+
'http://',
170+
'https://',
171+
'ftp://example.com',
172+
173+
'javascript:alert("test")',
174+
'://github.com/test/repo',
175+
'github.com/test/repo', // missing protocol
176+
];
177+
178+
for (const url of testCases) {
179+
const result = splitGithubUrl(url);
180+
expect(result).toBeUndefined();
181+
}
182+
});
183+
});
184+
});
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
{
2-
"extends": "@gitbook/tsconfig/integration.json"
2+
"extends": "@gitbook/tsconfig/integration.json",
3+
"compilerOptions": {
4+
"types": ["bun"]
5+
}
36
}

0 commit comments

Comments
 (0)