1
+ ---
# Workflow: when an issue is opened, run an inline script that compares it with
# the repository's other open issues and comments with any likely duplicates.
2
+ name : Detect Duplicate Issues
3
+
4
+ on :
5
+ issues :
6
+ types :
# Only the "opened" activity type triggers this workflow.
7
+ - opened
8
+
9
+ permissions :
# NOTE(review): nothing in the visible script reads repository contents;
# confirm whether this scope is needed.
10
+ contents : read
# The script posts a comment on the new issue (issues.createComment).
11
+ issues : write
# The script sends chat-completion requests to the models inference endpoint.
12
+ models : read
13
+
14
+ jobs :
15
+ detect-duplicates :
16
+ runs-on : ubuntu-latest
17
+ steps :
18
+ - name : Detect potential duplicate issues
19
+ uses : actions/github-script@v7
20
+ with :
# Inline JavaScript run by actions/github-script; it references `github`,
# `context` and the global `fetch`.
21
+ script : |
22
+ const { owner, repo } = context.repo;
23
+ const issueNumber = context.issue.number;
24
+
25
+ // Get the newly created issue
26
+ const { data: newIssue } = await github.rest.issues.get({
27
+ owner,
28
+ repo,
29
+ issue_number: issueNumber,
30
+ });
31
+
32
+ // Skip if the issue is a pull request
33
+ if (newIssue.pull_request) {
34
+ console.log('Skipping pull request');
35
+ return;
36
+ }
37
+
38
+ console.log('Analyzing issue #' + issueNumber + ': "' + newIssue.title + '"');
39
+
40
+ // Get existing open issues (excluding the current one)
41
+ const { data: existingIssues } = await github.rest.issues.listForRepo({
42
+ owner,
43
+ repo,
44
+ state: 'open',
45
+ per_page: 100,
46
+ });
47
+
48
+ // Filter out pull requests and the current issue
49
+ const openIssues = existingIssues.filter(issue =>
50
+ !issue.pull_request && issue.number !== issueNumber
51
+ );
52
+
53
+ console.log('Found ' + openIssues.length + ' existing open issues to compare against');
54
+
55
+ if (openIssues.length === 0) {
56
+ console.log('No existing issues to compare against');
57
+ return;
58
+ }
59
+
60
+ // Use GitHub Models to find potential duplicates
61
+ const duplicates = [];
62
+
63
+ if (openIssues.length === 0) {
64
+ console.log('No existing issues to compare against');
65
+ return;
66
+ }
67
+
68
+ console.log('Analyzing ' + openIssues.length + ' existing issues for potential duplicates');
69
+
70
+ try {
71
+ // Helper function to safely escape content for prompts
72
/**
 * Prepare untrusted issue text for embedding in a model prompt.
 *
 * Backticks, single/double quotes and backslashes are replaced with spaces
 * (not escaped), and the result is truncated to `maxLength` UTF-16 code units.
 *
 * @param {*} content - Issue title or body; may be null/undefined/empty.
 * @param {number} [maxLength=500] - Maximum length of the returned string.
 * @returns {string} The cleaned text, or 'No description provided' when
 *   `content` is falsy.
 */
function sanitizeContent(content, maxLength = 500) {
  if (!content) return 'No description provided';

  // String() keeps a non-string value (e.g. a number) from throwing on .replace.
  const cleaned = String(content).replace(/[`'"\\]/g, ' ');
  if (cleaned.length <= maxLength) return cleaned;

  let truncated = cleaned.slice(0, maxLength);

  // slice() counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair (e.g. an emoji). Drop the dangling high
  // surrogate instead of sending a malformed character to the model.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    truncated = truncated.slice(0, -1);
  }
  return truncated;
}
76
+
77
+ // Helper function to retry AI calls with exponential backoff
78
/**
 * Invoke a fetch-style call, retrying transient failures with exponential
 * backoff.
 *
 * A response is retried only when its status is transient (408, 429 or 5xx).
 * Any other non-OK status (e.g. 400/401/403) is returned immediately, because
 * repeating the same request cannot succeed. Thrown errors (network failures)
 * are always retried until the attempts are exhausted.
 *
 * Delays are baseDelayMs * 2^attempt, i.e. 1s then 2s with the defaults.
 *
 * @param {() => Promise<{ok: boolean, status: number}>} apiCallFn - Performs one attempt.
 * @param {number} [maxRetries=2] - Retries after the first attempt.
 * @param {number} [baseDelayMs=1000] - Delay before the first retry.
 * @returns {Promise<object>} The first OK response, the first non-retryable
 *   response, or the last failed response once retries are exhausted.
 * @throws Re-throws the error from `apiCallFn` if the final attempt throws.
 */
async function retryApiCall(apiCallFn, maxRetries = 2, baseDelayMs = 1000) {
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const isTransientStatus = (status) => status === 408 || status === 429 || status >= 500;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const isLastAttempt = attempt === maxRetries;
    const delay = Math.pow(2, attempt) * baseDelayMs;

    try {
      const response = await apiCallFn();
      if (response.ok || isLastAttempt || !isTransientStatus(response.status)) {
        // Success, out of retries, or a permanent failure: hand the response
        // back so the caller can inspect status and body.
        return response;
      }
      console.log('API call failed with status ' + response.status + ', retrying in ' + delay + 'ms (attempt ' + (attempt + 1) + '/' + (maxRetries + 1) + ')');
    } catch (error) {
      if (isLastAttempt) throw error;
      console.log('API call error, retrying in ' + delay + 'ms: ' + error.message);
    }

    await sleep(delay);
  }
}
99
+
100
+ // Limit the number of issues to analyze to prevent token overflow
101
+ const maxIssuesForAnalysis = Math.min(openIssues.length, 50); // Limit to 50 issues max
102
+ const issuesToAnalyze = openIssues.slice(0, maxIssuesForAnalysis);
103
+
104
+ if (issuesToAnalyze.length < openIssues.length) {
105
+ console.log('Limiting analysis to ' + maxIssuesForAnalysis + ' most recent issues (out of ' + openIssues.length + ' total)');
106
+ }
107
+
108
+ // Step 1: Send issue titles and numbers to get top 5 candidates
109
+ let titlePrompt = 'Analyze this NEW ISSUE against EXISTING ISSUES and identify the top 5 most similar ones:\n\n';
110
+ titlePrompt += 'NEW ISSUE:\n';
111
+ titlePrompt += 'Title: ' + sanitizeContent(newIssue.title) + '\n';
112
+ titlePrompt += 'Body: ' + sanitizeContent(newIssue.body) + '\n\n';
113
+ titlePrompt += 'EXISTING ISSUES:\n';
114
+
115
+ issuesToAnalyze.forEach((issue, index) => {
116
+ titlePrompt += (index + 1) + '. Issue #' + issue.number + ' - ' + sanitizeContent(issue.title) + '\n';
117
+ });
118
+
119
+ titlePrompt += '\nRespond with a JSON object containing the top 5 most similar issues. Format: {"similar_issues": [{"rank": 1, "issue_number": 123, "similarity": "high|medium"}, ...]}';
120
+
121
+ const titleResponse = await retryApiCall(() =>
122
+ fetch('https://models.inference.ai.azure.com/chat/completions', {
123
+ method: 'POST',
124
+ headers: {
125
+ 'Authorization': 'Bearer ' + github.token,
126
+ 'Content-Type': 'application/json',
127
+ },
128
+ body: JSON.stringify({
129
+ messages: [
130
+ {
131
+ role: 'system',
132
+ content: 'You are an expert at analyzing GitHub issues to detect duplicates. Compare issue titles and descriptions to identify the most similar ones. Respond only with valid JSON containing the top 5 most similar issues ranked by relevance. Use "high" for likely duplicates and "medium" for related issues.'
133
+ },
134
+ {
135
+ role: 'user',
136
+ content: titlePrompt
137
+ }
138
+ ],
139
+ model: 'gpt-4o-mini',
140
+ temperature: 0.1,
141
+ max_tokens: 200
142
+ })
143
+ })
144
+ );
145
+
146
+ if (!titleResponse.ok) {
147
+ const errorText = await titleResponse.text();
148
+ console.log('First AI call failed after retries: ' + titleResponse.status + ' - ' + errorText);
149
+ return;
150
+ }
151
+
152
+ const titleResult = await titleResponse.json();
153
+ const titleAnalysis = titleResult.choices[0]?.message?.content?.trim();
154
+ console.log('AI title analysis result: ' + titleAnalysis);
155
+
156
+ // Parse JSON response to get top 5 candidates
157
+ let candidateIssueNumbers = [];
158
+ try {
159
+ const jsonMatch = titleAnalysis.match(/\{.*\}/s);
160
+ if (jsonMatch) {
161
+ const jsonData = JSON.parse(jsonMatch[0]);
162
+ candidateIssueNumbers = jsonData.similar_issues || [];
163
+ }
164
+ } catch (parseError) {
165
+ console.log('Failed to parse JSON response, falling back to number extraction');
166
+ // Fallback: extract issue numbers from response
167
+ const numberMatches = titleAnalysis.match(/#(\d+)/g);
168
+ if (numberMatches) {
169
+ candidateIssueNumbers = numberMatches.slice(0, 5).map(match => ({
170
+ issue_number: parseInt(match.replace('#', '')),
171
+ similarity: 'medium'
172
+ }));
173
+ }
174
+ }
175
+
176
+ if (candidateIssueNumbers.length === 0) {
177
+ console.log('No candidate issues identified in first pass');
178
+ return;
179
+ }
180
+
181
+ console.log('Found ' + candidateIssueNumbers.length + ' candidate issues from title analysis');
182
+
183
+ // Step 2: Get full details for top candidates and do detailed analysis
184
+ const candidateIssues = [];
185
+ for (const candidate of candidateIssueNumbers) {
186
+ const issue = openIssues.find(i => i.number === candidate.issue_number);
187
+ if (issue) {
188
+ candidateIssues.push({
189
+ issue,
190
+ initialSimilarity: candidate.similarity
191
+ });
192
+ }
193
+ }
194
+
195
+ if (candidateIssues.length === 0) {
196
+ console.log('No valid candidate issues found');
197
+ return;
198
+ }
199
+
200
+ // Step 3: Detailed analysis with full issue bodies
201
+ let detailPrompt = 'Perform detailed comparison of this NEW ISSUE against the TOP CANDIDATE ISSUES:\n\n';
202
+ detailPrompt += 'NEW ISSUE:\n';
203
+ detailPrompt += 'Title: ' + sanitizeContent(newIssue.title) + '\n';
204
+ detailPrompt += 'Body: ' + sanitizeContent(newIssue.body) + '\n\n';
205
+ detailPrompt += 'CANDIDATE ISSUES FOR DETAILED ANALYSIS:\n';
206
+
207
+ candidateIssues.forEach((candidate, index) => {
208
+ detailPrompt += (index + 1) + '. Issue #' + candidate.issue.number + '\n';
209
+ detailPrompt += ' Title: ' + sanitizeContent(candidate.issue.title) + '\n';
210
+ detailPrompt += ' Body: ' + sanitizeContent(candidate.issue.body) + '\n\n';
211
+ });
212
+
213
+ detailPrompt += 'Respond with JSON format: {"duplicates": [{"issue_number": 123, "classification": "DUPLICATE|SIMILAR|DIFFERENT", "reason": "brief explanation"}]}';
214
+
215
+ const detailResponse = await retryApiCall(() =>
216
+ fetch('https://models.inference.ai.azure.com/chat/completions', {
217
+ method: 'POST',
218
+ headers: {
219
+ 'Authorization': 'Bearer ' + github.token,
220
+ 'Content-Type': 'application/json',
221
+ },
222
+ body: JSON.stringify({
223
+ messages: [
224
+ {
225
+ role: 'system',
226
+ content: 'You are an expert at analyzing GitHub issues for duplicates. Compare the full content and determine: DUPLICATE (same core problem), SIMILAR (related but different aspects), or DIFFERENT (unrelated). Respond only with valid JSON.'
227
+ },
228
+ {
229
+ role: 'user',
230
+ content: detailPrompt
231
+ }
232
+ ],
233
+ model: 'gpt-4o-mini',
234
+ temperature: 0.1,
235
+ max_tokens: 300
236
+ })
237
+ })
238
+ );
239
+
240
+ if (detailResponse.ok) {
241
+ const detailResult = await detailResponse.json();
242
+ const detailAnalysis = detailResult.choices[0]?.message?.content?.trim();
243
+ console.log('AI detailed analysis result: ' + detailAnalysis);
244
+
245
+ // Parse detailed analysis JSON
246
+ try {
247
+ const jsonMatch = detailAnalysis.match(/\{.*\}/s);
248
+ if (jsonMatch) {
249
+ const jsonData = JSON.parse(jsonMatch[0]);
250
+ const results = jsonData.duplicates || [];
251
+
252
+ for (const result of results) {
253
+ if (result.classification === 'DUPLICATE' || result.classification === 'SIMILAR') {
254
+ const issue = candidateIssues.find(c => c.issue.number === result.issue_number)?.issue;
255
+ if (issue) {
256
+ duplicates.push({
257
+ issue,
258
+ similarity: result.classification === 'DUPLICATE' ? 'high' : 'medium'
259
+ });
260
+ console.log('Found ' + result.classification.toLowerCase() + ' issue: #' + issue.number + ' - ' + issue.title);
261
+ }
262
+ }
263
+ }
264
+ }
265
+ } catch (parseError) {
266
+ console.log('Failed to parse detailed analysis JSON, using fallback');
267
+ // Fallback: look for DUPLICATE/SIMILAR mentions
268
+ candidateIssues.forEach(candidate => {
269
+ const issueRef = '#' + candidate.issue.number;
270
+ if (detailAnalysis.includes(issueRef) &&
271
+ (detailAnalysis.includes('DUPLICATE') || detailAnalysis.includes('SIMILAR'))) {
272
+ duplicates.push({
273
+ issue: candidate.issue,
274
+ similarity: detailAnalysis.includes('DUPLICATE') ? 'high' : 'medium'
275
+ });
276
+ console.log('Found similar issue (fallback): #' + candidate.issue.number + ' - ' + candidate.issue.title);
277
+ }
278
+ });
279
+ }
280
+ } else {
281
+ const errorText = await detailResponse.text();
282
+ console.log('Detailed analysis failed after retries: ' + detailResponse.status + ' - ' + errorText);
283
+ }
284
+
285
+ } catch (error) {
286
+ console.log('Error in AI analysis: ' + error.message);
287
+ }
288
+
289
// ---------------------------------------------------------------------------
// Report: if the analysis recorded any matches, post one comment on the new
// issue listing likely duplicates ('high') and similar issues ('medium').
// ---------------------------------------------------------------------------

// Post comment if duplicates found
if (duplicates.length > 0) {
  // Issue titles are user-controlled. Backslash-escape the characters that
  // would terminate or corrupt the surrounding Markdown link text.
  const escapeLinkText = (text) => String(text).replace(/[\\\[\]<>`]/g, '\\$&');

  // Renders one "### heading" section, or an empty string when it has no entries.
  const renderSection = (heading, entries) => {
    if (entries.length === 0) return '';
    const rows = entries.map(({ issue }) =>
      '- #' + issue.number + ' - [' + escapeLinkText(issue.title) + '](' + issue.html_url + ')\n'
    );
    return heading + '\n' + rows.join('') + '\n';
  };

  const highPriority = duplicates.filter(d => d.similarity === 'high');
  const mediumPriority = duplicates.filter(d => d.similarity === 'medium');

  let commentBody = '👋 **Potential duplicate issues detected**\n\n';
  commentBody += 'This issue appears to be similar to existing open issues:\n\n';
  commentBody += renderSection('### 🚨 Likely Duplicates', highPriority);
  commentBody += renderSection('### 🔍 Similar Issues', mediumPriority);
  commentBody += 'Please review these issues to see if your issue is already covered. ';
  commentBody += 'If this is indeed a duplicate, consider closing this issue and contributing to the existing discussion.\n\n';
  commentBody += '---\n';
  commentBody += '*This comment was automatically generated using AI to help identify potential duplicates.*';

  await github.rest.issues.createComment({
    owner,
    repo,
    issue_number: issueNumber,
    body: commentBody,
  });

  console.log('Posted comment with ' + duplicates.length + ' potential duplicate(s)');
} else {
  console.log('No potential duplicates found');
}
0 commit comments