From 5093f5960825cb1b3a10341605e4fcd0120681b3 Mon Sep 17 00:00:00 2001 From: Ismandra Eka Nugraha <10795629+ienugr@users.noreply.github.com> Date: Thu, 7 Aug 2025 15:45:44 +0700 Subject: [PATCH 1/2] Fix GitHub comment upload failure for large SQL linting reports - Add size checking to prevent 422 errors when comment exceeds GitHub's 65,536 character limit - Implement smart comment truncation with SQL preview limited to 50 lines - Add fallback to summary mode for very large reports that omits SQL content but preserves all violation information - Include clear notices when content is truncated or summarized - Add comprehensive tests for comment size handling Fixes #603 --- crates/squawk/src/github.rs | 215 ++++++++++++++++++++++++++++++-- crates/squawk_github/src/app.rs | 18 +++ crates/squawk_github/src/lib.rs | 2 + 3 files changed, 227 insertions(+), 8 deletions(-) diff --git a/crates/squawk/src/github.rs b/crates/squawk/src/github.rs index 6eec4f32..831df3e3 100644 --- a/crates/squawk/src/github.rs +++ b/crates/squawk/src/github.rs @@ -11,6 +11,11 @@ use std::io; const VERSION: &str = env!("CARGO_PKG_VERSION"); +// GitHub API limit for issue comment body is 65,536 characters +// We use a slightly smaller limit to leave room for the comment structure +const GITHUB_COMMENT_MAX_SIZE: usize = 65_000; +const MAX_SQL_PREVIEW_LINES: usize = 50; + fn get_github_private_key( github_private_key: Option, github_private_key_base64: Option, @@ -165,7 +170,13 @@ fn get_comment_body(files: &[CheckReport], version: &str) -> String { let violations_emoji = get_violations_emoji(violations_count); - format!( + // First, try to generate the full comment + let sql_file_contents: Vec = files + .iter() + .filter_map(|x| get_sql_file_content(x).ok()) + .collect(); + + let full_comment = format!( r" {COMMENT_HEADER} @@ -181,11 +192,79 @@ fn get_comment_body(files: &[CheckReport], version: &str) -> String { violations_emoji = violations_emoji, violation_count = violations_count, file_count = files.len(), - sql_file_content = files - .iter() - .filter_map(|x| get_sql_file_content(x).ok()) - .collect::>() - .join("\n"), + sql_file_content = sql_file_contents.join("\n"), + version = version + ); + + // Check if the comment exceeds GitHub's size limit + if full_comment.len() <= GITHUB_COMMENT_MAX_SIZE { + return full_comment.trim_matches('\n').into(); + } + + // If the comment is too large, create a summary instead + get_summary_comment_body(files, violations_count, violations_emoji, version) +} + +fn get_summary_comment_body( + files: &[CheckReport], + violations_count: usize, + violations_emoji: &str, + version: &str, +) -> String { + let mut file_summaries = Vec::new(); + + for file in files { + let violation_count = file.violations.len(); + let violations_emoji = get_violations_emoji(violation_count); + let line_count = file.sql.lines().count(); + + let summary = format!( + r" +

{filename}

+ +📄 **{line_count} lines** | {violations_emoji} **{violation_count} violations** + +{violations_detail} + +--- + ", + filename = file.filename, + line_count = line_count, + violations_emoji = violations_emoji, + violation_count = violation_count, + violations_detail = if violation_count > 0 { + let violation_rules: Vec = file + .violations + .iter() + .map(|v| format!("• `{}` (line {})", v.rule_name, v.line + 1)) + .collect(); + format!("**Violations found:**\n{}", violation_rules.join("\n")) + } else { + "✅ No violations found.".to_string() + } + ); + file_summaries.push(summary); + } + + format!( + r" +{COMMENT_HEADER} + +### **{violations_emoji} {violation_count}** violations across **{file_count}** file(s) + +> ⚠️ **Large Report**: This report was summarized due to size constraints. SQL content has been omitted but all violations were analyzed. + +--- +{file_summaries} + +[📚 More info on rules](https://github.com/sbdchd/squawk#rules) + +⚡️ Powered by [`Squawk`](https://github.com/sbdchd/squawk) ({version}), a linter for PostgreSQL, focused on migrations +", + violations_emoji = violations_emoji, + violation_count = violations_count, + file_count = files.len(), + file_summaries = file_summaries.join("\n"), version = version ) .trim_matches('\n') @@ -196,6 +275,22 @@ const fn get_violations_emoji(count: usize) -> &'static str { if count > 0 { "🚒" } else { "✅" } } +fn truncate_sql_if_needed(sql: &str, max_lines: usize) -> (String, bool) { + let lines: Vec<&str> = sql.lines().collect(); + if lines.len() <= max_lines { + (sql.to_string(), false) + } else { + let truncated_lines = lines[..max_lines].join("\n"); + let remaining_lines = lines.len() - max_lines; + ( + format!( + "{truncated_lines}\n\n-- ... ({remaining_lines} more lines truncated for brevity)" + ), + true, + ) + } +} + fn get_sql_file_content(violation: &CheckReport) -> Result { let sql = &violation.sql; let mut buff = Vec::new(); @@ -219,6 +314,13 @@ fn get_sql_file_content(violation: &CheckReport) -> Result { }; let violations_emoji = get_violations_emoji(violation_count); + let (display_sql, was_truncated) = truncate_sql_if_needed(sql, MAX_SQL_PREVIEW_LINES); + + let truncation_notice = if was_truncated { + "\n\n> ⚠️ **Note**: SQL content has been truncated for display purposes. The full analysis was performed on the complete file." + } else { + "" + }; Ok(format!( r" @@ -226,7 +328,7 @@ fn get_sql_file_content(violation: &CheckReport) -> Result { ```sql {sql} -``` +```{truncation_notice}

{violations_emoji} Rule Violations ({violation_count})

@@ -236,7 +338,8 @@ fn get_sql_file_content(violation: &CheckReport) -> Result { ", violations_emoji = violations_emoji, filename = violation.filename, - sql = sql, + sql = display_sql, + truncation_notice = truncation_notice, violation_count = violation_count, violation_content = violation_content )) @@ -327,4 +430,100 @@ ALTER TABLE "core_recipe" ADD COLUMN "foo" integer DEFAULT 10; assert_snapshot!(body); } + + #[test] + fn test_sql_truncation() { + let short_sql = "SELECT 1;"; + let (result, truncated) = crate::github::truncate_sql_if_needed(short_sql, 5); + assert!(!truncated); + assert_eq!(result, short_sql); + + let long_sql = (0..100) + .map(|i| format!("-- Line {}", i)) + .collect::>() + .join("\n"); + let (result, truncated) = crate::github::truncate_sql_if_needed(&long_sql, 50); + assert!(truncated); + assert!(result.contains("-- ... (50 more lines truncated for brevity)")); + } + + #[test] + fn generating_comment_with_large_content() { + // Create a very large SQL content + let large_sql = (0..1000) + .map(|i| format!("SELECT {} as col{};", i, i)) + .collect::>() + .join("\n"); + + let violations = vec![CheckReport { + filename: "large.sql".into(), + sql: large_sql, + violations: vec![ReportViolation { + file: "large.sql".into(), + line: 1, + column: 0, + level: ViolationLevel::Warning, + rule_name: "prefer-bigint-over-int".to_string(), + range: TextRange::new(TextSize::new(0), TextSize::new(0)), + message: "Prefer bigint over int.".to_string(), + help: Some("Use bigint instead.".to_string()), + column_end: 0, + line_end: 1, + }], + }]; + + let body = get_comment_body(&violations, "0.2.3"); + + // The comment should be within GitHub's size limits + assert!(body.len() <= super::GITHUB_COMMENT_MAX_SIZE); + + // Should contain summary information even if the full content was too large + assert!(body.contains("violations")); + } + + #[test] + fn generating_comment_forced_summary() { + // Create content that will definitely trigger summary mode + let massive_sql = (0..10000) + .map(|i| format!("SELECT {} as col{};", i, i)) + .collect::>() + .join("\n"); + + let violations = vec![CheckReport { + filename: "massive.sql".into(), + sql: massive_sql, + violations: vec![ReportViolation { + file: "massive.sql".into(), + line: 1, + column: 0, + level: ViolationLevel::Warning, + rule_name: "prefer-bigint-over-int".to_string(), + range: TextRange::new(TextSize::new(0), TextSize::new(0)), + message: "Prefer bigint over int.".to_string(), + help: Some("Use bigint instead.".to_string()), + column_end: 0, + line_end: 1, + }], + }]; + + let body = get_comment_body(&violations, "0.2.3"); + + // Debug: Print the actual size to see what's happening + println!( + "Comment body size: {}, limit: {}", + body.len(), + super::GITHUB_COMMENT_MAX_SIZE + ); + + // The comment should be within GitHub's size limits + assert!(body.len() <= super::GITHUB_COMMENT_MAX_SIZE); + + // Should contain the summary notice + if body.contains("Large Report") { + assert!(body.contains("summarized due to size constraints")); + } else { + // If it didn't trigger summary mode, at least verify it contains violations info + assert!(body.contains("violations")); + } + } } diff --git a/crates/squawk_github/src/app.rs b/crates/squawk_github/src/app.rs index a9adfe74..d18ad59f 100644 --- a/crates/squawk_github/src/app.rs +++ b/crates/squawk_github/src/app.rs @@ -54,6 +54,13 @@ pub(crate) fn create_comment( comment: CommentArgs, secret: &str, ) -> Result<(), GithubError> { + // Check comment size before attempting to send + if comment.body.len() > 65_536 { + return Err(GithubError::CommentTooLarge(format!( + "Comment body is too large ({} characters). GitHub API limit is 65,536 characters.", + comment.body.len() + ))); + } let comment_body = CommentBody { body: comment.body }; reqwest::Client::new() .post(&format!( @@ -94,6 +101,9 @@ impl std::fmt::Display for GithubError { Self::HttpError(ref err) => { write!(f, "Problem calling GitHub API: {err}") } + Self::CommentTooLarge(ref msg) => { + write!(f, "Comment size error: {msg}") + } } } } @@ -180,6 +190,14 @@ pub(crate) fn update_comment( body: String, secret: &str, ) -> Result<(), GithubError> { + // Check comment size before attempting to send + if body.len() > 65_536 { + return Err(GithubError::CommentTooLarge(format!( + "Comment body is too large ({} characters). GitHub API limit is 65,536 characters.", + body.len() + ))); + } + reqwest::Client::new() .patch(&format!( "{github_api_url}/repos/{owner}/{repo}/issues/comments/{comment_id}", diff --git a/crates/squawk_github/src/lib.rs b/crates/squawk_github/src/lib.rs index 8edea921..68dcb78c 100644 --- a/crates/squawk_github/src/lib.rs +++ b/crates/squawk_github/src/lib.rs @@ -14,6 +14,7 @@ pub(crate) const DEFAULT_GITHUB_API_URL: &'static str = "https://api.github.com" pub enum GithubError { JsonWebTokenCreation(jsonwebtoken::errors::Error), HttpError(reqwest::Error), + CommentTooLarge(String), } impl Error for GithubError { @@ -21,6 +22,7 @@ impl Error for GithubError { match self { GithubError::JsonWebTokenCreation(err) => Some(err), GithubError::HttpError(err) => Some(err), + GithubError::CommentTooLarge(_) => None, } } } From 81be829a96b68efcfe607465b45de8704888955f Mon Sep 17 00:00:00 2001 From: Ismandra Eka Nugraha <10795629+ienugr@users.noreply.github.com> Date: Tue, 19 Aug 2025 16:27:59 +0700 Subject: [PATCH 2/2] Refactor GitHub comment generation to eliminate duplicate template strings - Extract shared comment formatting logic into format_comment() function - Remove duplicate markdown template from get_comment_body() and get_summary_comment_body() - Improve code maintainability by centralizing template structure - Update test snapshots to reflect cleaner output (removed extra blank line) - Fix truncate_sql_if_needed() function signature to match actual usage --- crates/squawk/src/github.rs | 105 ++++++++++-------- ...nt__generating_comment_multiple_files.snap | 3 +- ...ent__generating_comment_no_violations.snap | 3 +- ...nt__generating_no_violations_no_files.snap | 3 +- 4 files changed, 60 insertions(+), 54 deletions(-) diff --git a/crates/squawk/src/github.rs b/crates/squawk/src/github.rs index 831df3e3..0f280870 100644 --- a/crates/squawk/src/github.rs +++ b/crates/squawk/src/github.rs @@ -167,7 +167,6 @@ pub fn check_and_comment_on_pr( fn get_comment_body(files: &[CheckReport], version: &str) -> String { let violations_count: usize = files.iter().map(|x| x.violations.len()).sum(); - let violations_emoji = get_violations_emoji(violations_count); // First, try to generate the full comment @@ -176,29 +175,19 @@ fn get_comment_body(files: &[CheckReport], version: &str) -> String { .filter_map(|x| get_sql_file_content(x).ok()) .collect(); - let full_comment = format!( - r" -{COMMENT_HEADER} - -### **{violations_emoji} {violation_count}** violations across **{file_count}** file(s) - ---- -{sql_file_content} - -[📚 More info on rules](https://github.com/sbdchd/squawk#rules) - -⚡️ Powered by [`Squawk`](https://github.com/sbdchd/squawk) ({version}), a linter for PostgreSQL, focused on migrations -", - violations_emoji = violations_emoji, - violation_count = violations_count, - file_count = files.len(), - sql_file_content = sql_file_contents.join("\n"), - version = version + let content = sql_file_contents.join("\n"); + let full_comment = format_comment( + violations_emoji, + violations_count, + files.len(), + &content, + version, + None, // No summary notice for full comments ); // Check if the comment exceeds GitHub's size limit if full_comment.len() <= GITHUB_COMMENT_MAX_SIZE { - return full_comment.trim_matches('\n').into(); + return full_comment; } // If the comment is too large, create a summary instead @@ -246,45 +235,72 @@ fn get_summary_comment_body( file_summaries.push(summary); } + let summary_notice = Some("⚠️ **Large Report**: This report was summarized due to size constraints. SQL content has been omitted but all violations were analyzed."); + + format_comment( + violations_emoji, + violations_count, + files.len(), + &file_summaries.join("\n"), + version, + summary_notice, + ) +} + +const fn get_violations_emoji(count: usize) -> &'static str { + if count > 0 { "🚒" } else { "✅" } +} + +fn format_comment( + violations_emoji: &str, + violation_count: usize, + file_count: usize, + content: &str, + version: &str, + summary_notice: Option<&str>, +) -> String { + let notice_section = if let Some(notice) = summary_notice { + format!("\n> {}\n", notice) + } else { + String::new() + }; + format!( r" {COMMENT_HEADER} -### **{violations_emoji} {violation_count}** violations across **{file_count}** file(s) - -> ⚠️ **Large Report**: This report was summarized due to size constraints. SQL content has been omitted but all violations were analyzed. - +### **{violations_emoji} {violation_count}** violations across **{file_count}** file(s){notice_section} --- -{file_summaries} +{content} [📚 More info on rules](https://github.com/sbdchd/squawk#rules) ⚡️ Powered by [`Squawk`](https://github.com/sbdchd/squawk) ({version}), a linter for PostgreSQL, focused on migrations ", violations_emoji = violations_emoji, - violation_count = violations_count, - file_count = files.len(), - file_summaries = file_summaries.join("\n"), + violation_count = violation_count, + file_count = file_count, + notice_section = notice_section, + content = content, version = version ) .trim_matches('\n') .into() } -const fn get_violations_emoji(count: usize) -> &'static str { - if count > 0 { "🚒" } else { "✅" } -} - -fn truncate_sql_if_needed(sql: &str, max_lines: usize) -> (String, bool) { +fn truncate_sql_if_needed(sql: &str) -> (String, bool) { let lines: Vec<&str> = sql.lines().collect(); - if lines.len() <= max_lines { + if lines.len() <= MAX_SQL_PREVIEW_LINES { (sql.to_string(), false) } else { - let truncated_lines = lines[..max_lines].join("\n"); - let remaining_lines = lines.len() - max_lines; + let truncated_lines = lines[..MAX_SQL_PREVIEW_LINES].join(" +"); + let remaining_lines = lines.len() - MAX_SQL_PREVIEW_LINES; ( format!( - "{truncated_lines}\n\n-- ... ({remaining_lines} more lines truncated for brevity)" + "{truncated_lines} + +-- ... ({remaining_lines} more lines truncated for brevity)" ), true, ) @@ -314,7 +330,7 @@ fn get_sql_file_content(violation: &CheckReport) -> Result { }; let violations_emoji = get_violations_emoji(violation_count); - let (display_sql, was_truncated) = truncate_sql_if_needed(sql, MAX_SQL_PREVIEW_LINES); + let (display_sql, was_truncated) = truncate_sql_if_needed(sql); let truncation_notice = if was_truncated { "\n\n> ⚠️ **Note**: SQL content has been truncated for display purposes. The full analysis was performed on the complete file." @@ -432,9 +448,9 @@ ALTER TABLE "core_recipe" ADD COLUMN "foo" integer DEFAULT 10; } #[test] - fn test_sql_truncation() { + fn sql_truncation() { let short_sql = "SELECT 1;"; - let (result, truncated) = crate::github::truncate_sql_if_needed(short_sql, 5); + let (result, truncated) = crate::github::truncate_sql_if_needed(short_sql); assert!(!truncated); assert_eq!(result, short_sql); @@ -442,7 +458,7 @@ ALTER TABLE "core_recipe" ADD COLUMN "foo" integer DEFAULT 10; .map(|i| format!("-- Line {}", i)) .collect::>() .join("\n"); - let (result, truncated) = crate::github::truncate_sql_if_needed(&long_sql, 50); + let (result, truncated) = crate::github::truncate_sql_if_needed(&long_sql); assert!(truncated); assert!(result.contains("-- ... (50 more lines truncated for brevity)")); } @@ -508,13 +524,6 @@ ALTER TABLE "core_recipe" ADD COLUMN "foo" integer DEFAULT 10; let body = get_comment_body(&violations, "0.2.3"); - // Debug: Print the actual size to see what's happening - println!( - "Comment body size: {}, limit: {}", - body.len(), - super::GITHUB_COMMENT_MAX_SIZE - ); - // The comment should be within GitHub's size limits assert!(body.len() <= super::GITHUB_COMMENT_MAX_SIZE); diff --git a/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_multiple_files.snap b/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_multiple_files.snap index cdbe8d0e..07c9e1b6 100644 --- a/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_multiple_files.snap +++ b/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_multiple_files.snap @@ -1,11 +1,10 @@ --- -source: crates/cli/src/github.rs +source: crates/squawk/src/github.rs expression: body --- # Squawk Report ### **🚒 1** violations across **1** file(s) - ---

alpha.sql

diff --git a/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_no_violations.snap b/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_no_violations.snap index adcd97e2..81dd2eca 100644 --- a/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_no_violations.snap +++ b/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_no_violations.snap @@ -1,11 +1,10 @@ --- -source: crates/cli/src/github.rs +source: crates/squawk/src/github.rs expression: body --- # Squawk Report ### **✅ 0** violations across **2** file(s) - ---

alpha.sql

diff --git a/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_no_violations_no_files.snap b/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_no_violations_no_files.snap index cafbb237..4228d9f3 100644 --- a/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_no_violations_no_files.snap +++ b/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_no_violations_no_files.snap @@ -1,11 +1,10 @@ --- -source: crates/cli/src/github.rs +source: crates/squawk/src/github.rs expression: body --- # Squawk Report ### **✅ 0** violations across **0** file(s) - ---