-
Notifications
You must be signed in to change notification settings - Fork 29
Open
Description
We need to format some large sql sometimes and unfortunately, while polyglot is much faster than what we had been using, the built wasm doesn't have enough memory. Is there a way to provide one with more memory, or a way we could choose.
Here is a test to run:
import { format, Dialect } from "@polyglot-sql/sdk";
// Map our dialect names to polyglot Dialect enum
const DIALECT_MAP = {
postgresql: Dialect.PostgreSQL,
mysql: Dialect.MySQL,
bigquery: Dialect.BigQuery,
snowflake: Dialect.Snowflake,
redshift: Dialect.Redshift,
presto: Dialect.Presto,
trino: Dialect.Trino,
clickhouse: Dialect.ClickHouse,
databricks: Dialect.Databricks,
athena: Dialect.Athena,
mssql: Dialect.TSQL,
sqlite: Dialect.SQLite,
};
function formatWithPolyglot(sql, dialect) {
const polyglotDialect = DIALECT_MAP[dialect] ?? Dialect.PostgreSQL;
const result = format(sql, polyglotDialect);
if (!result.success || !result.sql?.length) {
return sql; // fallback to original on parse error
}
return result.sql[0];
}
// Generate SQL of various sizes
function generateSQL(size) {
const baseQuery = "SELECT id, name, email, created_at FROM users WHERE ";
const conditions = [];
for (let i = 0; i < size; i++) {
conditions.push(`field_${i} = 'value_${i}'`);
}
return baseQuery + conditions.join(" AND ");
}
function benchmark(name, fn, iterations = 10) {
const start = process.hrtime.bigint();
for (let i = 0; i < iterations; i++) {
fn();
}
const end = process.hrtime.bigint();
const durationMs = Number(end - start) / 1_000_000;
const avgMs = durationMs / iterations;
return { total: durationMs.toFixed(2), avg: avgMs.toFixed(2) };
}
function test() {
const dialect = "postgresql";
const sizes = [
{ name: "Small (10 conditions)", conditions: 10 },
{ name: "Medium (50 conditions)", conditions: 50 },
{ name: "Large (500 conditions)", conditions: 500 },
{ name: "XLarge (5000 conditions)", conditions: 5000 },
];
console.log("Polyglot SQL Format Speed Test");
console.log(
"(Rust/WASM-powered formatter from https://github.com/tobilg/polyglot)",
);
console.log("=".repeat(80));
console.log("Running 10 iterations for each test...\n");
for (const { name, conditions } of sizes) {
const sql = generateSQL(conditions);
const sqlLength = sql.length;
console.log(`\n${name} (${sqlLength} chars)`);
console.log("-".repeat(80));
let result;
try {
result = benchmark(
"polyglot format",
() => formatWithPolyglot(sql, dialect),
10,
);
console.log(
` polyglot format: ${result.avg}ms avg (${result.total}ms total)`,
);
} catch (err) {
console.log(
` polyglot format: FAILED - ${err.message} (WASM memory limit on large SQL)`,
);
}
}
console.log("\n" + "=".repeat(80));
console.log(`
Comparison (sql-formatter v15 on same machine):
Small: ~4.9ms | Polyglot ~2.9ms (~1.7x faster)
Medium: ~5.0ms | Polyglot ~0.7ms (~7x faster)
Large: ~41ms | Polyglot ~5ms (~8x faster)
XLarge: ~2291ms | Polyglot FAILS (WASM memory limit)
`);
}
test();
process.exit(0);
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels