Skip to content

Runs out of memory for large sql. #27

@tzjames

Description

@tzjames

We need to format some large sql sometimes and unfortunately, while polyglot is much faster than what we had been using, the built wasm doesn't have enough memory. Is there a way to provide one with more memory, or a way we could choose.

Here is a test to run:

import { format, Dialect } from "@polyglot-sql/sdk";

// Map our dialect names to polyglot Dialect enum
const DIALECT_MAP = {
  postgresql: Dialect.PostgreSQL,
  mysql: Dialect.MySQL,
  bigquery: Dialect.BigQuery,
  snowflake: Dialect.Snowflake,
  redshift: Dialect.Redshift,
  presto: Dialect.Presto,
  trino: Dialect.Trino,
  clickhouse: Dialect.ClickHouse,
  databricks: Dialect.Databricks,
  athena: Dialect.Athena,
  mssql: Dialect.TSQL,
  sqlite: Dialect.SQLite,
};

function formatWithPolyglot(sql, dialect) {
  const polyglotDialect = DIALECT_MAP[dialect] ?? Dialect.PostgreSQL;
  const result = format(sql, polyglotDialect);
  if (!result.success || !result.sql?.length) {
    return sql; // fallback to original on parse error
  }
  return result.sql[0];
}

// Generate SQL of various sizes
function generateSQL(size) {
  const baseQuery = "SELECT id, name, email, created_at FROM users WHERE ";
  const conditions = [];

  for (let i = 0; i < size; i++) {
    conditions.push(`field_${i} = 'value_${i}'`);
  }

  return baseQuery + conditions.join(" AND ");
}

function benchmark(name, fn, iterations = 10) {
  const start = process.hrtime.bigint();

  for (let i = 0; i < iterations; i++) {
    fn();
  }

  const end = process.hrtime.bigint();
  const durationMs = Number(end - start) / 1_000_000;
  const avgMs = durationMs / iterations;

  return { total: durationMs.toFixed(2), avg: avgMs.toFixed(2) };
}

function test() {
  const dialect = "postgresql";
  const sizes = [
    { name: "Small (10 conditions)", conditions: 10 },
    { name: "Medium (50 conditions)", conditions: 50 },
    { name: "Large (500 conditions)", conditions: 500 },
    { name: "XLarge (5000 conditions)", conditions: 5000 },
  ];

  console.log("Polyglot SQL Format Speed Test");
  console.log(
    "(Rust/WASM-powered formatter from https://github.com/tobilg/polyglot)",
  );
  console.log("=".repeat(80));
  console.log("Running 10 iterations for each test...\n");

  for (const { name, conditions } of sizes) {
    const sql = generateSQL(conditions);
    const sqlLength = sql.length;

    console.log(`\n${name} (${sqlLength} chars)`);
    console.log("-".repeat(80));

    let result;
    try {
      result = benchmark(
        "polyglot format",
        () => formatWithPolyglot(sql, dialect),
        10,
      );
      console.log(
        `  polyglot format: ${result.avg}ms avg  (${result.total}ms total)`,
      );
    } catch (err) {
      console.log(
        `  polyglot format: FAILED - ${err.message} (WASM memory limit on large SQL)`,
      );
    }
  }

  console.log("\n" + "=".repeat(80));
  console.log(`
Comparison (sql-formatter v15 on same machine):
  Small:   ~4.9ms  |  Polyglot ~2.9ms  (~1.7x faster)
  Medium:  ~5.0ms  |  Polyglot ~0.7ms  (~7x faster)
  Large:   ~41ms   |  Polyglot ~5ms    (~8x faster)
  XLarge:  ~2291ms |  Polyglot FAILS   (WASM memory limit)
`);
}

test();
process.exit(0);

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions