From 63fec36a966f404bca18f2aa7e505c02d758e717 Mon Sep 17 00:00:00 2001
From: Hannah Ramadan <hannahr315@gmail.com>
Date: Wed, 3 Sep 2025 09:29:34 -0700
Subject: [PATCH 1/6] Feat: db.query.summary parser

---
 .../opentelemetry/helpers/query_summary.rb    |  34 ++
 .../helpers/query_summary/cache.rb            |  48 +++
 .../helpers/query_summary/parser.rb           | 134 +++++++
 .../helpers/query_summary/tokenizer.rb        |  58 +++
 .../test/fixtures/query_summary.json          | 373 ++++++++++++++++++
 .../test/helpers/query_summary_test.rb        |  35 ++
 6 files changed, 682 insertions(+)
 create mode 100644 helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary.rb
 create mode 100644 helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/cache.rb
 create mode 100644 helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb
 create mode 100644 helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb
 create mode 100644 helpers/sql-obfuscation/test/fixtures/query_summary.json
 create mode 100644 helpers/sql-obfuscation/test/helpers/query_summary_test.rb

diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary.rb
new file mode 100644
index 0000000000..f8146f65e0
--- /dev/null
+++ b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+# Copyright The OpenTelemetry Authors
+#
+# SPDX-License-Identifier: Apache-2.0
+
+require_relative 'query_summary/cache'
+require_relative 'query_summary/tokenizer'
+require_relative 'query_summary/parser'
+
+module OpenTelemetry
+  module Helpers
+    # QuerySummary generates high-level summaries of SQL queries, made up of
+    # key operations and table names.
+    #
+    # Example:
+    #   QuerySummary.generate_summary("SELECT * FROM users WHERE id = 1")
+    #   # => "SELECT users"
+    module QuerySummary
+      def self.configure_cache(size: Cache::DEFAULT_SIZE)
+        Cache.configure(size: size)
+      end
+
+      def self.generate_summary(query)
+        Cache.fetch(query) do
+          tokens = Tokenizer.tokenize(query)
+          Parser.build_summary_from_tokens(tokens)
+        end
+      rescue StandardError
+        'UNKNOWN'
+      end
+    end
+  end
+end
diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/cache.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/cache.rb
new file mode 100644
index 0000000000..5f70bae1d9
--- /dev/null
+++ b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/cache.rb
@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+
+# Copyright The OpenTelemetry Authors
+#
+# SPDX-License-Identifier: Apache-2.0
+
+module OpenTelemetry
+  module Helpers
+    module QuerySummary
+      # Cache provides thread-safe, size-bounded caching for query summaries.
+      #
+      # Stores generated query summaries to avoid reprocessing identical queries.
+      # Evicts the oldest inserted entry when full (FIFO); uses a mutex for thread safety.
+      #
+      # @example
+      #   Cache.fetch("SELECT * FROM users") { "SELECT users" } # => "SELECT users"
+      class Cache
+        DEFAULT_SIZE = 1000
+
+        @cache = {}
+        @cache_mutex = Mutex.new
+        @cache_size = DEFAULT_SIZE
+
+        def self.fetch(key)
+          return @cache[key] if @cache.key?(key)
+
+          result = yield
+          store(key, result)
+          result
+        end
+
+        def self.configure(size: DEFAULT_SIZE)
+          @cache_mutex.synchronize do
+            @cache_size = size
+            @cache.clear if @cache.size > size
+          end
+        end
+
+        def self.store(key, value)
+          @cache_mutex.synchronize do
+            @cache.shift if @cache.size >= @cache_size
+            @cache[key] = value
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb
new file mode 100644
index 0000000000..866761ee12
--- /dev/null
+++ b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb
@@ -0,0 +1,134 @@
+# frozen_string_literal: true
+
+# Copyright The OpenTelemetry Authors
+#
+# SPDX-License-Identifier: Apache-2.0
+
+module OpenTelemetry
+  module Helpers
+    module QuerySummary
+      # Parser builds high-level SQL query summaries from tokenized input.
+      #
+      # Processes tokens to extract key operations and table names, creating
+      # summaries like "SELECT users" or "INSERT orders".
+      #
+      # @example
+      #   tokens = [Token.new(:keyword, "SELECT"), Token.new(:identifier, "users")]
+      #   Parser.build_summary_from_tokens(tokens) # => "SELECT users"
+      class Parser
+        def self.build_summary_from_tokens(tokens)
+          summary_parts = []
+          state = :default # Either :default or :expect_collection
+          skip_until = 0 # Next token index to process; allows skipping tokens already consumed by previous operations
+
+          tokens.each_with_index do |token, index|
+            next if index < skip_until # Skip already processed tokens
+
+            result = process_token(token, tokens, index, state)
+
+            summary_parts.concat(result[:parts])
+            state = result[:new_state]
+            skip_until = result[:next_index]
+          end
+
+          summary_parts.join(' ')
+        end
+
+        def self.process_token(token, tokens, index, state)
+          operation_result = process_main_operation(token, tokens, index, state)
+          return operation_result if operation_result[:processed]
+
+          collection_result = process_collection_token(token, tokens, index, state)
+          return collection_result if collection_result[:processed]
+
+          { processed: false, parts: [], new_state: state, next_index: index + 1 }
+        end
+
+        def self.process_main_operation(token, tokens, index, current_state)
+          case token.value.upcase
+          when 'SELECT', 'INSERT', 'DELETE'
+            add_to_summary(token.value, :default, index + 1)
+          when 'WITH', 'UPDATE'
+            add_to_summary(token.value, :expect_collection, index + 1)
+          when 'FROM', 'INTO', 'JOIN', 'IN'
+            trigger_collection_mode(index + 1)
+          when 'CREATE', 'ALTER', 'DROP', 'TRUNCATE'
+            handle_table_operation(token, tokens, index)
+          when 'UNION'
+            handle_union(token, tokens, index)
+          else
+            not_processed(current_state, index + 1)
+          end
+        end
+
+        def self.process_collection_token(token, tokens, index, state)
+          return { processed: false, parts: [], new_state: state, next_index: index + 1 } unless state == :expect_collection
+
+          upcased_value = token.value.upcase
+
+          if identifier_like?(token) || (token.type == :keyword && can_be_table_name?(upcased_value))
+            skip_count = calculate_alias_skip(tokens, index)
+            new_state = tokens[index + 1 + skip_count]&.value == ',' ? :expect_collection : :default
+            skip_count += 1 if tokens[index + 1 + skip_count]&.value == ','
+
+            { processed: true, parts: [token.value], new_state: new_state, next_index: index + 1 + skip_count }
+          elsif token.value == '(' || token.type == :operator
+            { processed: true, parts: [], new_state: state, next_index: index + 1 }
+          else
+            { processed: true, parts: [], new_state: :default, next_index: index + 1 }
+          end
+        end
+
+        def self.identifier_like?(token)
+          %i[identifier quoted_identifier string].include?(token.type)
+        end
+
+        def self.can_be_table_name?(upcased_value)
+          # Keywords that can also be used as table/object names in certain contexts
+          %w[TABLE INDEX PROCEDURE VIEW DATABASE].include?(upcased_value)
+        end
+
+        def self.calculate_alias_skip(tokens, index)
+          if tokens[index + 1]&.value&.upcase == 'AS'
+            2  # Skip 'AS' and the alias
+          elsif tokens[index + 1]&.type == :identifier
+            1  # Skip the alias
+          else
+            0
+          end
+        end
+
+        def self.add_to_summary(part, new_state, next_index)
+          { processed: true, parts: [part], new_state: new_state, next_index: next_index }
+        end
+
+        def self.trigger_collection_mode(next_index)
+          { processed: true, parts: [], new_state: :expect_collection, next_index: next_index }
+        end
+
+        def self.not_processed(current_state, next_index)
+          { processed: false, parts: [], new_state: current_state, next_index: next_index }
+        end
+
+        def self.handle_union(token, tokens, index)
+          if tokens[index + 1]&.value&.upcase == 'ALL'
+            { processed: true, parts: ["#{token.value} #{tokens[index + 1].value}"], new_state: :default, next_index: index + 2 }
+          else
+            add_to_summary(token.value, :default, index + 1)
+          end
+        end
+
+        def self.handle_table_operation(token, tokens, index)
+          next_token = tokens[index + 1]&.value&.upcase
+
+          case next_token
+          when 'TABLE', 'INDEX', 'PROCEDURE', 'VIEW', 'DATABASE'
+            { processed: true, parts: ["#{token.value} #{next_token}"], new_state: :expect_collection, next_index: index + 2 }
+          else
+            add_to_summary(token.value, :default, index + 1)
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb
new file mode 100644
index 0000000000..19029e20ea
--- /dev/null
+++ b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb
@@ -0,0 +1,58 @@
+# frozen_string_literal: true
+
+# Copyright The OpenTelemetry Authors
+#
+# SPDX-License-Identifier: Apache-2.0
+
+require 'strscan'
+
+module OpenTelemetry
+  module Helpers
+    module QuerySummary
+      # Tokenizer breaks down SQL queries into structured tokens for analysis.
+      #
+      # Parses SQL query strings into typed tokens (keywords, identifiers, operators, literals)
+      # for generating query summaries; whitespace and comment tokens are discarded.
+      #
+      # @example
+      #   tokens = Tokenizer.tokenize("SELECT * FROM users WHERE id = 1")
+      #   # Returns tokens: [keyword: SELECT], [operator: *], [keyword: FROM], etc.
+      class Tokenizer
+        # Token holds the type (e.g., :keyword) and value (e.g., "SELECT")
+        Token = Struct.new(:type, :value)
+
+        # The order of token matching is important for correct parsing,
+        # as more specific patterns should be matched before more general ones.
+        TOKEN_REGEX = {
+          whitespace: /\s+/,
+          comment: %r{--[^\r\n]*|\/\*.*?\*\/}m,
+          numeric: /[+-]?(?:0x[0-9a-fA-F]+|\d+\.?\d*(?:[eE][+-]?\d+)?|\.\d+(?:[eE][+-]?\d+)?)/,
+          string: /'(?:''|[^'\r\n])*'?/,
+          quoted_identifier: /"(?:""|[^"\r\n])*"|`(?:``|[^`\r\n])*`|\[(?:[^\]\r\n])*\]/,
+          keyword: /\b(?:SELECT|INSERT|UPDATE|DELETE|FROM|INTO|JOIN|CREATE|ALTER|DROP|TRUNCATE|WITH|UNION|TABLE|INDEX|PROCEDURE|VIEW|DATABASE)\b/i,
+          identifier: /[a-zA-Z_][a-zA-Z0-9_.]*/,
+          operator: /<=|>=|<>|!=|[=<>+\-*\/%,;()!?]/
+        }.freeze
+
+        EXCLUDED_TYPES = %i[whitespace comment].freeze
+
+        def self.tokenize(query)
+          scanner = StringScanner.new(query)
+          tokens = []
+
+          until scanner.eos?
+            matched = TOKEN_REGEX.any? do |type, regex|
+              next unless (value = scanner.scan(regex))
+
+              tokens << Token.new(type, value) unless EXCLUDED_TYPES.include?(type)
+              true
+            end
+            scanner.getch unless matched
+          end
+
+          tokens
+        end
+      end
+    end
+  end
+end
diff --git a/helpers/sql-obfuscation/test/fixtures/query_summary.json b/helpers/sql-obfuscation/test/fixtures/query_summary.json
new file mode 100644
index 0000000000..bf16e40acc
--- /dev/null
+++ b/helpers/sql-obfuscation/test/fixtures/query_summary.json
@@ -0,0 +1,373 @@
+[
+    {
+        "name": "numeric_literal_integers",
+        "input": {
+            "query": "SELECT 12, -12, +12"
+        },
+        "expected": {
+            "db.query.summary": "SELECT"
+        }
+    },
+    {
+        "name": "caching_query_summaries",
+        "input": {
+            "query": "SELECT 12, -12, +12"
+        },
+        "expected": {
+            "db.query.summary": "SELECT"
+        }
+    },
+    {
+        "name": "numeric_literal_with_decimal_point",
+        "input": {
+            "query": "SELECT 12.34, -12.34, +12.34, .01, -.01"
+        },
+        "expected": {
+            "db.query.summary": "SELECT"
+        }
+    },
+    {
+        "name": "numeric_literal_exponential",
+        "input": {
+            "query": "SELECT 12.34e56, -12.34e56, +12.34e56"
+        },
+        "expected": {
+            "db.query.summary": "SELECT"
+        }
+    },
+    {
+        "name": "numeric_literal_negative_exponential",
+        "input": {
+            "query": "SELECT 12.34e-56, -12.34e-56, +12.34e-56"
+        },
+        "expected": {
+            "db.query.summary": "SELECT"
+        }
+    },
+    {
+        "name": "arithmetic_on_numeric_literals",
+        "input": {
+            "query": "SELECT 99+100"
+        },
+        "expected": {
+            "db.query.summary": "SELECT"
+        }
+    },
+    {
+        "name": "hex_literal",
+        "input": {
+            "query": "SELECT 0xDEADBEEF, 0XdeadBEEF"
+        },
+        "expected": {
+            "db.query.summary": "SELECT"
+        }
+    },
+    {
+        "name": "string_literal",
+        "input": {
+            "query": "SELECT 'hello'"
+        },
+        "expected": {
+            "db.query.summary": "SELECT"
+        }
+    },
+    {
+        "name": "string_literal_escaped_single_quote",
+        "input": {
+            "query": "SELECT 'My name''s not important'"
+        },
+        "expected": {
+            "db.query.summary": "SELECT"
+        }
+    },
+    {
+        "name": "string_with_embedded_newline",
+        "input": {
+            "query": "SELECT 'My name is \n not important'"
+        },
+        "expected": {
+            "db.query.summary": "SELECT"
+        }
+    },
+    {
+        "name": "numbers_in_identifiers",
+        "input": {
+            "query": "SELECT c3po, r2d2 FROM covid19 WHERE n1h1=1234"
+        },
+        "expected": {
+            "db.query.summary": "SELECT covid19"
+        }
+    },
+    {
+        "name": "periods_in_identifiers",
+        "input": {
+            "query": "SELECT a FROM dbo.Table JOIN dbo.AnotherTable"
+        },
+        "expected": {
+            "db.query.summary": "SELECT dbo.Table dbo.AnotherTable"
+        }
+    },
+    {
+        "name": "insert_into",
+        "input": {
+            "query": "INSERT INTO X VALUES(1, 23456, 123.456, 99+100)"
+        },
+        "expected": {
+            "db.query.summary": "INSERT X"
+        }
+    },
+    {
+        "name": "uuid",
+        "input": {
+            "query": "SELECT { guid '01234567-89ab-cdef-0123-456789abcdef' }"
+        },
+        "expected": {
+            "db.query.summary": "SELECT"
+        }
+    },
+    {
+        "name": "in_clause",
+        "input": {
+            "query": "SELECT * FROM table WHERE value IN (123, 456, 'abc')"
+        },
+        "expected": {
+            "db.query.summary": "SELECT table"
+        }
+    },
+    {
+        "name": "comments",
+        "input": {
+            "query": "SELECT column -- end of line comment\nFROM /* block \n comment */ table"
+        },
+        "expected": {
+            "db.query.summary": "SELECT table"
+        }
+    },
+    {
+        "name": "insert_into_select",
+        "input": {
+            "query": "INSERT INTO shipping_details\n(order_id,\naddress)\nSELECT order_id,\naddress\nFROM   orders\nWHERE  order_id = 1"
+        },
+        "expected": {
+            "db.query.summary": "INSERT shipping_details SELECT orders"
+        }
+    },
+    {
+        "name": "select_nested_query",
+        "input": {
+            "query": "SELECT order_date\nFROM   (SELECT *\nFROM   orders o\nJOIN customers c\nON o.customer_id = c.customer_id)"
+        },
+        "expected": {
+            "db.query.summary": "SELECT SELECT orders customers"
+        }
+    },
+    {
+        "name": "select_nested_query_case_preserved",
+        "input": {
+            "query": "SELEcT order_date\nFROM   (sELECT *\nFROM   orders o\nJOIN customers c\nON o.customer_id = c.customer_id)"
+        },
+        "expected": {
+            "db.query.summary": "SELEcT sELECT orders customers"
+        }
+    },
+    {
+        "name": "case_preserved",
+        "input": {
+            "query": "SELEcT order_date\nFROM ORders"
+        },
+        "expected": {
+            "db.query.summary": "SELEcT ORders"
+        }
+    },
+    {
+        "name": "cross_join",
+        "input": {
+            "query": "SELECT * FROM Orders o CROSS JOIN OrderDetails od"
+        },
+        "expected": {
+            "db.query.summary": "SELECT Orders OrderDetails"
+        }
+    },
+    {
+        "name": "cross_join_comma_separated_syntax",
+        "input": {
+            "query": "SELECT * FROM Orders o, OrderDetails od"
+        },
+        "expected": {
+            "db.query.summary": "SELECT Orders OrderDetails"
+        }
+    },
+    {
+        "name": "left_outer_join",
+        "input": {
+            "query": "SELECT c.name, o.id FROM customers c LEFT JOIN orders o ON c.id = o.customer_id"
+        },
+        "expected": {
+            "db.query.summary": "SELECT customers orders"
+        }
+    },
+    {
+        "name": "create_table",
+        "input": {
+            "query": "CREATE  TABLE MyTable (\n    ID NOT NULL IDENTITY(1,1) PRIMARY KEY\n)"
+        },
+        "expected": {
+            "db.query.summary": "CREATE TABLE MyTable"
+        }
+    },
+    {
+        "name": "alter_table",
+        "input": {
+            "query": "ALTER  TABLE MyTable ADD Name varchar(255)"
+        },
+        "expected": {
+            "db.query.summary": "ALTER TABLE MyTable"
+        }
+    },
+    {
+        "name": "drop_table",
+        "input": {
+            "query": "DROP  TABLE MyTable"
+        },
+        "expected": {
+            "db.query.summary": "DROP TABLE MyTable"
+        }
+    },
+    {
+        "name": "query_that_performs_multiple_operations",
+        "input": {
+            "query": "INSERT INTO shipping_details(order_id, address) SELECT order_id, address FROM orders WHERE  order_id = ?"
+        },
+        "expected": {
+            "db.query.summary": "INSERT shipping_details SELECT orders"
+        }
+    },
+    {
+        "name": "query_that_performs_an_operation_thats_applied_to_multiple_collections",
+        "input": {
+            "db.system.name": "other_sql",
+            "query": "SELECT * FROM   songs, artists WHERE songs.artist_id == artists.id"
+        },
+        "expected": {
+            "db.query.summary": "SELECT songs artists"
+        }
+    },
+    {
+        "name": "query_that_performs_operation_on_multiple_collections_with_double-quotes_or_other_punctuation",
+        "input": {
+            "query": "SELECT * FROM   \"song list\", 'artists'"
+        },
+        "expected": {
+            "db.query.summary": "SELECT \"song list\" 'artists'"
+        }
+    },
+    {
+        "name": "update_statement",
+        "input": {
+            "query": "UPDATE Customers SET ContactName = 'Alfred Schmidt', City= 'Frankfurt' WHERE CustomerID = 1"
+        },
+        "expected": {
+            "db.query.summary": "UPDATE Customers"
+        }
+    },
+    {
+        "name": "delete_statement",
+        "input": {
+            "query": "DELETE FROM Customers WHERE CustomerName='Alfreds Futterkiste'"
+        },
+        "expected": {
+            "db.query.summary": "DELETE Customers"
+        }
+    },
+    {
+        "name": "truncate_table_statement",
+        "input": {
+            "query": "TRUNCATE TABLE Customers"
+        },
+        "expected": {
+            "db.query.summary": "TRUNCATE TABLE Customers"
+        }
+    },
+    {
+        "name": "with_clause_cte",
+        "input": {
+            "query": "WITH regional_sales AS (SELECT region, SUM(amount) AS total_sales FROM orders GROUP BY region) SELECT region, total_sales FROM regional_sales WHERE total_sales > 1000"
+        },
+        "expected": {
+            "db.query.summary": "WITH regional_sales SELECT orders SELECT regional_sales"
+        }
+    },
+    {
+        "name": "union_statement",
+        "input": {
+            "query": "SELECT City FROM Customers UNION ALL SELECT City FROM Suppliers ORDER BY City"
+        },
+        "expected": {
+            "db.query.summary": "SELECT Customers UNION ALL SELECT Suppliers"
+        }
+    },
+    {
+        "name": "group_by_and_having_clauses",
+        "input": {
+            "query": "SELECT COUNT(CustomerID), Country FROM Customers WHERE Country != 'USA' GROUP BY Country HAVING COUNT(CustomerID) > 5"
+        },
+        "expected": {
+            "db.query.summary": "SELECT Customers"
+        }
+    },
+    {
+        "name": "boolean_and_null_literals",
+        "input": {
+            "query": "SELECT * FROM my_table WHERE a IS NOT NULL AND b = TRUE AND c = FALSE"
+        },
+        "expected": {
+            "db.query.summary": "SELECT my_table"
+        }
+    },
+    {
+        "name": "multiple_joins_and_aliases",
+        "input": {
+            "query": "SELECT o.OrderID, c.CustomerName, s.ShipperName FROM ((Orders AS o INNER JOIN Customers AS c ON o.CustomerID = c.CustomerID) INNER JOIN Shippers AS s ON o.ShipperID = s.ShipperID)"
+        },
+        "expected": {
+            "db.query.summary": "SELECT Orders Customers Shippers"
+        }
+    },
+    {
+        "name": "window_function_over_partition",
+        "input": {
+            "query": "SELECT name, salary, ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) as rank FROM employees"
+        },
+        "expected": {
+            "db.query.summary": "SELECT employees"
+        }
+    },
+    {
+        "name": "case_statement",
+        "input": {
+            "query": "SELECT OrderID, Quantity, CASE WHEN Quantity > 30 THEN 'Large' WHEN Quantity > 10 THEN 'Medium' ELSE 'Small' END AS QuantityText FROM OrderDetails"
+        },
+        "expected": {
+            "db.query.summary": "SELECT OrderDetails"
+        }
+    },
+    {
+        "name": "like_predicate",
+        "input": {
+            "query": "SELECT * FROM products WHERE product_name LIKE 'Chai%'"
+        },
+        "expected": {
+            "db.query.summary": "SELECT products"
+        }
+    },
+    {
+        "name": "between_predicate",
+        "input": {
+            "query": "SELECT * FROM products WHERE price BETWEEN 10 AND 20"
+        },
+        "expected": {
+            "db.query.summary": "SELECT products"
+        }
+    }
+]
+
diff --git a/helpers/sql-obfuscation/test/helpers/query_summary_test.rb b/helpers/sql-obfuscation/test/helpers/query_summary_test.rb
new file mode 100644
index 0000000000..1efeed7f9d
--- /dev/null
+++ b/helpers/sql-obfuscation/test/helpers/query_summary_test.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+# Copyright The OpenTelemetry Authors
+#
+# SPDX-License-Identifier: Apache-2.0
+
+require_relative '../test_helper'
+require_relative '../../lib/opentelemetry/helpers/query_summary'
+
+class QuerySummaryTest < Minitest::Test
+  def self.load_fixture
+    data = File.read("#{Dir.pwd}/test/fixtures/query_summary.json")
+    JSON.parse(data)
+  end
+
+  def build_failure_message(query, expected_summary, actual_summary)
+    "Failed to generate query summary correctly.\n" \
+      "Input:    #{query}\n" \
+      "Expected: #{expected_summary}\n" \
+      "Actual:   #{actual_summary}\n"
+  end
+
+  load_fixture.each do |test_case|
+    name = test_case['name']
+    query = test_case['input']['query']
+    expected_summary = test_case['expected']['db.query.summary']
+
+    define_method(:"test_query_summary_#{name}") do
+      actual_summary = OpenTelemetry::Helpers::QuerySummary.generate_summary(query)
+      message = build_failure_message(query, expected_summary, actual_summary)
+
+      assert_equal(expected_summary, actual_summary, message)
+    end
+  end
+end

From 08a896404c34ab621ee53e547d45ff2a010e4b08 Mon Sep 17 00:00:00 2001
From: Hannah Ramadan <hannahr315@gmail.com>
Date: Wed, 3 Sep 2025 15:21:42 -0700
Subject: [PATCH 2/6] Code and test updates

---
 .../helpers/query_summary/cache.rb            | 11 ++-
 .../helpers/query_summary/parser.rb           | 78 ++++++++++++-------
 .../helpers/query_summary/tokenizer.rb        | 20 ++---
 .../test/fixtures/query_summary.json          | 45 +++++++++++
 4 files changed, 115 insertions(+), 39 deletions(-)

diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/cache.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/cache.rb
index 5f70bae1d9..3546a40f4b 100644
--- a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/cache.rb
+++ b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/cache.rb
@@ -22,11 +22,14 @@ class Cache
         @cache_size = DEFAULT_SIZE
 
         def self.fetch(key)
-          return @cache[key] if @cache.key?(key)
+          @cache_mutex.synchronize do
+            return @cache[key] if @cache.key?(key)
 
-          result = yield
-          store(key, result)
-          result
+            result = yield
+            @cache.shift if @cache.size >= @cache_size
+            @cache[key] = result
+            result
+          end
         end
 
         def self.configure(size: DEFAULT_SIZE)
diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb
index 866761ee12..bf8e4adc7f 100644
--- a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb
+++ b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb
@@ -16,9 +16,18 @@ module QuerySummary
       #   tokens = [Token.new(:keyword, "SELECT"), Token.new(:identifier, "users")]
       #   Parser.build_summary_from_tokens(tokens) # => "SELECT users"
       class Parser
+        DEFAULT_STATE = :default
+        EXPECT_COLLECTION_STATE = :expect_collection
+
+        MAIN_OPERATIONS = %w[SELECT INSERT DELETE].freeze
+        COLLECTION_OPERATIONS = %w[WITH UPDATE].freeze
+        TRIGGER_COLLECTION = %w[FROM INTO JOIN IN].freeze
+        TABLE_OPERATIONS = %w[CREATE ALTER DROP TRUNCATE].freeze
+        TABLE_OBJECTS = %w[TABLE INDEX PROCEDURE VIEW DATABASE].freeze
+
         def self.build_summary_from_tokens(tokens)
           summary_parts = []
-          state = :default # Either :default or :expect_collection
+          state = DEFAULT_STATE
           skip_until = 0 # Next token index to process; allows skipping tokens already consumed by previous operations
 
           tokens.each_with_index do |token, index|
@@ -45,14 +54,16 @@ def self.process_token(token, tokens, index, state)
         end
 
         def self.process_main_operation(token, tokens, index, current_state)
-          case token.value.upcase
-          when 'SELECT', 'INSERT', 'DELETE'
-            add_to_summary(token.value, :default, index + 1)
-          when 'WITH', 'UPDATE'
-            add_to_summary(token.value, :expect_collection, index + 1)
-          when 'FROM', 'INTO', 'JOIN', 'IN'
+          upcased_value = token.value.upcase
+
+          case upcased_value
+          when *MAIN_OPERATIONS
+            add_to_summary(token.value, DEFAULT_STATE, index + 1)
+          when *COLLECTION_OPERATIONS
+            add_to_summary(token.value, EXPECT_COLLECTION_STATE, index + 1)
+          when *TRIGGER_COLLECTION
             trigger_collection_mode(index + 1)
-          when 'CREATE', 'ALTER', 'DROP', 'TRUNCATE'
+          when *TABLE_OPERATIONS
             handle_table_operation(token, tokens, index)
           when 'UNION'
             handle_union(token, tokens, index)
@@ -62,36 +73,49 @@ def self.process_main_operation(token, tokens, index, current_state)
         end
 
         def self.process_collection_token(token, tokens, index, state)
-          return { processed: false, parts: [], new_state: state, next_index: index + 1 } unless state == :expect_collection
+          return not_processed(state, index + 1) unless state == EXPECT_COLLECTION_STATE
 
           upcased_value = token.value.upcase
 
           if identifier_like?(token) || (token.type == :keyword && can_be_table_name?(upcased_value))
-            skip_count = calculate_alias_skip(tokens, index)
-            new_state = tokens[index + 1 + skip_count]&.value == ',' ? :expect_collection : :default
-            skip_count += 1 if tokens[index + 1 + skip_count]&.value == ','
-
-            { processed: true, parts: [token.value], new_state: new_state, next_index: index + 1 + skip_count }
+            handle_collection_identifier(token, tokens, index)
           elsif token.value == '(' || token.type == :operator
-            { processed: true, parts: [], new_state: state, next_index: index + 1 }
+            handle_collection_operator(token, state, index)
           else
-            { processed: true, parts: [], new_state: :default, next_index: index + 1 }
+            handle_collection_default(token, index)
           end
         end
 
+        def self.handle_collection_identifier(token, tokens, index)
+          skip_count = calculate_alias_skip(tokens, index)
+          new_state = tokens[index + 1 + skip_count]&.value == ',' ? EXPECT_COLLECTION_STATE : DEFAULT_STATE
+          skip_count += 1 if tokens[index + 1 + skip_count]&.value == ','
+
+          { processed: true, parts: [token.value], new_state: new_state, next_index: index + 1 + skip_count }
+        end
+
+        def self.handle_collection_operator(token, state, index)
+          { processed: true, parts: [], new_state: state, next_index: index + 1 }
+        end
+
+        def self.handle_collection_default(token, index)
+          { processed: true, parts: [], new_state: DEFAULT_STATE, next_index: index + 1 }
+        end
+
         def self.identifier_like?(token)
           %i[identifier quoted_identifier string].include?(token.type)
         end
 
         def self.can_be_table_name?(upcased_value)
           # Keywords that can also be used as table/object names in certain contexts
-          %w[TABLE INDEX PROCEDURE VIEW DATABASE].include?(upcased_value)
+          TABLE_OBJECTS.include?(upcased_value)
         end
 
         def self.calculate_alias_skip(tokens, index)
-          if tokens[index + 1]&.value&.upcase == 'AS'
+          next_token = tokens[index + 1]
+          if next_token && next_token.value&.upcase == 'AS'
             2  # Skip 'AS' and the alias
-          elsif tokens[index + 1]&.type == :identifier
+          elsif next_token && next_token.type == :identifier
             1  # Skip the alias
           else
             0
@@ -103,7 +127,7 @@ def self.add_to_summary(part, new_state, next_index)
         end
 
         def self.trigger_collection_mode(next_index)
-          { processed: true, parts: [], new_state: :expect_collection, next_index: next_index }
+          { processed: true, parts: [], new_state: EXPECT_COLLECTION_STATE, next_index: next_index }
         end
 
         def self.not_processed(current_state, next_index)
@@ -111,21 +135,23 @@ def self.not_processed(current_state, next_index)
         end
 
         def self.handle_union(token, tokens, index)
-          if tokens[index + 1]&.value&.upcase == 'ALL'
-            { processed: true, parts: ["#{token.value} #{tokens[index + 1].value}"], new_state: :default, next_index: index + 2 }
+          next_token = tokens[index + 1]
+          if next_token && next_token.value&.upcase == 'ALL'
+            { processed: true, parts: ["#{token.value} #{next_token.value}"], new_state: DEFAULT_STATE, next_index: index + 2 }
           else
-            add_to_summary(token.value, :default, index + 1)
+            add_to_summary(token.value, DEFAULT_STATE, index + 1)
           end
         end
 
         def self.handle_table_operation(token, tokens, index)
-          next_token = tokens[index + 1]&.value&.upcase
+          next_token_obj = tokens[index + 1]
+          next_token = next_token_obj&.value&.upcase
 
           case next_token
           when 'TABLE', 'INDEX', 'PROCEDURE', 'VIEW', 'DATABASE'
-            { processed: true, parts: ["#{token.value} #{next_token}"], new_state: :expect_collection, next_index: index + 2 }
+            { processed: true, parts: ["#{token.value} #{next_token}"], new_state: EXPECT_COLLECTION_STATE, next_index: index + 2 }
           else
-            add_to_summary(token.value, :default, index + 1)
+            add_to_summary(token.value, DEFAULT_STATE, index + 1)
           end
         end
       end
diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb
index 19029e20ea..c5f247a46d 100644
--- a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb
+++ b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb
@@ -40,18 +40,20 @@ def self.tokenize(query)
           scanner = StringScanner.new(query)
           tokens = []
 
-          until scanner.eos?
-            matched = TOKEN_REGEX.any? do |type, regex|
-              next unless (value = scanner.scan(regex))
-
-              tokens << Token.new(type, value) unless EXCLUDED_TYPES.include?(type)
-              true
-            end
-            scanner.getch unless matched
-          end
+          scan_next_token(scanner, tokens) until scanner.eos?
 
           tokens
         end
+
+        def self.scan_next_token(scanner, tokens)
+          matched = TOKEN_REGEX.any? do |type, regex|
+            next unless (value = scanner.scan(regex))
+
+            tokens << Token.new(type, value) unless EXCLUDED_TYPES.include?(type)
+            true
+          end
+          scanner.getch unless matched
+        end
       end
     end
   end
diff --git a/helpers/sql-obfuscation/test/fixtures/query_summary.json b/helpers/sql-obfuscation/test/fixtures/query_summary.json
index bf16e40acc..3578004738 100644
--- a/helpers/sql-obfuscation/test/fixtures/query_summary.json
+++ b/helpers/sql-obfuscation/test/fixtures/query_summary.json
@@ -17,6 +17,24 @@
             "db.query.summary": "SELECT"
         }
     },
+    {
+        "name": "nil_input",
+        "input": {
+            "query": null 
+        },
+        "expected": {
+            "db.query.summary": "UNKNOWN"
+        }
+    },
+    {
+        "name": "deeply_nested_subqueries",
+        "input": {
+            "query": "SELECT * FROM (SELECT * FROM (SELECT * FROM my_table))"
+        },
+        "expected": {
+            "db.query.summary": "SELECT SELECT SELECT my_table"
+        }
+    },
     {
         "name": "numeric_literal_with_decimal_point",
         "input": {
@@ -368,6 +386,33 @@
         "expected": {
             "db.query.summary": "SELECT products"
         }
+    },
+    {
+      "name": "create_index",
+      "input": {
+          "query": "CREATE INDEX idx_name ON MyTable (column1)"
+      },
+      "expected": {
+          "db.query.summary": "CREATE INDEX idx_name"
+      }
+    },
+    {
+      "name": "create_database",
+      "input": {
+        "query": "CREATE DATABASE my_db"
+      },
+      "expected": {
+          "db.query.summary": "CREATE DATABASE my_db"
+      }
+    },
+    {
+      "name": "create_procedure",
+      "input": {
+        "query": "CREATE PROCEDURE my_proc AS BEGIN SELECT * FROM MyTable END"
+      },
+      "expected": {
+        "db.query.summary": "CREATE PROCEDURE my_proc SELECT MyTable"
+      }
     }
 ]
 

From 56be5816ba6a7e6bd1857e225b3fc735fccff0cb Mon Sep 17 00:00:00 2001
From: Hannah Ramadan <hannahr315@gmail.com>
Date: Wed, 3 Sep 2025 15:28:28 -0700
Subject: [PATCH 3/6] rubocop edit

---
 .../lib/opentelemetry/helpers/query_summary/tokenizer.rb        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb
index c5f247a46d..16c4a56955 100644
--- a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb
+++ b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb
@@ -31,7 +31,7 @@ class Tokenizer
           quoted_identifier: /"(?:""|[^"\r\n])*"|`(?:``|[^`\r\n])*`|\[(?:[^\]\r\n])*\]/,
           keyword: /\b(?:SELECT|INSERT|UPDATE|DELETE|FROM|INTO|JOIN|CREATE|ALTER|DROP|TRUNCATE|WITH|UNION|TABLE|INDEX|PROCEDURE|VIEW|DATABASE)\b/i,
           identifier: /[a-zA-Z_][a-zA-Z0-9_.]*/,
-          operator: /<=|>=|<>|!=|[=<>+\-*\/%,;()!?]/
+          operator: %r{<=|>=|<>|!=|[=<>+\-*\/%,;()!?]}
         }.freeze
 
         EXCLUDED_TYPES = %i[whitespace comment].freeze

From 784f1a41d3b05997b142f2ca4132f9cb42eb4881 Mon Sep 17 00:00:00 2001
From: Hannah Ramadan <hannahr315@gmail.com>
Date: Thu, 4 Sep 2025 15:52:32 -0700
Subject: [PATCH 4/6] Refactoring, cache tests

---
 .../opentelemetry/helpers/query_summary.rb    |  28 ++-
 .../helpers/query_summary/cache.rb            |  40 ++--
 .../helpers/query_summary/parser.rb           | 200 +++++++++---------
 .../helpers/query_summary/tokenizer.rb        |  26 +--
 .../test/fixtures/query_summary.json          |  11 +-
 .../test/helpers/query_summary/cache_test.rb  |  20 ++
 .../{ => query_summary}/query_summary_test.rb |   4 +-
 7 files changed, 187 insertions(+), 142 deletions(-)
 create mode 100644 helpers/sql-obfuscation/test/helpers/query_summary/cache_test.rb
 rename helpers/sql-obfuscation/test/helpers/{ => query_summary}/query_summary_test.rb (90%)

diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary.rb
index f8146f65e0..b2fd844699 100644
--- a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary.rb
+++ b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary.rb
@@ -4,8 +4,8 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-require_relative 'query_summary/cache'
 require_relative 'query_summary/tokenizer'
+require_relative 'query_summary/cache'
 require_relative 'query_summary/parser'
 
 module OpenTelemetry
@@ -17,17 +17,25 @@ module Helpers
     #   QuerySummary.generate_summary("SELECT * FROM users WHERE id = 1")
     #   # => "SELECT users"
     module QuerySummary
-      def self.configure_cache(size: Cache::DEFAULT_SIZE)
-        Cache.configure(size: size)
-      end
+      class << self
+        def configure_cache(size: Cache::DEFAULT_SIZE)
+          cache_instance.configure(size: size)
+        end
+
+        def generate_summary(query)
+          cache_instance.fetch(query) do
+            tokens = Tokenizer.tokenize(query)
+            Parser.build_summary_from_tokens(tokens)
+          end
+        rescue StandardError
+          'UNKNOWN'
+        end
+
+        private
 
-      def self.generate_summary(query)
-        Cache.fetch(query) do
-          tokens = Tokenizer.tokenize(query)
-          Parser.build_summary_from_tokens(tokens)
+        def cache_instance
+          @cache_instance ||= Cache.new
         end
-      rescue StandardError
-        'UNKNOWN'
       end
     end
   end
diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/cache.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/cache.rb
index 3546a40f4b..56486faccc 100644
--- a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/cache.rb
+++ b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/cache.rb
@@ -2,7 +2,7 @@
 
 # Copyright The OpenTelemetry Authors
 #
-# SPDX-License-Identifier: Apache-2.0module OpenTelemetry
+# SPDX-License-Identifier: Apache-2.0
 
 module OpenTelemetry
   module Helpers
@@ -13,39 +13,52 @@ module QuerySummary
       # Uses mutex synchronization for thread safety.
       #
       # @example
-      #   Cache.fetch("SELECT * FROM users") { "SELECT users" } # => "SELECT users"
+      #   cache = Cache.new
+      #   cache.fetch("SELECT * FROM users") { "SELECT users" } # => "SELECT users"
       class Cache
         DEFAULT_SIZE = 1000
 
-        @cache = {}
-        @cache_mutex = Mutex.new
-        @cache_size = DEFAULT_SIZE
+        # @param size [Integer] number of entries at which the oldest inserted entry is evicted
+        def initialize(size: DEFAULT_SIZE)
+          @cache = {}
+          @cache_mutex = Mutex.new
+          @cache_size = size
+        end
 
-        def self.fetch(key)
+        # Returns the cached value for +key+; otherwise stores and returns the block's result.
+        # The block is evaluated while the mutex is held.
+        def fetch(key)
           @cache_mutex.synchronize do
             return @cache[key] if @cache.key?(key)
 
             result = yield
-            @cache.shift if @cache.size >= @cache_size
+            evict_if_needed
             @cache[key] = result
             result
           end
         end
 
-        def self.configure(size: DEFAULT_SIZE)
+        # Sets the maximum size and clears the cache when it holds more than +size+ entries.
+        # Must stay public: QuerySummary.configure_cache calls it with an explicit receiver.
+        def configure(size: DEFAULT_SIZE)
           @cache_mutex.synchronize do
             @cache_size = size
             @cache.clear if @cache.size > size
           end
         end
 
-        def self.store(key, value)
-          @cache_mutex.synchronize do
-            @cache.shift if @cache.size >= @cache_size
-            @cache[key] = value
-          end
+        private
+
+        # Empties the cache; does not take the mutex itself.
+        def clear
+          @cache.clear
+        end
+
+        # Drops the oldest inserted entry once the cache is full; called from fetch under the mutex.
+        def evict_if_needed
+          @cache.shift if @cache.size >= @cache_size
         end
       end
     end
   end
 end
diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb
index bf8e4adc7f..c851dc63ce 100644
--- a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb
+++ b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb
@@ -25,133 +25,135 @@ class Parser
         TABLE_OPERATIONS = %w[CREATE ALTER DROP TRUNCATE].freeze
         TABLE_OBJECTS = %w[TABLE INDEX PROCEDURE VIEW DATABASE].freeze
 
-        def self.build_summary_from_tokens(tokens)
-          summary_parts = []
-          state = DEFAULT_STATE
-          skip_until = 0 # Next token index to process; allows skipping tokens already consumed by previous operations
+        class << self
+          def build_summary_from_tokens(tokens)
+            summary_parts = []
+            state = DEFAULT_STATE
+            skip_until = 0 # Next token index to process; allows skipping tokens already consumed by previous operations
 
-          tokens.each_with_index do |token, index|
-            next if index < skip_until # Skip already processed tokens
+            tokens.each_with_index do |token, index|
+              next if index < skip_until # Skip already processed tokens
 
-            result = process_token(token, tokens, index, state)
+              result = process_token(token, tokens, index, state)
 
-            summary_parts.concat(result[:parts])
-            state = result[:new_state]
-            skip_until = result[:next_index]
-          end
+              summary_parts.concat(result[:parts])
+              state = result[:new_state]
+              skip_until = result[:next_index]
+            end
 
-          summary_parts.join(' ')
-        end
+            summary_parts.join(' ')
+          end
 
-        def self.process_token(token, tokens, index, state)
-          operation_result = process_main_operation(token, tokens, index, state)
-          return operation_result if operation_result[:processed]
+          def process_token(token, tokens, index, state)
+            operation_result = process_main_operation(token, tokens, index, state)
+            return operation_result if operation_result[:processed]
 
-          collection_result = process_collection_token(token, tokens, index, state)
-          return collection_result if collection_result[:processed]
+            collection_result = process_collection_token(token, tokens, index, state)
+            return collection_result if collection_result[:processed]
 
-          { processed: false, parts: [], new_state: state, next_index: index + 1 }
-        end
+            { processed: false, parts: [], new_state: state, next_index: index + 1 }
+          end
 
-        def self.process_main_operation(token, tokens, index, current_state)
-          upcased_value = token.value.upcase
-
-          case upcased_value
-          when *MAIN_OPERATIONS
-            add_to_summary(token.value, DEFAULT_STATE, index + 1)
-          when *COLLECTION_OPERATIONS
-            add_to_summary(token.value, EXPECT_COLLECTION_STATE, index + 1)
-          when *TRIGGER_COLLECTION
-            trigger_collection_mode(index + 1)
-          when *TABLE_OPERATIONS
-            handle_table_operation(token, tokens, index)
-          when 'UNION'
-            handle_union(token, tokens, index)
-          else
-            not_processed(current_state, index + 1)
+          def process_main_operation(token, tokens, index, current_state)
+            upcased_value = token.value.upcase
+
+            case upcased_value
+            when *MAIN_OPERATIONS
+              add_to_summary(token.value, DEFAULT_STATE, index + 1)
+            when *COLLECTION_OPERATIONS
+              add_to_summary(token.value, EXPECT_COLLECTION_STATE, index + 1)
+            when *TRIGGER_COLLECTION
+              trigger_collection_mode(index + 1)
+            when *TABLE_OPERATIONS
+              handle_table_operation(token, tokens, index)
+            when 'UNION'
+              handle_union(token, tokens, index)
+            else
+              not_processed(current_state, index + 1)
+            end
           end
-        end
 
-        def self.process_collection_token(token, tokens, index, state)
-          return not_processed(state, index + 1) unless state == EXPECT_COLLECTION_STATE
+          def process_collection_token(token, tokens, index, state)
+            return not_processed(state, index + 1) unless state == EXPECT_COLLECTION_STATE
 
-          upcased_value = token.value.upcase
+            upcased_value = token.value.upcase
 
-          if identifier_like?(token) || (token.type == :keyword && can_be_table_name?(upcased_value))
-            handle_collection_identifier(token, tokens, index)
-          elsif token.value == '(' || token.type == :operator
-            handle_collection_operator(token, state, index)
-          else
-            handle_collection_default(token, index)
+            if identifier_like?(token) || (token.type == :keyword && can_be_table_name?(upcased_value))
+              handle_collection_identifier(token, tokens, index)
+            elsif token.value == '(' || token.type == :operator
+              handle_collection_operator(token, state, index)
+            else
+              handle_collection_default(token, index)
+            end
           end
-        end
 
-        def self.handle_collection_identifier(token, tokens, index)
-          skip_count = calculate_alias_skip(tokens, index)
-          new_state = tokens[index + 1 + skip_count]&.value == ',' ? EXPECT_COLLECTION_STATE : DEFAULT_STATE
-          skip_count += 1 if tokens[index + 1 + skip_count]&.value == ','
+          def handle_collection_identifier(token, tokens, index)
+            skip_count = calculate_alias_skip(tokens, index)
+            new_state = tokens[index + 1 + skip_count]&.value == ',' ? EXPECT_COLLECTION_STATE : DEFAULT_STATE
+            skip_count += 1 if tokens[index + 1 + skip_count]&.value == ','
 
-          { processed: true, parts: [token.value], new_state: new_state, next_index: index + 1 + skip_count }
-        end
+            { processed: true, parts: [token.value], new_state: new_state, next_index: index + 1 + skip_count }
+          end
 
-        def self.handle_collection_operator(token, state, index)
-          { processed: true, parts: [], new_state: state, next_index: index + 1 }
-        end
+          def handle_collection_operator(token, state, index)
+            { processed: true, parts: [], new_state: state, next_index: index + 1 }
+          end
 
-        def self.handle_collection_default(token, index)
-          { processed: true, parts: [], new_state: DEFAULT_STATE, next_index: index + 1 }
-        end
+          def handle_collection_default(token, index)
+            { processed: true, parts: [], new_state: DEFAULT_STATE, next_index: index + 1 }
+          end
 
-        def self.identifier_like?(token)
-          %i[identifier quoted_identifier string].include?(token.type)
-        end
+          def identifier_like?(token)
+            %i[identifier quoted_identifier string].include?(token.type)
+          end
 
-        def self.can_be_table_name?(upcased_value)
-          # Keywords that can also be used as table/object names in certain contexts
-          TABLE_OBJECTS.include?(upcased_value)
-        end
+          def can_be_table_name?(upcased_value)
+            # Keywords that can also be used as table/object names in certain contexts
+            TABLE_OBJECTS.include?(upcased_value)
+          end
 
-        def self.calculate_alias_skip(tokens, index)
-          next_token = tokens[index + 1]
-          if next_token && next_token.value&.upcase == 'AS'
-            2  # Skip 'AS' and the alias
-          elsif next_token && next_token.type == :identifier
-            1  # Skip the alias
-          else
-            0
+          def calculate_alias_skip(tokens, index)
+            next_token = tokens[index + 1]
+            if next_token && next_token.value&.upcase == 'AS'
+              2  # Skip 'AS' and the alias
+            elsif next_token && next_token.type == :identifier
+              1  # Skip the alias
+            else
+              0
+            end
           end
-        end
 
-        def self.add_to_summary(part, new_state, next_index)
-          { processed: true, parts: [part], new_state: new_state, next_index: next_index }
-        end
+          def add_to_summary(part, new_state, next_index)
+            { processed: true, parts: [part], new_state: new_state, next_index: next_index }
+          end
 
-        def self.trigger_collection_mode(next_index)
-          { processed: true, parts: [], new_state: EXPECT_COLLECTION_STATE, next_index: next_index }
-        end
+          def trigger_collection_mode(next_index)
+            { processed: true, parts: [], new_state: EXPECT_COLLECTION_STATE, next_index: next_index }
+          end
 
-        def self.not_processed(current_state, next_index)
-          { processed: false, parts: [], new_state: current_state, next_index: next_index }
-        end
+          def not_processed(current_state, next_index)
+            { processed: false, parts: [], new_state: current_state, next_index: next_index }
+          end
 
-        def self.handle_union(token, tokens, index)
-          next_token = tokens[index + 1]
-          if next_token && next_token.value&.upcase == 'ALL'
-            { processed: true, parts: ["#{token.value} #{next_token.value}"], new_state: DEFAULT_STATE, next_index: index + 2 }
-          else
-            add_to_summary(token.value, DEFAULT_STATE, index + 1)
+          def handle_union(token, tokens, index)
+            next_token = tokens[index + 1]
+            if next_token && next_token.value&.upcase == 'ALL'
+              { processed: true, parts: ["#{token.value} #{next_token.value}"], new_state: DEFAULT_STATE, next_index: index + 2 }
+            else
+              add_to_summary(token.value, DEFAULT_STATE, index + 1)
+            end
           end
-        end
 
-        def self.handle_table_operation(token, tokens, index)
-          next_token_obj = tokens[index + 1]
-          next_token = next_token_obj&.value&.upcase
+          def handle_table_operation(token, tokens, index)
+            next_token_obj = tokens[index + 1]
+            next_token = next_token_obj&.value&.upcase
 
-          case next_token
-          when 'TABLE', 'INDEX', 'PROCEDURE', 'VIEW', 'DATABASE'
-            { processed: true, parts: ["#{token.value} #{next_token}"], new_state: EXPECT_COLLECTION_STATE, next_index: index + 2 }
-          else
-            add_to_summary(token.value, DEFAULT_STATE, index + 1)
+            case next_token
+            when 'TABLE', 'INDEX', 'PROCEDURE', 'VIEW', 'DATABASE'
+              { processed: true, parts: ["#{token.value} #{next_token}"], new_state: EXPECT_COLLECTION_STATE, next_index: index + 2 }
+            else
+              add_to_summary(token.value, DEFAULT_STATE, index + 1)
+            end
           end
         end
       end
diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb
index 16c4a56955..6bad41cd8f 100644
--- a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb
+++ b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb
@@ -36,23 +36,25 @@ class Tokenizer
 
         EXCLUDED_TYPES = %i[whitespace comment].freeze
 
-        def self.tokenize(query)
-          scanner = StringScanner.new(query)
-          tokens = []
+        class << self
+          def tokenize(query)
+            scanner = StringScanner.new(query)
+            tokens = []
 
-          scan_next_token(scanner, tokens) until scanner.eos?
+            scan_next_token(scanner, tokens) until scanner.eos?
 
-          tokens
-        end
+            tokens
+          end
 
-        def self.scan_next_token(scanner, tokens)
-          matched = TOKEN_REGEX.any? do |type, regex|
-            next unless (value = scanner.scan(regex))
+          def scan_next_token(scanner, tokens)
+            matched = TOKEN_REGEX.any? do |type, regex|
+              next unless (value = scanner.scan(regex))
 
-            tokens << Token.new(type, value) unless EXCLUDED_TYPES.include?(type)
-            true
+              tokens << Token.new(type, value) unless EXCLUDED_TYPES.include?(type)
+              true
+            end
+            scanner.getch unless matched
           end
-          scanner.getch unless matched
         end
       end
     end
diff --git a/helpers/sql-obfuscation/test/fixtures/query_summary.json b/helpers/sql-obfuscation/test/fixtures/query_summary.json
index 3578004738..97f8c77b1f 100644
--- a/helpers/sql-obfuscation/test/fixtures/query_summary.json
+++ b/helpers/sql-obfuscation/test/fixtures/query_summary.json
@@ -413,6 +413,15 @@
       "expected": {
         "db.query.summary": "CREATE PROCEDURE my_proc SELECT MyTable"
       }
-    }
+    },
+    {
+        "name": "oracle_angle_quote",
+        "input": {
+            "query": "select * from foo where bar=q'<baz's>' and x=5"
+        },
+        "expected": {
+            "db.query.summary": "select foo"
+        }
+   }
 ]
 
diff --git a/helpers/sql-obfuscation/test/helpers/query_summary/cache_test.rb b/helpers/sql-obfuscation/test/helpers/query_summary/cache_test.rb
new file mode 100644
index 0000000000..2d401b5b06
--- /dev/null
+++ b/helpers/sql-obfuscation/test/helpers/query_summary/cache_test.rb
@@ -0,0 +1,20 @@
+require_relative '../../test_helper'
+require_relative '../../../lib/opentelemetry/helpers/query_summary/cache'
+
+class CacheTest < Minitest::Test
+  def setup
+    @cache = OpenTelemetry::Helpers::QuerySummary::Cache.new
+  end
+
+  def test_fetch_returns_new_value_when_key_does_not_exist
+    result = @cache.fetch('key1') { 'value1' }
+    assert_equal 'value1', result
+  end
+
+  def test_fetch_returns_value_when_key_exists
+    @cache.fetch('key1') { 'value1' }
+    result = @cache.fetch('key1') { 'different_value' }
+
+    assert_equal 'value1', result
+  end
+end
diff --git a/helpers/sql-obfuscation/test/helpers/query_summary_test.rb b/helpers/sql-obfuscation/test/helpers/query_summary/query_summary_test.rb
similarity index 90%
rename from helpers/sql-obfuscation/test/helpers/query_summary_test.rb
rename to helpers/sql-obfuscation/test/helpers/query_summary/query_summary_test.rb
index 1efeed7f9d..7009eac028 100644
--- a/helpers/sql-obfuscation/test/helpers/query_summary_test.rb
+++ b/helpers/sql-obfuscation/test/helpers/query_summary/query_summary_test.rb
@@ -4,8 +4,8 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-require_relative '../test_helper'
-require_relative '../../lib/opentelemetry/helpers/query_summary'
+require_relative '../../test_helper'
+require_relative '../../../lib/opentelemetry/helpers/query_summary'
 
 class QuerySummaryTest < Minitest::Test
   def self.load_fixture

From ee6ae1d167ac0b51ebed894488d5e97e15a9f69d Mon Sep 17 00:00:00 2001
From: Hannah Ramadan <hannahr315@gmail.com>
Date: Mon, 8 Sep 2025 14:18:33 -0700
Subject: [PATCH 5/6] Add tests for cache

---
 .../test/helpers/query_summary/cache_test.rb  | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/helpers/sql-obfuscation/test/helpers/query_summary/cache_test.rb b/helpers/sql-obfuscation/test/helpers/query_summary/cache_test.rb
index 2d401b5b06..a194b2aad5 100644
--- a/helpers/sql-obfuscation/test/helpers/query_summary/cache_test.rb
+++ b/helpers/sql-obfuscation/test/helpers/query_summary/cache_test.rb
@@ -17,4 +17,39 @@ def test_fetch_returns_value_when_key_exists
 
     assert_equal 'value1', result
   end
+
+  def test_eviction_when_cache_size_exceeded
+    small_cache = OpenTelemetry::Helpers::QuerySummary::Cache.new(size: 2)
+
+    small_cache.fetch('key1') { 'value1' }
+    small_cache.fetch('key2') { 'value2' }
+    small_cache.fetch('key3') { 'value3' }
+
+    result = small_cache.fetch('key1') { 'new_value1' }
+    assert_equal 'new_value1', result
+  end
+
+  def test_cache_thread_safety
+    threads = Array.new(10) do |i|
+      Thread.new do
+        @cache.fetch('shared_key') { "thread_#{i}_value" }
+      end
+    end
+
+    results = threads.map(&:value)
+
+    assert_equal 1, results.uniq.size
+  end
+
+  def test_empty_string
+    @cache.fetch('') { 'empty_string_value' }
+
+    assert_equal 'empty_string_value', @cache.fetch('')
+  end
+
+  def test_nil
+    @cache.fetch(nil) { 'nil_value' }
+
+    assert_equal 'nil_value', @cache.fetch(nil)
+  end
 end

From c296a46c47142aa7504a978154d720f7d0182310 Mon Sep 17 00:00:00 2001
From: Hannah Ramadan <hannahr315@gmail.com>
Date: Mon, 8 Sep 2025 14:55:51 -0700
Subject: [PATCH 6/6] Add comments to parser

---
 .../helpers/query_summary/parser.rb           |  53 +-
 .../test/fixtures/query_summary.json          | 833 +++++++++---------
 2 files changed, 450 insertions(+), 436 deletions(-)

diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb
index c851dc63ce..f275661366 100644
--- a/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb
+++ b/helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb
@@ -16,23 +16,24 @@ module QuerySummary
       #   tokens = [Token.new(:keyword, "SELECT"), Token.new(:identifier, "users")]
       #   Parser.build_summary_from_tokens(tokens) # => "SELECT users"
       class Parser
-        DEFAULT_STATE = :default
+        # Two states: normal parsing vs. waiting for table names
+        PARSING_STATE = :parsing
         EXPECT_COLLECTION_STATE = :expect_collection
 
-        MAIN_OPERATIONS = %w[SELECT INSERT DELETE].freeze
-        COLLECTION_OPERATIONS = %w[WITH UPDATE].freeze
-        TRIGGER_COLLECTION = %w[FROM INTO JOIN IN].freeze
-        TABLE_OPERATIONS = %w[CREATE ALTER DROP TRUNCATE].freeze
-        TABLE_OBJECTS = %w[TABLE INDEX PROCEDURE VIEW DATABASE].freeze
+        MAIN_OPERATIONS = %w[SELECT INSERT DELETE].freeze # Operations that start queries and need table names
+        COLLECTION_OPERATIONS = %w[WITH UPDATE].freeze # Operations that work with existing data and expect table names to follow
+        TRIGGER_COLLECTION = %w[FROM INTO JOIN IN].freeze # Keywords that signal a table name is coming next
+        TABLE_OPERATIONS = %w[CREATE ALTER DROP TRUNCATE].freeze # Database structure operations that create, modify, or remove objects
+        TABLE_OBJECTS = %w[TABLE INDEX PROCEDURE VIEW DATABASE].freeze # Types of database objects that can be created, modified, or removed
 
         class << self
           def build_summary_from_tokens(tokens)
             summary_parts = []
-            state = DEFAULT_STATE
-            skip_until = 0 # Next token index to process; allows skipping tokens already consumed by previous operations
+            state = PARSING_STATE
+            skip_until = 0 # Skip tokens we've already processed when looking ahead
 
             tokens.each_with_index do |token, index|
-              next if index < skip_until # Skip already processed tokens
+              next if index < skip_until
 
               result = process_token(token, tokens, index, state)
 
@@ -59,11 +60,11 @@ def process_main_operation(token, tokens, index, current_state)
 
             case upcased_value
             when *MAIN_OPERATIONS
-              add_to_summary(token.value, DEFAULT_STATE, index + 1)
+              add_to_summary(token.value, PARSING_STATE, index + 1)
             when *COLLECTION_OPERATIONS
               add_to_summary(token.value, EXPECT_COLLECTION_STATE, index + 1)
             when *TRIGGER_COLLECTION
-              trigger_collection_mode(index + 1)
+              expect_table_names_next(index + 1)
             when *TABLE_OPERATIONS
               handle_table_operation(token, tokens, index)
             when 'UNION'
@@ -79,17 +80,19 @@ def process_collection_token(token, tokens, index, state)
             upcased_value = token.value.upcase
 
             if identifier_like?(token) || (token.type == :keyword && can_be_table_name?(upcased_value))
-              handle_collection_identifier(token, tokens, index)
+              process_table_name_and_alias(token, tokens, index)
             elsif token.value == '(' || token.type == :operator
               handle_collection_operator(token, state, index)
             else
-              handle_collection_default(token, index)
+              return_to_normal_parsing(token, index)
             end
           end
 
-          def handle_collection_identifier(token, tokens, index)
+          def process_table_name_and_alias(token, tokens, index)
+            # Look ahead to skip table aliases (e.g., "users u" or "users AS u")
             skip_count = calculate_alias_skip(tokens, index)
-            new_state = tokens[index + 1 + skip_count]&.value == ',' ? EXPECT_COLLECTION_STATE : DEFAULT_STATE
+            # Check if there's a comma - if so, expect more table names in the list
+            new_state = tokens[index + 1 + skip_count]&.value == ',' ? EXPECT_COLLECTION_STATE : PARSING_STATE
             skip_count += 1 if tokens[index + 1 + skip_count]&.value == ','
 
             { processed: true, parts: [token.value], new_state: new_state, next_index: index + 1 + skip_count }
@@ -99,8 +102,8 @@ def handle_collection_operator(token, state, index)
             { processed: true, parts: [], new_state: state, next_index: index + 1 }
           end
 
-          def handle_collection_default(token, index)
-            { processed: true, parts: [], new_state: DEFAULT_STATE, next_index: index + 1 }
+          def return_to_normal_parsing(token, index)
+            { processed: true, parts: [], new_state: PARSING_STATE, next_index: index + 1 }
           end
 
           def identifier_like?(token)
@@ -108,16 +111,17 @@ def identifier_like?(token)
           end
 
           def can_be_table_name?(upcased_value)
-            # Keywords that can also be used as table/object names in certain contexts
+            # Keywords naming DDL object types (TABLE_OBJECTS) are also accepted in table-name position
             TABLE_OBJECTS.include?(upcased_value)
           end
 
           def calculate_alias_skip(tokens, index)
+            # Handle both "table AS alias" and "table alias" patterns
             next_token = tokens[index + 1]
             if next_token && next_token.value&.upcase == 'AS'
-              2  # Skip 'AS' and the alias
+              2
             elsif next_token && next_token.type == :identifier
-              1  # Skip the alias
+              1
             else
               0
             end
@@ -127,7 +131,7 @@ def add_to_summary(part, new_state, next_index)
             { processed: true, parts: [part], new_state: new_state, next_index: next_index }
           end
 
-          def trigger_collection_mode(next_index)
+          def expect_table_names_next(next_index)
             { processed: true, parts: [], new_state: EXPECT_COLLECTION_STATE, next_index: next_index }
           end
 
@@ -138,13 +142,14 @@ def not_processed(current_state, next_index)
           def handle_union(token, tokens, index)
             next_token = tokens[index + 1]
             if next_token && next_token.value&.upcase == 'ALL'
-              { processed: true, parts: ["#{token.value} #{next_token.value}"], new_state: DEFAULT_STATE, next_index: index + 2 }
+              { processed: true, parts: ["#{token.value} #{next_token.value}"], new_state: PARSING_STATE, next_index: index + 2 }
             else
-              add_to_summary(token.value, DEFAULT_STATE, index + 1)
+              add_to_summary(token.value, PARSING_STATE, index + 1)
             end
           end
 
           def handle_table_operation(token, tokens, index)
+            # Combine DDL operations with object types: "CREATE TABLE", "DROP INDEX", etc.
             next_token_obj = tokens[index + 1]
             next_token = next_token_obj&.value&.upcase
 
@@ -152,7 +157,7 @@ def handle_table_operation(token, tokens, index)
             when 'TABLE', 'INDEX', 'PROCEDURE', 'VIEW', 'DATABASE'
               { processed: true, parts: ["#{token.value} #{next_token}"], new_state: EXPECT_COLLECTION_STATE, next_index: index + 2 }
             else
-              add_to_summary(token.value, DEFAULT_STATE, index + 1)
+              add_to_summary(token.value, PARSING_STATE, index + 1)
             end
           end
         end
diff --git a/helpers/sql-obfuscation/test/fixtures/query_summary.json b/helpers/sql-obfuscation/test/fixtures/query_summary.json
index 97f8c77b1f..ec0d505a4b 100644
--- a/helpers/sql-obfuscation/test/fixtures/query_summary.json
+++ b/helpers/sql-obfuscation/test/fixtures/query_summary.json
@@ -1,427 +1,436 @@
 [
-    {
-        "name": "numeric_literal_integers",
-        "input": {
-            "query": "SELECT 12, -12, +12"
-        },
-        "expected": {
-            "db.query.summary": "SELECT"
-        }
-    },
-    {
-        "name": "caching_query_summaries",
-        "input": {
-            "query": "SELECT 12, -12, +12"
-        },
-        "expected": {
-            "db.query.summary": "SELECT"
-        }
-    },
-    {
-        "name": "nil_input",
-        "input": {
-            "query": null 
-        },
-        "expected": {
-            "db.query.summary": "UNKNOWN"
-        }
-    },
-    {
-        "name": "deeply_nested_subqueries",
-        "input": {
-            "query": "SELECT * FROM (SELECT * FROM (SELECT * FROM my_table))"
-        },
-        "expected": {
-            "db.query.summary": "SELECT SELECT SELECT my_table"
-        }
-    },
-    {
-        "name": "numeric_literal_with_decimal_point",
-        "input": {
-            "query": "SELECT 12.34, -12.34, +12.34, .01, -.01"
-        },
-        "expected": {
-            "db.query.summary": "SELECT"
-        }
-    },
-    {
-        "name": "numeric_literal_exponential",
-        "input": {
-            "query": "SELECT 12.34e56, -12.34e56, +12.34e56"
-        },
-        "expected": {
-            "db.query.summary": "SELECT"
-        }
-    },
-    {
-        "name": "numeric_literal_negative_exponential",
-        "input": {
-            "query": "SELECT 12.34e-56, -12.34e-56, +12.34e-56"
-        },
-        "expected": {
-            "db.query.summary": "SELECT"
-        }
-    },
-    {
-        "name": "arithmetic_on_numeric_literals",
-        "input": {
-            "query": "SELECT 99+100"
-        },
-        "expected": {
-            "db.query.summary": "SELECT"
-        }
-    },
-    {
-        "name": "hex_literal",
-        "input": {
-            "query": "SELECT 0xDEADBEEF, 0XdeadBEEF"
-        },
-        "expected": {
-            "db.query.summary": "SELECT"
-        }
-    },
-    {
-        "name": "string_literal",
-        "input": {
-            "query": "SELECT 'hello'"
-        },
-        "expected": {
-            "db.query.summary": "SELECT"
-        }
-    },
-    {
-        "name": "string_literal_escaped_single_quote",
-        "input": {
-            "query": "SELECT 'My name''s not important'"
-        },
-        "expected": {
-            "db.query.summary": "SELECT"
-        }
-    },
-    {
-        "name": "string_with_embedded_newline",
-        "input": {
-            "query": "SELECT 'My name is \n not important'"
-        },
-        "expected": {
-            "db.query.summary": "SELECT"
-        }
-    },
-    {
-        "name": "numbers_in_identifiers",
-        "input": {
-            "query": "SELECT c3po, r2d2 FROM covid19 WHERE n1h1=1234"
-        },
-        "expected": {
-            "db.query.summary": "SELECT covid19"
-        }
-    },
-    {
-        "name": "periods_in_identifiers",
-        "input": {
-            "query": "SELECT a FROM dbo.Table JOIN dbo.AnotherTable"
-        },
-        "expected": {
-            "db.query.summary": "SELECT dbo.Table dbo.AnotherTable"
-        }
-    },
-    {
-        "name": "insert_into",
-        "input": {
-            "query": "INSERT INTO X VALUES(1, 23456, 123.456, 99+100)"
-        },
-        "expected": {
-            "db.query.summary": "INSERT X"
-        }
-    },
-    {
-        "name": "uuid",
-        "input": {
-            "query": "SELECT { guid '01234567-89ab-cdef-0123-456789abcdef' }"
-        },
-        "expected": {
-            "db.query.summary": "SELECT"
-        }
-    },
-    {
-        "name": "in_clause",
-        "input": {
-            "query": "SELECT * FROM table WHERE value IN (123, 456, 'abc')"
-        },
-        "expected": {
-            "db.query.summary": "SELECT table"
-        }
-    },
-    {
-        "name": "comments",
-        "input": {
-            "query": "SELECT column -- end of line comment\nFROM /* block \n comment */ table"
-        },
-        "expected": {
-            "db.query.summary": "SELECT table"
-        }
-    },
-    {
-        "name": "insert_into_select",
-        "input": {
-            "query": "INSERT INTO shipping_details\n(order_id,\naddress)\nSELECT order_id,\naddress\nFROM   orders\nWHERE  order_id = 1"
-        },
-        "expected": {
-            "db.query.summary": "INSERT shipping_details SELECT orders"
-        }
-    },
-    {
-        "name": "select_nested_query",
-        "input": {
-            "query": "SELECT order_date\nFROM   (SELECT *\nFROM   orders o\nJOIN customers c\nON o.customer_id = c.customer_id)"
-        },
-        "expected": {
-            "db.query.summary": "SELECT SELECT orders customers"
-        }
-    },
-    {
-        "name": "select_nested_query_case_preserved",
-        "input": {
-            "query": "SELEcT order_date\nFROM   (sELECT *\nFROM   orders o\nJOIN customers c\nON o.customer_id = c.customer_id)"
-        },
-        "expected": {
-            "db.query.summary": "SELEcT sELECT orders customers"
-        }
-    },
-    {
-        "name": "case_preserved",
-        "input": {
-            "query": "SELEcT order_date\nFROM ORders"
-        },
-        "expected": {
-            "db.query.summary": "SELEcT ORders"
-        }
-    },
-    {
-        "name": "cross_join",
-        "input": {
-            "query": "SELECT * FROM Orders o CROSS JOIN OrderDetails od"
-        },
-        "expected": {
-            "db.query.summary": "SELECT Orders OrderDetails"
-        }
-    },
-    {
-        "name": "cross_join_comma_separated_syntax",
-        "input": {
-            "query": "SELECT * FROM Orders o, OrderDetails od"
-        },
-        "expected": {
-            "db.query.summary": "SELECT Orders OrderDetails"
-        }
-    },
-    {
-        "name": "left_outer_join",
-        "input": {
-            "query": "SELECT c.name, o.id FROM customers c LEFT JOIN orders o ON c.id = o.customer_id"
-        },
-        "expected": {
-            "db.query.summary": "SELECT customers orders"
-        }
-    },
-    {
-        "name": "create_table",
-        "input": {
-            "query": "CREATE  TABLE MyTable (\n    ID NOT NULL IDENTITY(1,1) PRIMARY KEY\n)"
-        },
-        "expected": {
-            "db.query.summary": "CREATE TABLE MyTable"
-        }
-    },
-    {
-        "name": "alter_table",
-        "input": {
-            "query": "ALTER  TABLE MyTable ADD Name varchar(255)"
-        },
-        "expected": {
-            "db.query.summary": "ALTER TABLE MyTable"
-        }
-    },
-    {
-        "name": "drop_table",
-        "input": {
-            "query": "DROP  TABLE MyTable"
-        },
-        "expected": {
-            "db.query.summary": "DROP TABLE MyTable"
-        }
-    },
-    {
-        "name": "query_that_performs_multiple_operations",
-        "input": {
-            "query": "INSERT INTO shipping_details(order_id, address) SELECT order_id, address FROM orders WHERE  order_id = ?"
-        },
-        "expected": {
-            "db.query.summary": "INSERT shipping_details SELECT orders"
-        }
-    },
-    {
-        "name": "query_that_performs_an_operation_thats_applied_to_multiple_collections",
-        "input": {
-            "db.system.name": "other_sql",
-            "query": "SELECT * FROM   songs, artists WHERE songs.artist_id == artists.id"
-        },
-        "expected": {
-            "db.query.summary": "SELECT songs artists"
-        }
-    },
-    {
-        "name": "query_that_performs_operation_on_multiple_collections_with_double-quotes_or_other_punctuation",
-        "input": {
-            "query": "SELECT * FROM   \"song list\", 'artists'"
-        },
-        "expected": {
-            "db.query.summary": "SELECT \"song list\" 'artists'"
-        }
-    },
-    {
-        "name": "update_statement",
-        "input": {
-            "query": "UPDATE Customers SET ContactName = 'Alfred Schmidt', City= 'Frankfurt' WHERE CustomerID = 1"
-        },
-        "expected": {
-            "db.query.summary": "UPDATE Customers"
-        }
-    },
-    {
-        "name": "delete_statement",
-        "input": {
-            "query": "DELETE FROM Customers WHERE CustomerName='Alfreds Futterkiste'"
-        },
-        "expected": {
-            "db.query.summary": "DELETE Customers"
-        }
-    },
-    {
-        "name": "truncate_table_statement",
-        "input": {
-            "query": "TRUNCATE TABLE Customers"
-        },
-        "expected": {
-            "db.query.summary": "TRUNCATE TABLE Customers"
-        }
-    },
-    {
-        "name": "with_clause_cte",
-        "input": {
-            "query": "WITH regional_sales AS (SELECT region, SUM(amount) AS total_sales FROM orders GROUP BY region) SELECT region, total_sales FROM regional_sales WHERE total_sales > 1000"
-        },
-        "expected": {
-            "db.query.summary": "WITH regional_sales SELECT orders SELECT regional_sales"
-        }
-    },
-    {
-        "name": "union_statement",
-        "input": {
-            "query": "SELECT City FROM Customers UNION ALL SELECT City FROM Suppliers ORDER BY City"
-        },
-        "expected": {
-            "db.query.summary": "SELECT Customers UNION ALL SELECT Suppliers"
-        }
-    },
-    {
-        "name": "group_by_and_having_clauses",
-        "input": {
-            "query": "SELECT COUNT(CustomerID), Country FROM Customers WHERE Country != 'USA' GROUP BY Country HAVING COUNT(CustomerID) > 5"
-        },
-        "expected": {
-            "db.query.summary": "SELECT Customers"
-        }
-    },
-    {
-        "name": "boolean_and_null_literals",
-        "input": {
-            "query": "SELECT * FROM my_table WHERE a IS NOT NULL AND b = TRUE AND c = FALSE"
-        },
-        "expected": {
-            "db.query.summary": "SELECT my_table"
-        }
-    },
-    {
-        "name": "multiple_joins_and_aliases",
-        "input": {
-            "query": "SELECT o.OrderID, c.CustomerName, s.ShipperName FROM ((Orders AS o INNER JOIN Customers AS c ON o.CustomerID = c.CustomerID) INNER JOIN Shippers AS s ON o.ShipperID = s.ShipperID)"
-        },
-        "expected": {
-            "db.query.summary": "SELECT Orders Customers Shippers"
-        }
-    },
-    {
-        "name": "window_function_over_partition",
-        "input": {
-            "query": "SELECT name, salary, ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) as rank FROM employees"
-        },
-        "expected": {
-            "db.query.summary": "SELECT employees"
-        }
-    },
-    {
-        "name": "case_statement",
-        "input": {
-            "query": "SELECT OrderID, Quantity, CASE WHEN Quantity > 30 THEN 'Large' WHEN Quantity > 10 THEN 'Medium' ELSE 'Small' END AS QuantityText FROM OrderDetails"
-        },
-        "expected": {
-            "db.query.summary": "SELECT OrderDetails"
-        }
-    },
-    {
-        "name": "like_predicate",
-        "input": {
-            "query": "SELECT * FROM products WHERE product_name LIKE 'Chai%'"
-        },
-        "expected": {
-            "db.query.summary": "SELECT products"
-        }
-    },
-    {
-        "name": "between_predicate",
-        "input": {
-            "query": "SELECT * FROM products WHERE price BETWEEN 10 AND 20"
-        },
-        "expected": {
-            "db.query.summary": "SELECT products"
-        }
-    },
-    {
-      "name": "create_index",
+  {
+      "name": "numeric_literal_integers",
       "input": {
-          "query": "CREATE INDEX idx_name ON MyTable (column1)"
+          "query": "SELECT 12, -12, +12"
       },
       "expected": {
-          "db.query.summary": "CREATE INDEX idx_name"
+          "db.query.summary": "SELECT"
       }
-    },
-    {
-      "name": "create_database",
+  },
+  {
+      "name": "caching_query_summaries",
       "input": {
-        "query": "CREATE DATABASE my_db"
+          "query": "SELECT 12, -12, +12"
       },
       "expected": {
-          "db.query.summary": "CREATE DATABASE my_db"
+          "db.query.summary": "SELECT"
       }
-    },
-    {
-      "name": "create_procedure",
+  },
+  {
+      "name": "nil_input",
       "input": {
-        "query": "CREATE PROCEDURE my_proc AS BEGIN SELECT * FROM MyTable END"
+          "query": null
       },
       "expected": {
-        "db.query.summary": "CREATE PROCEDURE my_proc SELECT MyTable"
+          "db.query.summary": "UNKNOWN"
       }
-    },
-    {
-        "name": "oracle_angle_quote",
-        "input": {
-            "query": "select * from foo where bar=q'<baz's>' and x=5"
-        },
-        "expected": {
-            "db.query.summary": "select foo"
-        }
-   }
+  },
+  {
+      "name": "deeply_nested_subqueries",
+      "input": {
+          "query": "SELECT * FROM (SELECT * FROM (SELECT * FROM my_table))"
+      },
+      "expected": {
+          "db.query.summary": "SELECT SELECT SELECT my_table"
+      }
+  },
+  {
+      "name": "numeric_literal_with_decimal_point",
+      "input": {
+          "query": "SELECT 12.34, -12.34, +12.34, .01, -.01"
+      },
+      "expected": {
+          "db.query.summary": "SELECT"
+      }
+  },
+  {
+      "name": "numeric_literal_exponential",
+      "input": {
+          "query": "SELECT 12.34e56, -12.34e56, +12.34e56"
+      },
+      "expected": {
+          "db.query.summary": "SELECT"
+      }
+  },
+  {
+      "name": "numeric_literal_negative_exponential",
+      "input": {
+          "query": "SELECT 12.34e-56, -12.34e-56, +12.34e-56"
+      },
+      "expected": {
+          "db.query.summary": "SELECT"
+      }
+  },
+  {
+      "name": "arithmetic_on_numeric_literals",
+      "input": {
+          "query": "SELECT 99+100"
+      },
+      "expected": {
+          "db.query.summary": "SELECT"
+      }
+  },
+  {
+      "name": "hex_literal",
+      "input": {
+          "query": "SELECT 0xDEADBEEF, 0XdeadBEEF"
+      },
+      "expected": {
+          "db.query.summary": "SELECT"
+      }
+  },
+  {
+      "name": "string_literal",
+      "input": {
+          "query": "SELECT 'hello'"
+      },
+      "expected": {
+          "db.query.summary": "SELECT"
+      }
+  },
+  {
+      "name": "string_literal_escaped_single_quote",
+      "input": {
+          "query": "SELECT 'My name''s not important'"
+      },
+      "expected": {
+          "db.query.summary": "SELECT"
+      }
+  },
+  {
+      "name": "string_with_embedded_newline",
+      "input": {
+          "query": "SELECT 'My name is \n not important'"
+      },
+      "expected": {
+          "db.query.summary": "SELECT"
+      }
+  },
+  {
+      "name": "numbers_in_identifiers",
+      "input": {
+          "query": "SELECT c3po, r2d2 FROM covid19 WHERE n1h1=1234"
+      },
+      "expected": {
+          "db.query.summary": "SELECT covid19"
+      }
+  },
+  {
+      "name": "periods_in_identifiers",
+      "input": {
+          "query": "SELECT a FROM dbo.Table JOIN dbo.AnotherTable"
+      },
+      "expected": {
+          "db.query.summary": "SELECT dbo.Table dbo.AnotherTable"
+      }
+  },
+  {
+      "name": "insert_into",
+      "input": {
+          "query": "INSERT INTO X VALUES(1, 23456, 123.456, 99+100)"
+      },
+      "expected": {
+          "db.query.summary": "INSERT X"
+      }
+  },
+  {
+      "name": "uuid",
+      "input": {
+          "query": "SELECT { guid '01234567-89ab-cdef-0123-456789abcdef' }"
+      },
+      "expected": {
+          "db.query.summary": "SELECT"
+      }
+  },
+  {
+      "name": "in_clause",
+      "input": {
+          "query": "SELECT * FROM table WHERE value IN (123, 456, 'abc')"
+      },
+      "expected": {
+          "db.query.summary": "SELECT table"
+      }
+  },
+  {
+      "name": "comments",
+      "input": {
+          "query": "SELECT column -- end of line comment\nFROM /* block \n comment */ table"
+      },
+      "expected": {
+          "db.query.summary": "SELECT table"
+      }
+  },
+  {
+      "name": "insert_into_select",
+      "input": {
+          "query": "INSERT INTO shipping_details\n(order_id,\naddress)\nSELECT order_id,\naddress\nFROM   orders\nWHERE  order_id = 1"
+      },
+      "expected": {
+          "db.query.summary": "INSERT shipping_details SELECT orders"
+      }
+  },
+  {
+      "name": "select_nested_query",
+      "input": {
+          "query": "SELECT order_date\nFROM   (SELECT *\nFROM   orders o\nJOIN customers c\nON o.customer_id = c.customer_id)"
+      },
+      "expected": {
+          "db.query.summary": "SELECT SELECT orders customers"
+      }
+  },
+  {
+      "name": "select_nested_query_case_preserved",
+      "input": {
+          "query": "SELEcT order_date\nFROM   (sELECT *\nFROM   orders o\nJOIN customers c\nON o.customer_id = c.customer_id)"
+      },
+      "expected": {
+          "db.query.summary": "SELEcT sELECT orders customers"
+      }
+  },
+  {
+      "name": "case_preserved",
+      "input": {
+          "query": "SELEcT order_date\nFROM ORders"
+      },
+      "expected": {
+          "db.query.summary": "SELEcT ORders"
+      }
+  },
+  {
+      "name": "cross_join",
+      "input": {
+          "query": "SELECT * FROM Orders o CROSS JOIN OrderDetails od"
+      },
+      "expected": {
+          "db.query.summary": "SELECT Orders OrderDetails"
+      }
+  },
+  {
+      "name": "cross_join_comma_separated_syntax",
+      "input": {
+          "query": "SELECT * FROM Orders o, OrderDetails od"
+      },
+      "expected": {
+          "db.query.summary": "SELECT Orders OrderDetails"
+      }
+  },
+  {
+      "name": "left_outer_join",
+      "input": {
+          "query": "SELECT c.name, o.id FROM customers c LEFT JOIN orders o ON c.id = o.customer_id"
+      },
+      "expected": {
+          "db.query.summary": "SELECT customers orders"
+      }
+  },
+  {
+      "name": "create_table",
+      "input": {
+          "query": "CREATE  TABLE MyTable (\n    ID NOT NULL IDENTITY(1,1) PRIMARY KEY\n)"
+      },
+      "expected": {
+          "db.query.summary": "CREATE TABLE MyTable"
+      }
+  },
+  {
+      "name": "alter_table",
+      "input": {
+          "query": "ALTER  TABLE MyTable ADD Name varchar(255)"
+      },
+      "expected": {
+          "db.query.summary": "ALTER TABLE MyTable"
+      }
+  },
+  {
+      "name": "drop_table",
+      "input": {
+          "query": "DROP  TABLE MyTable"
+      },
+      "expected": {
+          "db.query.summary": "DROP TABLE MyTable"
+      }
+  },
+  {
+      "name": "query_that_performs_multiple_operations",
+      "input": {
+          "query": "INSERT INTO shipping_details(order_id, address) SELECT order_id, address FROM orders WHERE  order_id = ?"
+      },
+      "expected": {
+          "db.query.summary": "INSERT shipping_details SELECT orders"
+      }
+  },
+  {
+      "name": "query_that_performs_an_operation_thats_applied_to_multiple_collections",
+      "input": {
+          "db.system.name": "other_sql",
+          "query": "SELECT * FROM   songs, artists WHERE songs.artist_id == artists.id"
+      },
+      "expected": {
+          "db.query.summary": "SELECT songs artists"
+      }
+  },
+  {
+      "name": "query_that_performs_operation_on_multiple_collections_with_double-quotes_or_other_punctuation",
+      "input": {
+          "query": "SELECT * FROM   \"song list\", 'artists'"
+      },
+      "expected": {
+          "db.query.summary": "SELECT \"song list\" 'artists'"
+      }
+  },
+  {
+      "name": "update_statement",
+      "input": {
+          "query": "UPDATE Customers SET ContactName = 'Alfred Schmidt', City= 'Frankfurt' WHERE CustomerID = 1"
+      },
+      "expected": {
+          "db.query.summary": "UPDATE Customers"
+      }
+  },
+  {
+      "name": "delete_statement",
+      "input": {
+          "query": "DELETE FROM Customers WHERE CustomerName='Alfreds Futterkiste'"
+      },
+      "expected": {
+          "db.query.summary": "DELETE Customers"
+      }
+  },
+  {
+      "name": "truncate_table_statement",
+      "input": {
+          "query": "TRUNCATE TABLE Customers"
+      },
+      "expected": {
+          "db.query.summary": "TRUNCATE TABLE Customers"
+      }
+  },
+  {
+      "name": "with_clause_cte",
+      "input": {
+          "query": "WITH regional_sales AS (SELECT region, SUM(amount) AS total_sales FROM orders GROUP BY region) SELECT region, total_sales FROM regional_sales WHERE total_sales > 1000"
+      },
+      "expected": {
+          "db.query.summary": "WITH regional_sales SELECT orders SELECT regional_sales"
+      }
+  },
+  {
+      "name": "union_statement",
+      "input": {
+          "query": "SELECT City FROM Customers UNION ALL SELECT City FROM Suppliers ORDER BY City"
+      },
+      "expected": {
+          "db.query.summary": "SELECT Customers UNION ALL SELECT Suppliers"
+      }
+  },
+  {
+      "name": "group_by_and_having_clauses",
+      "input": {
+          "query": "SELECT COUNT(CustomerID), Country FROM Customers WHERE Country != 'USA' GROUP BY Country HAVING COUNT(CustomerID) > 5"
+      },
+      "expected": {
+          "db.query.summary": "SELECT Customers"
+      }
+  },
+  {
+      "name": "boolean_and_null_literals",
+      "input": {
+          "query": "SELECT * FROM my_table WHERE a IS NOT NULL AND b = TRUE AND c = FALSE"
+      },
+      "expected": {
+          "db.query.summary": "SELECT my_table"
+      }
+  },
+  {
+      "name": "multiple_joins_and_aliases",
+      "input": {
+          "query": "SELECT o.OrderID, c.CustomerName, s.ShipperName FROM ((Orders AS o INNER JOIN Customers AS c ON o.CustomerID = c.CustomerID) INNER JOIN Shippers AS s ON o.ShipperID = s.ShipperID)"
+      },
+      "expected": {
+          "db.query.summary": "SELECT Orders Customers Shippers"
+      }
+  },
+  {
+      "name": "window_function_over_partition",
+      "input": {
+          "query": "SELECT name, salary, ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) as rank FROM employees"
+      },
+      "expected": {
+          "db.query.summary": "SELECT employees"
+      }
+  },
+  {
+      "name": "case_statement",
+      "input": {
+          "query": "SELECT OrderID, Quantity, CASE WHEN Quantity > 30 THEN 'Large' WHEN Quantity > 10 THEN 'Medium' ELSE 'Small' END AS QuantityText FROM OrderDetails"
+      },
+      "expected": {
+          "db.query.summary": "SELECT OrderDetails"
+      }
+  },
+  {
+      "name": "like_predicate",
+      "input": {
+          "query": "SELECT * FROM products WHERE product_name LIKE 'Chai%'"
+      },
+      "expected": {
+          "db.query.summary": "SELECT products"
+      }
+  },
+  {
+      "name": "between_predicate",
+      "input": {
+          "query": "SELECT * FROM products WHERE price BETWEEN 10 AND 20"
+      },
+      "expected": {
+          "db.query.summary": "SELECT products"
+      }
+  },
+  {
+    "name": "create_index",
+    "input": {
+        "query": "CREATE INDEX idx_name ON MyTable (column1)"
+    },
+    "expected": {
+        "db.query.summary": "CREATE INDEX idx_name"
+    }
+  },
+  {
+    "name": "create_database",
+    "input": {
+      "query": "CREATE DATABASE my_db"
+    },
+    "expected": {
+        "db.query.summary": "CREATE DATABASE my_db"
+    }
+  },
+  {
+    "name": "create_procedure",
+    "input": {
+      "query": "CREATE PROCEDURE my_proc AS BEGIN SELECT * FROM MyTable END"
+    },
+    "expected": {
+      "db.query.summary": "CREATE PROCEDURE my_proc SELECT MyTable"
+    }
+  },
+  {
+    "name": "oracle_angle_quote",
+    "input": {
+        "query": "select * from foo where bar=q'<baz's>' and x=5"
+    },
+    "expected": {
+        "db.query.summary": "select foo"
+    }
+  },
+  {
+    "name": "cassandra_blobs",
+    "input": {
+      "query": "select * from foo where bar=0xabcdef123 and x=5"
+    },
+    "expected": {
+        "db.query.summary": "select foo"
+    }
+  }
 ]