From 56176897f7bad8bdec0c6a6db53223ac7683ba5e Mon Sep 17 00:00:00 2001
From: ksg <ksg97031@gmail.com>
Date: Sat, 25 Jan 2025 04:10:32 +0900
Subject: [PATCH 1/6] Refactor Ollama analyzer for stricter JSON use

Signed-off-by: ksg <ksg97031@gmail.com>
---
 .../analyzers/llm_analyzers/ollama.cr         | 79 ++++++++++++-------
 src/llm/ollama/ollama.cr                      |  1 +
 2 files changed, 51 insertions(+), 29 deletions(-)

diff --git a/src/analyzer/analyzers/llm_analyzers/ollama.cr b/src/analyzer/analyzers/llm_analyzers/ollama.cr
index f4ec7ab2..71142318 100644
--- a/src/analyzer/analyzers/llm_analyzers/ollama.cr
+++ b/src/analyzer/analyzers/llm_analyzers/ollama.cr
@@ -26,11 +26,27 @@ module Analyzer::AI
 
         # Filter files that are likely to contain endpoints
         filter_prompt = <<-PROMPT
-        !! Respond only in JSON format. Do not include explanations, comments, or any additional text. !!
-        ---
-        Analyze the following list of file paths and identify which files are likely to represent endpoints, including API endpoints, web pages, or static resources.
-        Exclude directories from the analysis and focus only on individual files.
-        Return the result as a JSON array of file paths that should be analyzed further.
+        Analyze the provided list of file paths and identify individual files that are likely to represent endpoints, such as API endpoints, web pages, or static resources. 
+        Ignore directories and focus exclusively on files.
+
+        Return the result strictly in the following JSON structure:
+        {
+          "files": [
+            "string / e.g., /path/to/file1",
+            "string / e.g., /path/to/file2",
+            "string / e.g., /path/to/file3"
+          ]
+        }
+
+        If no relevant files are found, return:
+        {
+          "files": []
+        }
+
+        Guidelines:
+        - Do not include directories in the output.
+        - Focus on files related to endpoints (API, web pages, or static resources).
+        - Provide only the JSON response with no explanations or additional text.
 
         File paths:
         #{all_paths.join("\n")}
@@ -40,7 +56,7 @@ module Analyzer::AI
         filtered_paths = JSON.parse(filter_response.to_s)
         logger.debug_sub filter_response
 
-        filtered_paths.as_a.each do |fpath|
+        filtered_paths["files"].as_a.each do |fpath|
           target_paths << fpath.as_s
         end
       else
@@ -61,30 +77,34 @@ module Analyzer::AI
 
               begin
                 prompt = <<-PROMPT
-                !! Respond only in JSON format. Do not include explanations, comments, or any additional text. !!
-                ---
-                Analyze the given source code and extract the endpoint and parameter details. Strictly follow this JSON structure:
-
-                [
-                  {
-                    "url": "string / e.g., /api/v1/users",
-                    "method": "string / e.g., GET, POST, PUT, DELETE",
-                    "params": [
-                      {
-                        "name": "string / e.g., id",
-                        "param_type": "string / one of: query, json, form, header, cookie, path",
-                        "value": "string / optional, default empty"
-                      }
-                    ]
-                  }
-                ]
-
-                - Ensure `param_type` uses only these values: `query`, `json`, `form`, `header`, `cookie`, `path`.
-                - If no endpoints are found in the code, respond with an empty array `[]`.
-                - Do not deviate from the specified JSON structure.
+                Analyze the provided source code to extract details about the endpoints and their parameters.
+
+                Return the result strictly in the following JSON structure:
+                {
+                  "endpoints": [
+                    {
+                      "url": "string / e.g., /api/v1/users",
+                      "method": "string / e.g., GET, POST, PUT, DELETE",
+                      "params": [
+                        {
+                          "name": "string / e.g., id",
+                          "param_type": "string / one of: query, json, form, header, cookie, path",
+                          "value": "string / optional, default empty"
+                        }
+                      ]
+                    }
+                  ]
+                }
+
+                If no endpoints are found, return:
+                {"endpoints": []}
+
+                Guidelines:
+                - `param_type` must strictly use one of these values: `query`, `json`, `form`, `header`, `cookie`, `path`.
+                - Do not include explanations, comments, or additional text.
+                - Provide only the JSON response as output.
 
                 Input Code:
-
                 #{content}
                 PROMPT
 
@@ -93,7 +113,8 @@ module Analyzer::AI
                 logger.debug_sub response
 
                 response_json = JSON.parse(response.to_s)
-                response_json.as_a.each do |endpoint|
+                next unless response_json["endpoints"].as_a.size > 0
+                response_json["endpoints"].as_a.each do |endpoint|
                   url = endpoint["url"].as_s
                   method = endpoint["method"].as_s
                   params = endpoint["params"].as_a.map do |param|
diff --git a/src/llm/ollama/ollama.cr b/src/llm/ollama/ollama.cr
index 4b7e3065..ad7a193c 100644
--- a/src/llm/ollama/ollama.cr
+++ b/src/llm/ollama/ollama.cr
@@ -11,6 +11,7 @@ module LLM
         :model  => @model,
         :prompt => prompt,
         :stream => false,
+        :format => "json",
       }
 
       response = Crest.post(@api, body, json: true)

From 56d6fbccc6b418484d46559ffd14e898c37271fc Mon Sep 17 00:00:00 2001
From: ksg <ksg97031@gmail.com>
Date: Sat, 25 Jan 2025 04:18:32 +0900
Subject: [PATCH 2/6] Standardize file path handling in Ollama analyzer

Signed-off-by: ksg <ksg97031@gmail.com>
---
 src/analyzer/analyzers/llm_analyzers/ollama.cr | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/analyzer/analyzers/llm_analyzers/ollama.cr b/src/analyzer/analyzers/llm_analyzers/ollama.cr
index 71142318..e044542f 100644
--- a/src/analyzer/analyzers/llm_analyzers/ollama.cr
+++ b/src/analyzer/analyzers/llm_analyzers/ollama.cr
@@ -49,7 +49,7 @@ module Analyzer::AI
         - Provide only the JSON response with no explanations or additional text.
 
         File paths:
-        #{all_paths.join("\n")}
+        #{all_paths.map { |path| "- \"#{File.expand_path(path)}\"" }.join("\n")}
         PROMPT
 
         filter_response = ollama.request(filter_prompt)

From 6c293f7fdd9dfbe327a7134da5bbc84e4692ad4f Mon Sep 17 00:00:00 2001
From: ksg <ksg97031@gmail.com>
Date: Sat, 25 Jan 2025 04:38:20 +0900
Subject: [PATCH 3/6] Refactor prompt for clarity and conciseness

Signed-off-by: ksg <ksg97031@gmail.com>
---
 src/analyzer/analyzers/llm_analyzers/ollama.cr | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/analyzer/analyzers/llm_analyzers/ollama.cr b/src/analyzer/analyzers/llm_analyzers/ollama.cr
index e044542f..74ba3740 100644
--- a/src/analyzer/analyzers/llm_analyzers/ollama.cr
+++ b/src/analyzer/analyzers/llm_analyzers/ollama.cr
@@ -26,7 +26,7 @@ module Analyzer::AI
 
         # Filter files that are likely to contain endpoints
         filter_prompt = <<-PROMPT
-        Analyze the provided list of file paths and identify individual files that are likely to represent endpoints, such as API endpoints, web pages, or static resources. 
+        Analyze the provided list of file paths and identify individual files that are likely to represent endpoints, such as API endpoints, web pages or static resources. 
         Ignore directories and focus exclusively on files.
 
         Return the result strictly in the following JSON structure:
@@ -45,7 +45,7 @@ module Analyzer::AI
 
         Guidelines:
         - Do not include directories in the output.
-        - Focus on files related to endpoints (API, web pages, or static resources).
+        - Focus on files related to endpoints (API, web pages or static resources).
         - Provide only the JSON response with no explanations or additional text.
 
         File paths:
@@ -100,9 +100,11 @@ module Analyzer::AI
                 {"endpoints": []}
 
                 Guidelines:
-                - `param_type` must strictly use one of these values: `query`, `json`, `form`, `header`, `cookie`, `path`.
-                - Do not include explanations, comments, or additional text.
-                - Provide only the JSON response as output.
+                - The JSON should include only the fields: "url", "method" and "params" for each endpoint.
+                - The "method" field should strictly use one of these values: GET, POST, PUT, DELETE.
+                - The "params" field should consist of "name", "param_type" and "value".
+                - "param_type" must strictly use one of these values: "query", "json", "form", "header", "cookie" and "path".
+                - Do not include explanations, comments or additional text.
 
                 Input Code:
                 #{content}

From 2e72c093966f19453596e52adb38b2621c7d992d Mon Sep 17 00:00:00 2001
From: ksg <ksg97031@gmail.com>
Date: Sat, 25 Jan 2025 16:21:58 +0900
Subject: [PATCH 4/6] Standardize response formatting across Ollama methods

---
 .../analyzers/llm_analyzers/ollama.cr         | 107 +++++++++++-------
 src/llm/ollama/ollama.cr                      |  22 +++-
 2 files changed, 85 insertions(+), 44 deletions(-)

diff --git a/src/analyzer/analyzers/llm_analyzers/ollama.cr b/src/analyzer/analyzers/llm_analyzers/ollama.cr
index 74ba3740..50f0b0fe 100644
--- a/src/analyzer/analyzers/llm_analyzers/ollama.cr
+++ b/src/analyzer/analyzers/llm_analyzers/ollama.cr
@@ -26,33 +26,30 @@ module Analyzer::AI
 
         # Filter files that are likely to contain endpoints
         filter_prompt = <<-PROMPT
-        Analyze the provided list of file paths and identify individual files that are likely to represent endpoints, such as API endpoints, web pages or static resources. 
-        Ignore directories and focus exclusively on files.
+        Analyze the following list of file paths and identify which files are likely to represent endpoints, including API endpoints, web pages, or static resources.
+        Exclude directories from the analysis and focus only on individual files.
+        Return the result as a JSON array of file paths that should be analyzed further.
 
-        Return the result strictly in the following JSON structure:
-        {
-          "files": [
-            "string / e.g., /path/to/file1",
-            "string / e.g., /path/to/file2",
-            "string / e.g., /path/to/file3"
-          ]
-        }
+        File paths:
+        #{all_paths.join("\n")}
+        PROMPT
 
-        If no relevant files are found, return:
+        format = <<-FORMAT
         {
-          "files": []
+          "type": "object",
+          "properties": {
+            "files": {
+              "type": "array",
+              "items": {
+                "type": "string"
+              }
+            }
+          },
+          "required": ["files"]
         }
+        FORMAT
 
-        Guidelines:
-        - Do not include directories in the output.
-        - Focus on files related to endpoints (API, web pages or static resources).
-        - Provide only the JSON response with no explanations or additional text.
-
-        File paths:
-        #{all_paths.map { |path| "- \"#{File.expand_path(path)}\"" }.join("\n")}
-        PROMPT
-
-        filter_response = ollama.request(filter_prompt)
+        filter_response = ollama.request_with_format(filter_prompt, format)
         filtered_paths = JSON.parse(filter_response.to_s)
         logger.debug_sub filter_response
 
@@ -79,23 +76,6 @@ module Analyzer::AI
                 prompt = <<-PROMPT
                 Analyze the provided source code to extract details about the endpoints and their parameters.
 
-                Return the result strictly in the following JSON structure:
-                {
-                  "endpoints": [
-                    {
-                      "url": "string / e.g., /api/v1/users",
-                      "method": "string / e.g., GET, POST, PUT, DELETE",
-                      "params": [
-                        {
-                          "name": "string / e.g., id",
-                          "param_type": "string / one of: query, json, form, header, cookie, path",
-                          "value": "string / optional, default empty"
-                        }
-                      ]
-                    }
-                  ]
-                }
-
                 If no endpoints are found, return:
                 {"endpoints": []}
 
@@ -110,7 +90,48 @@ module Analyzer::AI
                 #{content}
                 PROMPT
 
-                response = ollama.request(prompt)
+                format = <<-FORMAT
+                {
+                  "type": "object",
+                  "properties": {
+                    "endpoints": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "url": {
+                            "type": "string"
+                          },
+                          "method": {
+                            "type": "string"
+                          },
+                          "params": {
+                            "type": "array",
+                            "items": {
+                              "type": "object",
+                              "properties": {
+                                "name": {
+                                  "type": "string"
+                                },
+                                "param_type": {
+                                  "type": "string"
+                                },
+                                "value": {
+                                  "type": "string"
+                                }
+                              },
+                              "required": ["name", "param_type", "value"]
+                            }
+                          }
+                        },
+                        "required": ["url", "method", "params"]
+                      }
+                    }
+                  }
+                }
+                FORMAT
+
+                response = ollama.request_with_format(prompt, format)
                 logger.debug "Ollama response (#{relative_path}):"
                 logger.debug_sub response
 
@@ -130,8 +151,8 @@ module Analyzer::AI
                   @result << Endpoint.new(url, method, params, details)
                 end
               rescue ex : Exception
-                puts "Error processing file: #{path}"
-                puts "Error: #{ex.message}"
+                logger.debug "Error processing file: #{path}"
+                logger.debug "Error: #{ex.message}"
               end
             end
           end
@@ -145,7 +166,7 @@ module Analyzer::AI
     end
 
     def ignore_extensions
-      [".css", ".xml", ".json", ".yml", ".yaml", ".md", ".jpg", ".jpeg", ".png", ".gif", ".svg", ".ico", ".eot", ".ttf", ".woff", ".woff2", ".otf", ".mp3", ".mp4", ".avi", ".mov", ".webm", ".zip", ".tar", ".gz", ".7z", ".rar", ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".txt", ".csv", ".log", ".sql", ".bak", ".swp"]
+      [".css", ".xml", ".json", ".yml", ".yaml", ".md", ".jpg", ".jpeg", ".png", ".gif", ".svg", ".ico", ".eot", ".ttf", ".woff", ".woff2", ".otf", ".mp3", ".mp4", ".avi", ".mov", ".webm", ".zip", ".tar", ".gz", ".7z", ".rar", ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".txt", ".csv", ".log", ".sql", ".bak", ".swp", ".jar"]
     end
   end
 end
diff --git a/src/llm/ollama/ollama.cr b/src/llm/ollama/ollama.cr
index ad7a193c..c8c0788b 100644
--- a/src/llm/ollama/ollama.cr
+++ b/src/llm/ollama/ollama.cr
@@ -1,3 +1,5 @@
+require "json"
+
 module LLM
   class Ollama
     def initialize(url : String, model : String)
@@ -11,7 +13,25 @@ module LLM
         :model  => @model,
         :prompt => prompt,
         :stream => false,
-        :format => "json",
+      }
+
+      response = Crest.post(@api, body, json: true)
+      response_json = JSON.parse response.body
+
+      response_json["response"]
+    rescue ex : Exception
+      puts "Error: #{ex.message}"
+
+      ""
+    end
+
+    def request_with_format(prompt : String, format : String)
+      body = {
+        :model  => @model,
+        :prompt => prompt,
+        :stream => false,
+        :format => JSON.parse(format),
+        :temperature => 0.5,
       }
 
       response = Crest.post(@api, body, json: true)

From 4069803c7cdd5c5f4c5906213ffbb3d2fb1d8039 Mon Sep 17 00:00:00 2001
From: ksg <ksg97031@gmail.com>
Date: Sat, 25 Jan 2025 16:40:00 +0900
Subject: [PATCH 5/6] Refactor and consolidate Ollama request handling

Signed-off-by: ksg <ksg97031@gmail.com>
---
 .../analyzers/llm_analyzers/ollama.cr         | 24 +++++++++++--------
 src/llm/ollama/ollama.cr                      | 23 +++---------------
 2 files changed, 17 insertions(+), 30 deletions(-)

diff --git a/src/analyzer/analyzers/llm_analyzers/ollama.cr b/src/analyzer/analyzers/llm_analyzers/ollama.cr
index 50f0b0fe..2cb3be05 100644
--- a/src/analyzer/analyzers/llm_analyzers/ollama.cr
+++ b/src/analyzer/analyzers/llm_analyzers/ollama.cr
@@ -27,11 +27,17 @@ module Analyzer::AI
         # Filter files that are likely to contain endpoints
         filter_prompt = <<-PROMPT
         Analyze the following list of file paths and identify which files are likely to represent endpoints, including API endpoints, web pages, or static resources.
-        Exclude directories from the analysis and focus only on individual files.
-        Return the result as a JSON array of file paths that should be analyzed further.
 
-        File paths:
-        #{all_paths.join("\n")}
+        If no files are found, return:
+        {"files": []}
+
+        Guidelines:
+        - Focus only on individual files.
+        - Do not include directories.
+        - Do not include explanations, comments or additional text.
+
+        Input Files:
+        #{all_paths.map { |path| File.expand_path(path) }.join("\n")}
         PROMPT
 
         format = <<-FORMAT
@@ -49,7 +55,7 @@ module Analyzer::AI
         }
         FORMAT
 
-        filter_response = ollama.request_with_format(filter_prompt, format)
+        filter_response = ollama.request(filter_prompt, format)
         filtered_paths = JSON.parse(filter_response.to_s)
         logger.debug_sub filter_response
 
@@ -80,10 +86,8 @@ module Analyzer::AI
                 {"endpoints": []}
 
                 Guidelines:
-                - The JSON should include only the fields: "url", "method" and "params" for each endpoint.
-                - The "method" field should strictly use one of these values: GET, POST, PUT, DELETE.
-                - The "params" field should consist of "name", "param_type" and "value".
-                - "param_type" must strictly use one of these values: "query", "json", "form", "header", "cookie" and "path".
+                - The "method" field should strictly use one of these values: "GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD".
+                - The "param_type" must strictly use one of these values: "query", "json", "form", "header", "cookie" and "path".
                 - Do not include explanations, comments or additional text.
 
                 Input Code:
@@ -131,7 +135,7 @@ module Analyzer::AI
                 }
                 FORMAT
 
-                response = ollama.request_with_format(prompt, format)
+                response = ollama.request(prompt, format)
                 logger.debug "Ollama response (#{relative_path}):"
                 logger.debug_sub response
 
diff --git a/src/llm/ollama/ollama.cr b/src/llm/ollama/ollama.cr
index c8c0788b..9c871a4d 100644
--- a/src/llm/ollama/ollama.cr
+++ b/src/llm/ollama/ollama.cr
@@ -8,30 +8,13 @@ module LLM
       @model = model
     end
 
-    def request(prompt : String)
+    def request(prompt : String, format : String = "json")
       body = {
         :model  => @model,
         :prompt => prompt,
         :stream => false,
-      }
-
-      response = Crest.post(@api, body, json: true)
-      response_json = JSON.parse response.body
-
-      response_json["response"]
-    rescue ex : Exception
-      puts "Error: #{ex.message}"
-
-      ""
-    end
-
-    def request_with_format(prompt : String, format : String)
-      body = {
-        :model  => @model,
-        :prompt => prompt,
-        :stream => false,
-        :format => JSON.parse(format),
-        :temperature => 0.5,
+        :temperature => 0.3,
+        :format => format == "json" ? "json" : JSON.parse(format)
       }
 
       response = Crest.post(@api, body, json: true)

From 1641e38d26c76a2fc6db3d4e67c56e4b90aec903 Mon Sep 17 00:00:00 2001
From: ksg <ksg97031@gmail.com>
Date: Sat, 25 Jan 2025 16:50:21 +0900
Subject: [PATCH 6/6] Improve response validation in the Ollama analyzer

Signed-off-by: ksg <ksg97031@gmail.com>
---
 src/analyzer/analyzers/llm_analyzers/ollama.cr | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/analyzer/analyzers/llm_analyzers/ollama.cr b/src/analyzer/analyzers/llm_analyzers/ollama.cr
index 2cb3be05..135edd54 100644
--- a/src/analyzer/analyzers/llm_analyzers/ollama.cr
+++ b/src/analyzer/analyzers/llm_analyzers/ollama.cr
@@ -28,16 +28,13 @@ module Analyzer::AI
         filter_prompt = <<-PROMPT
         Analyze the following list of file paths and identify which files are likely to represent endpoints, including API endpoints, web pages, or static resources.
 
-        If no files are found, return:
-        {"files": []}
-
         Guidelines:
         - Focus only on individual files.
         - Do not include directories.
         - Do not include explanations, comments or additional text.
 
         Input Files:
-        #{all_paths.map { |path| File.expand_path(path) }.join("\n")}
+        #{all_paths.map { |path| "- #{File.expand_path(path)}" }.join("\n")}
         PROMPT
 
         format = <<-FORMAT
@@ -82,9 +79,6 @@ module Analyzer::AI
                 prompt = <<-PROMPT
                 Analyze the provided source code to extract details about the endpoints and their parameters.
 
-                If no endpoints are found, return:
-                {"endpoints": []}
-
                 Guidelines:
                 - The "method" field should strictly use one of these values: "GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD".
                 - The "param_type" must strictly use one of these values: "query", "json", "form", "header", "cookie" and "path".
@@ -131,7 +125,8 @@ module Analyzer::AI
                         "required": ["url", "method", "params"]
                       }
                     }
-                  }
+                  },
+                  "required": ["endpoints"]
                 }
                 FORMAT