Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions core/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ dependencies {
testImplementation(platform(libs.junit.bom))
testImplementation(libs.junit.jupiter.api)
testImplementation(libs.junit.jupiter.engine)
testImplementation(libs.junit.jupiter.params)
testImplementation(libs.kotlin.test.junit5)

// Optional Android dependency
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,13 @@ enum class PatchProcessors : PatchProcessor {
override fun extractCodeBlocks(response: String) = matcher.extractCodeBlocks(response)
override fun getInitiatorPattern() = matcher.getInitiatorPattern()
override val matcher = FuzzyPatchMatcher()
},

// Enum entry that routes patch handling through a PythonPatcher instance.
Python {;
// Label declared directly on this entry.
override val label = "Python"
// Code-block extraction and the initiator pattern are both forwarded to the matcher declared below.
override fun extractCodeBlocks(response: String) = matcher.extractCodeBlocks(response)
override fun getInitiatorPattern() = matcher.getInitiatorPattern()
// The matcher instance backing this entry, created when the entry is initialized.
override val matcher = PythonPatcher()
// The trailing ';' closes the list of enum entries; member declarations of the enum follow.
};

override val label: String get() = matcher.label
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,72 @@
package com.simiacryptus.cognotik.diff

import com.simiacryptus.cognotik.util.LoggerFactory
import org.apache.commons.text.similarity.LevenshteinDistance
import kotlin.math.floor
import kotlin.math.max
import com.simiacryptus.cognotik.util.LoggerFactory
import org.apache.commons.text.similarity.LevenshteinDistance
import kotlin.math.floor
import kotlin.math.max

/**
* PythonPatcher is an alternate patch processor optimized for Python and YAML.
* In Python/YAML code the leading spaces (indentation) are significant, so the normalizer
* removes only trailing whitespace. The bracket metrics used by IterativePatchUtil are omitted.
*/
object PythonPatchUtil {
class PythonPatcher : PatchProcessor {
override val label: String = "Python/YAML Patch Processor"

/**
 * Instruction text describing the patch format this processor expects: diffs inside
 * fenced `diff` code blocks, each preceded by a header naming the file, with `+`/`-`
 * line markers and two lines of surrounding context. It also states that leading
 * whitespace must be preserved for Python and YAML files.
 *
 * The raw string is passed through [trimIndent], which removes the common leading
 * indentation of its lines.
 *
 * NOTE(review): presumably this text is embedded in a prompt sent to a language model;
 * confirm against the callers of `patchFormatPrompt`.
 */
override val patchFormatPrompt = """
Response should use one or more code patches in diff format within ```diff code blocks.
Each diff should be preceded by a header that identifies the file being modified.
The diff format should use + for line additions, - for line deletions.
The diff should include 2 lines of context before and after every change.

IMPORTANT: For Python and YAML files, preserve leading whitespace (indentation) exactly.
Only trailing whitespace will be normalized during patch application.

Example:

Here are the patches:

### src/utils/example.py
```diff

def example_function():
- return 1
+ return 2

```

### config/settings.yaml
```diff

database:
- host: localhost
+ host: 127.0.0.1
port: 5432
```
""".trimIndent()

/**
 * Returns the pattern that marks the start of a fenced code block: three backticks,
 * an optional run of word characters (the language tag), then a newline.
 */
override fun getInitiatorPattern(): Regex = Regex("(?s)```\\w*\n")

/**
 * Extracts fenced code blocks from [response].
 *
 * A fence must start at the beginning of a line. Normally each block is matched
 * non-greedily, so every fence pair yields one entry. If a greedy scan finds a block
 * tagged `markdown`, the greedy result is used instead, so code fences nested inside
 * that block stay part of its body.
 *
 * The body of each block has leading blank lines and trailing whitespace removed.
 * The indentation of the first non-blank line is kept, because this processor treats
 * leading whitespace as significant; a plain `trim()` would strip it from the first
 * line only.
 *
 * @param response text that may contain fenced code blocks
 * @return one `(language tag, code)` pair per block, in order of appearance; the
 *   language tag is the text following the opening fence and may be empty
 */
override fun extractCodeBlocks(response: String): List<Pair<String, String>> {
    val lazyFence = """(?s)(?<![^\n])```([^\n]*)\n(.*?)\n```""".toRegex()
    val greedyFence = """(?s)(?<![^\n])```([^\n]*)\n(.*)\n```""".toRegex()
    // Whitespace-only lines at the very start of a block body.
    val leadingBlankLines = """^(?:[ \t]*\r?\n)+""".toRegex()

    val greedyMatches = greedyFence.findAll(response).toList()
    // Use the greedy result if it found a markdown block; otherwise run the non-greedy scan.
    val matches = if (greedyMatches.any { it.groupValues[1] == "markdown" }) {
        greedyMatches
    } else {
        lazyFence.findAll(response).toList()
    }
    return matches.map { match ->
        val language = match.groupValues[1]
        val code = match.groupValues[2].replaceFirst(leadingBlankLines, "").trimEnd()
        language to code
    }
}

private enum class LineType { CONTEXT, ADD, DELETE }


Expand Down Expand Up @@ -50,7 +106,7 @@ object PythonPatchUtil {
/**
* Generate a patch from oldCode to newCode.
*/
fun generatePatch(oldCode: String, newCode: String): String {
override fun generatePatch(oldCode: String, newCode: String): String {
log.info("Starting python/yaml patch generation process")
val sourceLines = parseLines(oldCode)
val newLines = parseLines(newCode)
Expand All @@ -77,7 +133,7 @@ object PythonPatchUtil {
/**
* Applies a patch to the given source text.
*/
fun applyPatch(source: String, patch: String): String {
override fun applyPatch(source: String, patch: String): String {
log.info("Starting python/yaml patch application process")
val sourceLines = parseLines(source)
var patchLines = parsePatchLines(patch, sourceLines)
Expand Down Expand Up @@ -560,5 +616,5 @@ object PythonPatchUtil {
log.debug("Finished fixing patch line order for python/yaml")
}

private val log = LoggerFactory.getLogger(PythonPatchUtil::class.java)
private val log = LoggerFactory.getLogger(PythonPatcher::class.java)
}
14 changes: 14 additions & 0 deletions core/src/main/kotlin/com/simiacryptus/cognotik/util/isBinary.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package com.simiacryptus.cognotik.util

import java.io.InputStream

/**
 * Heuristic check for whether this string looks like binary data rather than text.
 *
 * The string is encoded with [toByteArray] (UTF-8 by default) and every byte outside
 * printable ASCII (0x20..0x7E) is counted as suspicious. Tab, line feed and carriage
 * return are not counted, so ordinary multi-line or tab-indented text is not
 * misclassified. The string is reported as binary when the suspicious-byte count
 * exceeds one tenth of its length in characters.
 *
 * Bytes of non-ASCII characters are still counted as suspicious, so text made up
 * largely of non-ASCII characters is reported as binary.
 */
val String.isBinary: Boolean
    get() {
        val suspicious = toByteArray().count { byte ->
            val code = byte.toInt()
            val isTextWhitespace = code == 0x09 || code == 0x0A || code == 0x0D
            !isTextWhitespace && (code < 0x20 || code > 0x7E)
        }
        return suspicious > length / 10
    }
/**
 * Heuristic check for whether the remaining content of this stream looks like binary
 * data rather than text.
 *
 * Reading this property consumes the stream: all remaining bytes are read with
 * [readBytes]. Every byte outside printable ASCII (0x20..0x7E), other than tab, line
 * feed and carriage return, is counted as suspicious. The content is reported as
 * binary when that count exceeds one tenth of the number of bytes read.
 *
 * The threshold is taken from the bytes actually read. Calling `available()` after
 * `readBytes()` would query an already drained stream.
 */
val InputStream.isBinary: Boolean
    get() {
        val bytes = readBytes()
        val suspicious = bytes.count { byte ->
            val code = byte.toInt()
            val isTextWhitespace = code == 0x09 || code == 0x0A || code == 0x0D
            !isTextWhitespace && (code < 0x20 || code > 0x7E)
        }
        return suspicious > bytes.size / 10
    }
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package com.simiacryptus.diff

import com.simiacryptus.cognotik.diff.FuzzyPatchMatcher
import com.simiacryptus.diff.PatchTestCase.Companion.test
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource

/**
 * Parameterized suite that feeds each listed JSON resource, together with
 * [FuzzyPatchMatcher.default], to the `test` helper imported from `PatchTestCase`.
 */
class FuzzyPatchMatcherTest {

    /** Runs one patch scenario identified by [resourceName]. */
    @ParameterizedTest
    @MethodSource("testCases")
    fun testPatchApplication(resourceName: String) {
        test(resourceName, FuzzyPatchMatcher.default)
    }

    companion object {
        /** Resource names of the patch scenarios to run; commented-out entries are disabled. */
        @JvmStatic
        fun testCases(): List<String> = listOf(
            "/patch_exact_match.json",
            "/patch_add_line.json",
            "/patch_modify_line.json",
            "/patch_remove_line.json",
            // "/patch_add_2_lines_variant_2.json",
            // "/patch_add_2_lines_variant_3.json",
            "/patch_from_data_1.json",
            "/patch_from_data_2.json",
            "/yaml_1.json",
        )
    }
}

Loading
Loading