1
1
package logs
2
2
3
3
import (
4
+ "bytes"
4
5
"cmp"
5
6
"context"
6
7
"errors"
@@ -9,6 +10,7 @@ import (
9
10
"math"
10
11
11
12
"github.com/grafana/loki/v3/pkg/dataobj/internal/dataset"
13
+ "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
12
14
"github.com/grafana/loki/v3/pkg/dataobj/internal/result"
13
15
"github.com/grafana/loki/v3/pkg/util/loser"
14
16
)
@@ -60,9 +62,7 @@ func mergeTables(buf *tableBuffer, pageSize, pageRowCount int, compressionOpts d
60
62
messageBuilder = buf .Message (pageSize , pageRowCount , compressionOpts )
61
63
)
62
64
63
- var (
64
- tableSequences = make ([]* tableSequence , 0 , len (tables ))
65
- )
65
+ tableSequences := make ([]* tableSequence , 0 , len (tables ))
66
66
for _ , t := range tables {
67
67
dsetColumns , err := result .Collect (t .ListColumns (context .Background ()))
68
68
if err != nil {
@@ -96,6 +96,7 @@ func mergeTables(buf *tableBuffer, pageSize, pageRowCount int, compressionOpts d
96
96
tree := loser .New (tableSequences , maxValue , tableSequenceAt , CompareForSortOrder (sort ), tableSequenceClose )
97
97
defer tree .Close ()
98
98
99
+ var prev dataset.Row
99
100
for tree .Next () {
100
101
seq := tree .Winner ()
101
102
@@ -104,6 +105,12 @@ func mergeTables(buf *tableBuffer, pageSize, pageRowCount int, compressionOpts d
104
105
return nil , err
105
106
}
106
107
108
+ if equalRows (prev , row ) {
109
+ // Skip equal rows
110
+ continue
111
+ }
112
+ prev = row
113
+
107
114
for i , column := range seq .columns {
108
115
// column is guaranteed to be a *tableColumn since we got it from *table.
109
116
column := column .(* tableColumn )
@@ -258,3 +265,41 @@ func CompareRows(a, b dataset.Row) int {
258
265
}
259
266
return cmp .Compare (bTimestamp , aTimestamp )
260
267
}
268
+
269
+ // equalRows compares two rows for equality, column by column.
270
+ // a row is considered equal if all the columns are equal.
271
+ func equalRows (a , b dataset.Row ) bool {
272
+ if len (a .Values ) != len (b .Values ) {
273
+ return false
274
+ }
275
+
276
+ // The first two columns of each row are *always* stream ID and timestamp, so they will be checked first.
277
+ // This means equalRows will exit quickly for rows with different timestamps without reading the rest of the columns.
278
+ for i := 0 ; i < len (a .Values ); i ++ {
279
+ aType , bType := a .Values [i ].Type (), b .Values [i ].Type ()
280
+ if aType != bType {
281
+ return false
282
+ }
283
+
284
+ switch aType {
285
+ case datasetmd .PHYSICAL_TYPE_INT64 :
286
+ if a .Values [i ].Int64 () != b .Values [i ].Int64 () {
287
+ return false
288
+ }
289
+ case datasetmd .PHYSICAL_TYPE_UINT64 :
290
+ if a .Values [i ].Uint64 () != b .Values [i ].Uint64 () {
291
+ return false
292
+ }
293
+ case datasetmd .PHYSICAL_TYPE_BINARY :
294
+ if ! bytes .Equal (a .Values [i ].Binary (), b .Values [i ].Binary ()) {
295
+ return false
296
+ }
297
+ case datasetmd .PHYSICAL_TYPE_UNSPECIFIED :
298
+ continue
299
+ default :
300
+ return false
301
+ }
302
+ }
303
+
304
+ return true
305
+ }
0 commit comments