Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UTF-8 support in validation, and some parsers and formatters #537

Merged
merged 5 commits into from
Jan 23, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion config/http_config.go
Original file line number Diff line number Diff line change
@@ -30,7 +30,7 @@ import (
"sync"
"time"

"github.com/mwitkow/go-conntrack"
conntrack "github.com/mwitkow/go-conntrack"
"golang.org/x/net/http/httpproxy"
"golang.org/x/net/http2"
"golang.org/x/oauth2"
52 changes: 49 additions & 3 deletions expfmt/decode_test.go
Original file line number Diff line number Diff line change
@@ -17,6 +17,7 @@ import (
"bufio"
"errors"
"io"
"math"
"net/http"
"reflect"
"sort"
@@ -104,9 +105,10 @@ func TestProtoDecoder(t *testing.T) {
testTime := model.Now()

scenarios := []struct {
in string
expected model.Vector
fail bool
in string
expected model.Vector
legacyNameFail bool
fail bool
}{
{
in: "",
@@ -332,6 +334,30 @@ func TestProtoDecoder(t *testing.T) {
},
},
},
{
in: "\xa8\x01\n\ngauge.name\x12\x11gauge\ndoc\nstr\"ing\x18\x01\"T\n\x1b\n\x06name.1\x12\x11val with\nnew line\n*\n\x06name*2\x12 val with \\backslash and \"quotes\"\x12\t\t\x00\x00\x00\x00\x00\x00\xf0\x7f\"/\n\x10\n\x06name.1\x12\x06Björn\n\x10\n\x06name*2\x12\x06佖佥\x12\t\t\xd1\xcfD\xb9\xd0\x05\xc2H",
legacyNameFail: true,
expected: model.Vector{
&model.Sample{
Metric: model.Metric{
model.MetricNameLabel: "gauge.name",
"name.1": "val with\nnew line",
"name*2": "val with \\backslash and \"quotes\"",
},
Value: model.SampleValue(math.Inf(+1)),
Timestamp: testTime,
},
&model.Sample{
Metric: model.Metric{
model.MetricNameLabel: "gauge.name",
"name.1": "Björn",
"name*2": "佖佥",
},
Value: 3.14e42,
Timestamp: testTime,
},
},
},
}

for i, scenario := range scenarios {
@@ -344,11 +370,31 @@ func TestProtoDecoder(t *testing.T) {

var all model.Vector
for {
model.NameValidationScheme = model.LegacyValidation
var smpls model.Vector
err := dec.Decode(&smpls)
if err != nil && errors.Is(err, io.EOF) {
break
}
if scenario.legacyNameFail {
if err == nil {
t.Fatal("Expected error when decoding without UTF-8 support enabled but got none")
}
model.NameValidationScheme = model.UTF8Validation
dec = &SampleDecoder{
Dec: &protoDecoder{r: strings.NewReader(scenario.in)},
Opts: &DecodeOptions{
Timestamp: testTime,
},
}
err = dec.Decode(&smpls)
if errors.Is(err, io.EOF) {
break
}
if err != nil {
t.Fatalf("Unexpected error when decoding with UTF-8 support: %v", err)
}
}
if scenario.fail {
if err == nil {
t.Fatal("Expected error but got none")
8 changes: 7 additions & 1 deletion expfmt/expfmt.go
Original file line number Diff line number Diff line change
@@ -17,7 +17,13 @@ package expfmt
// Format specifies the HTTP content type of the different wire protocols.
type Format string

// Constants to assemble the Content-Type values for the different wire protocols.
// Constants to assemble the Content-Type values for the different wire
// protocols. The Content-Type strings here are all for the legacy exposition
// formats, where valid characters for metric names and label names are limited.
// Support for arbitrary UTF-8 characters in those names is already partially
// implemented in this module (see model.ValidationScheme), but to actually use
// it on the wire, new content-type strings will have to be agreed upon and
// added here.
const (
TextVersion = "0.0.4"
ProtoType = `application/vnd.google.protobuf`
85 changes: 58 additions & 27 deletions expfmt/openmetrics_create.go
Original file line number Diff line number Diff line change
@@ -35,6 +35,18 @@ import (
// sanity checks. If the input contains duplicate metrics or invalid metric or
// label names, the conversion will result in invalid text format output.
//
// If metric names conform to the legacy validation pattern, they will be placed
// outside the brackets in the traditional way, like `foo{}`. If the metric name
// fails the legacy validation check, it will be placed quoted inside the
// brackets: `{"foo"}`. As stated above, the input is assumed to be sanitized and
// no error will be thrown in this case.
//
// Similar to metric names, if label names conform to the legacy validation
// pattern, they will be unquoted as normal, like `foo{bar="baz"}`. If the label
// name fails the legacy validation check, it will be quoted:
// `foo{"bar"="baz"}`. As stated above, the input is assumed to be sanitized and
// no error will be thrown in this case.
//
// This function fulfills the type 'expfmt.encoder'.
//
// Note that OpenMetrics requires a final `# EOF` line. Since this function acts
@@ -98,7 +110,7 @@ func MetricFamilyToOpenMetrics(out io.Writer, in *dto.MetricFamily) (written int
if err != nil {
return
}
n, err = w.WriteString(shortName)
n, err = writeName(w, shortName)
written += n
if err != nil {
return
@@ -124,7 +136,7 @@ func MetricFamilyToOpenMetrics(out io.Writer, in *dto.MetricFamily) (written int
if err != nil {
return
}
n, err = w.WriteString(shortName)
n, err = writeName(w, shortName)
written += n
if err != nil {
return
@@ -303,21 +315,9 @@ func writeOpenMetricsSample(
floatValue float64, intValue uint64, useIntValue bool,
exemplar *dto.Exemplar,
) (int, error) {
var written int
n, err := w.WriteString(name)
written += n
if err != nil {
return written, err
}
if suffix != "" {
n, err = w.WriteString(suffix)
written += n
if err != nil {
return written, err
}
}
n, err = writeOpenMetricsLabelPairs(
w, metric.Label, additionalLabelName, additionalLabelValue,
written := 0
n, err := writeOpenMetricsNameAndLabelPairs(
w, name+suffix, metric.Label, additionalLabelName, additionalLabelValue,
)
written += n
if err != nil {
@@ -365,27 +365,58 @@ func writeOpenMetricsSample(
return written, nil
}

// writeOpenMetricsLabelPairs works like writeOpenMetrics but formats the float
// in OpenMetrics style.
func writeOpenMetricsLabelPairs(
// writeOpenMetricsNameAndLabelPairs works like writeOpenMetricsSample but
// formats the float in OpenMetrics style.
func writeOpenMetricsNameAndLabelPairs(
w enhancedWriter,
name string,
in []*dto.LabelPair,
additionalLabelName string, additionalLabelValue float64,
) (int, error) {
if len(in) == 0 && additionalLabelName == "" {
return 0, nil
}
var (
written int
separator byte = '{'
written int
separator byte = '{'
metricInsideBraces = false
)

if name != "" {
// If the name does not pass the legacy validity check, we must put the
// metric name inside the braces, quoted.
if !model.IsValidLegacyMetricName(model.LabelValue(name)) {
metricInsideBraces = true
err := w.WriteByte(separator)
written++
if err != nil {
return written, err
}
separator = ','
}

n, err := writeName(w, name)
written += n
if err != nil {
return written, err
}
}

if len(in) == 0 && additionalLabelName == "" {
if metricInsideBraces {
err := w.WriteByte('}')
written++
if err != nil {
return written, err
}
}
return written, nil
}

for _, lp := range in {
err := w.WriteByte(separator)
written++
if err != nil {
return written, err
}
n, err := w.WriteString(lp.GetName())
n, err := writeName(w, lp.GetName())
written += n
if err != nil {
return written, err
@@ -451,7 +482,7 @@ func writeExemplar(w enhancedWriter, e *dto.Exemplar) (int, error) {
if err != nil {
return written, err
}
n, err = writeOpenMetricsLabelPairs(w, e.Label, "", 0)
n, err = writeOpenMetricsNameAndLabelPairs(w, "", e.Label, "", 0)
written += n
if err != nil {
return written, err
133 changes: 125 additions & 8 deletions expfmt/openmetrics_create_test.go
Original file line number Diff line number Diff line change
@@ -82,7 +82,79 @@ name{labelname="val1",basename="basevalue"} 42.0
name{labelname="val2",basename="basevalue"} 0.23 1.23456789e+06
`,
},
// 1: Gauge, some escaping required, +Inf as value, multi-byte characters in label values.
// 1: Dots in name
{
in: &dto.MetricFamily{
Name: proto.String("name.with.dots"),
Help: proto.String("boring help"),
Type: dto.MetricType_COUNTER.Enum(),
Metric: []*dto.Metric{
{
Label: []*dto.LabelPair{
{
Name: proto.String("labelname"),
Value: proto.String("val1"),
},
{
Name: proto.String("basename"),
Value: proto.String("basevalue"),
},
},
Counter: &dto.Counter{
Value: proto.Float64(42),
},
},
{
Label: []*dto.LabelPair{
{
Name: proto.String("labelname"),
Value: proto.String("val2"),
},
{
Name: proto.String("basename"),
Value: proto.String("basevalue"),
},
},
Counter: &dto.Counter{
Value: proto.Float64(.23),
},
TimestampMs: proto.Int64(1234567890),
},
},
},
out: `# HELP "name.with.dots" boring help
# TYPE "name.with.dots" unknown
{"name.with.dots",labelname="val1",basename="basevalue"} 42.0
{"name.with.dots",labelname="val2",basename="basevalue"} 0.23 1.23456789e+06
`,
},
// 2: Dots in name, no labels
{
in: &dto.MetricFamily{
Name: proto.String("name.with.dots"),
Help: proto.String("boring help"),
Type: dto.MetricType_COUNTER.Enum(),
Metric: []*dto.Metric{
{
Counter: &dto.Counter{
Value: proto.Float64(42),
},
},
{
Counter: &dto.Counter{
Value: proto.Float64(.23),
},
TimestampMs: proto.Int64(1234567890),
},
},
},
out: `# HELP "name.with.dots" boring help
# TYPE "name.with.dots" unknown
{"name.with.dots"} 42.0
{"name.with.dots"} 0.23 1.23456789e+06
`,
},
// 3: Gauge, some escaping required, +Inf as value, multi-byte characters in label values.
{
in: &dto.MetricFamily{
Name: proto.String("gauge_name"),
@@ -127,7 +199,52 @@ gauge_name{name_1="val with\nnew line",name_2="val with \\backslash and \"quotes
gauge_name{name_1="Björn",name_2="佖佥"} 3.14e+42
`,
},
// 2: Unknown, no help, one sample with no labels and -Inf as value, another sample with one label.
// 4: Gauge, utf8, some escaping required, +Inf as value, multi-byte characters in label values.
{
in: &dto.MetricFamily{
Name: proto.String("gauge.name\""),
Help: proto.String("gauge\ndoc\nstr\"ing"),
Type: dto.MetricType_GAUGE.Enum(),
Metric: []*dto.Metric{
{
Label: []*dto.LabelPair{
{
Name: proto.String("name.1"),
Value: proto.String("val with\nnew line"),
},
{
Name: proto.String("name*2"),
Value: proto.String("val with \\backslash and \"quotes\""),
},
},
Gauge: &dto.Gauge{
Value: proto.Float64(math.Inf(+1)),
},
},
{
Label: []*dto.LabelPair{
{
Name: proto.String("name.1"),
Value: proto.String("Björn"),
},
{
Name: proto.String("name*2"),
Value: proto.String("佖佥"),
},
},
Gauge: &dto.Gauge{
Value: proto.Float64(3.14e42),
},
},
},
},
out: `# HELP "gauge.name\"" gauge\ndoc\nstr\"ing
# TYPE "gauge.name\"" gauge
{"gauge.name\"","name.1"="val with\nnew line","name*2"="val with \\backslash and \"quotes\""} +Inf
{"gauge.name\"","name.1"="Björn","name*2"="佖佥"} 3.14e+42
`,
},
// 5: Unknown, no help, one sample with no labels and -Inf as value, another sample with one label.
{
in: &dto.MetricFamily{
Name: proto.String("unknown_name"),
@@ -156,7 +273,7 @@ unknown_name -Inf
unknown_name{name_1="value 1"} -1.23e-45
`,
},
// 3: Summary.
// 6: Summary.
{
in: &dto.MetricFamily{
Name: proto.String("summary_name"),
@@ -229,7 +346,7 @@ summary_name_sum{name_1="value 1",name_2="value 2"} 2010.1971
summary_name_count{name_1="value 1",name_2="value 2"} 4711
`,
},
// 4: Histogram
// 7: Histogram
{
in: &dto.MetricFamily{
Name: proto.String("request_duration_microseconds"),
@@ -277,7 +394,7 @@ request_duration_microseconds_sum 1.7560473e+06
request_duration_microseconds_count 2693
`,
},
// 5: Histogram with missing +Inf bucket.
// 8: Histogram with missing +Inf bucket.
{
in: &dto.MetricFamily{
Name: proto.String("request_duration_microseconds"),
@@ -321,7 +438,7 @@ request_duration_microseconds_sum 1.7560473e+06
request_duration_microseconds_count 2693
`,
},
// 6: Histogram with missing +Inf bucket but with different exemplars.
// 9: Histogram with missing +Inf bucket but with different exemplars.
{
in: &dto.MetricFamily{
Name: proto.String("request_duration_microseconds"),
@@ -388,7 +505,7 @@ request_duration_microseconds_sum 1.7560473e+06
request_duration_microseconds_count 2693
`,
},
// 7: Simple Counter.
// 10: Simple Counter.
{
in: &dto.MetricFamily{
Name: proto.String("foos_total"),
@@ -407,7 +524,7 @@ request_duration_microseconds_count 2693
foos_total 42.0
`,
},
// 8: No metric.
// 11: No metric.
{
in: &dto.MetricFamily{
Name: proto.String("name_total"),
118 changes: 87 additions & 31 deletions expfmt/text_create.go
Original file line number Diff line number Diff line change
@@ -62,6 +62,18 @@ var (
// contains duplicate metrics or invalid metric or label names, the conversion
// will result in invalid text format output.
//
// If metric names conform to the legacy validation pattern, they will be placed
// outside the brackets in the traditional way, like `foo{}`. If the metric name
// fails the legacy validation check, it will be placed quoted inside the
// brackets: `{"foo"}`. As stated above, the input is assumed to be sanitized and
// no error will be thrown in this case.
//
// Similar to metric names, if label names conform to the legacy validation
// pattern, they will be unquoted as normal, like `foo{bar="baz"}`. If the label
// name fails the legacy validation check, it will be quoted:
// `foo{"bar"="baz"}`. As stated above, the input is assumed to be sanitized and
// no error will be thrown in this case.
//
// This method fulfills the type 'prometheus.encoder'.
func MetricFamilyToText(out io.Writer, in *dto.MetricFamily) (written int, err error) {
// Fail-fast checks.
@@ -98,7 +110,7 @@ func MetricFamilyToText(out io.Writer, in *dto.MetricFamily) (written int, err e
if err != nil {
return
}
n, err = w.WriteString(name)
n, err = writeName(w, name)
written += n
if err != nil {
return
@@ -124,7 +136,7 @@ func MetricFamilyToText(out io.Writer, in *dto.MetricFamily) (written int, err e
if err != nil {
return
}
n, err = w.WriteString(name)
n, err = writeName(w, name)
written += n
if err != nil {
return
@@ -280,21 +292,9 @@ func writeSample(
additionalLabelName string, additionalLabelValue float64,
value float64,
) (int, error) {
var written int
n, err := w.WriteString(name)
written += n
if err != nil {
return written, err
}
if suffix != "" {
n, err = w.WriteString(suffix)
written += n
if err != nil {
return written, err
}
}
n, err = writeLabelPairs(
w, metric.Label, additionalLabelName, additionalLabelValue,
written := 0
n, err := writeNameAndLabelPairs(
w, name+suffix, metric.Label, additionalLabelName, additionalLabelValue,
)
written += n
if err != nil {
@@ -330,32 +330,64 @@ func writeSample(
return written, nil
}

// writeLabelPairs converts a slice of LabelPair proto messages plus the
// explicitly given additional label pair into text formatted as required by the
// text format and writes it to 'w'. An empty slice in combination with an empty
// string 'additionalLabelName' results in nothing being written. Otherwise, the
// label pairs are written, escaped as required by the text format, and enclosed
// in '{...}'. The function returns the number of bytes written and any error
// encountered.
func writeLabelPairs(
// writeNameAndLabelPairs converts a slice of LabelPair proto messages plus the
// explicitly given metric name and additional label pair into text formatted as
// required by the text format and writes it to 'w'. An empty slice in
// combination with an empty string 'additionalLabelName' results in nothing
// being written. Otherwise, the label pairs are written, escaped as required by
// the text format, and enclosed in '{...}'. The function returns the number of
// bytes written and any error encountered. If the metric name is not
// legacy-valid, it will be put inside the brackets as well. Legacy-invalid
// label names will also be quoted.
func writeNameAndLabelPairs(
w enhancedWriter,
name string,
in []*dto.LabelPair,
additionalLabelName string, additionalLabelValue float64,
) (int, error) {
if len(in) == 0 && additionalLabelName == "" {
return 0, nil
}
var (
written int
separator byte = '{'
written int
separator byte = '{'
metricInsideBraces = false
)

if name != "" {
// If the name does not pass the legacy validity check, we must put the
// metric name inside the braces.
if !model.IsValidLegacyMetricName(model.LabelValue(name)) {
metricInsideBraces = true
err := w.WriteByte(separator)
written++
if err != nil {
return written, err
}
separator = ','
}
n, err := writeName(w, name)
written += n
if err != nil {
return written, err
}
}

if len(in) == 0 && additionalLabelName == "" {
if metricInsideBraces {
err := w.WriteByte('}')
written++
if err != nil {
return written, err
}
}
return written, nil
}

for _, lp := range in {
err := w.WriteByte(separator)
written++
if err != nil {
return written, err
}
n, err := w.WriteString(lp.GetName())
n, err := writeName(w, lp.GetName())
written += n
if err != nil {
return written, err
@@ -462,3 +494,27 @@ func writeInt(w enhancedWriter, i int64) (int, error) {
numBufPool.Put(bp)
return written, err
}

// writeName emits a metric or label name to w. A name accepted by
// model.IsValidLegacyMetricName is written verbatim. Any other name is
// surrounded by double quotes, with its content passed through
// writeEscapedString. It returns the number of bytes accounted for and the
// first error encountered, if any.
func writeName(w enhancedWriter, name string) (int, error) {
	if model.IsValidLegacyMetricName(model.LabelValue(name)) {
		return w.WriteString(name)
	}

	// The opening quote is counted up front; the count is reported as 1 even
	// if writing that byte fails.
	written := 1
	if err := w.WriteByte('"'); err != nil {
		return written, err
	}

	n, err := writeEscapedString(w, name, true)
	written += n
	if err != nil {
		return written, err
	}

	// The closing quote is counted the same way, whether or not the write
	// succeeds.
	err = w.WriteByte('"')
	written++
	return written, err
}
55 changes: 50 additions & 5 deletions expfmt/text_create_test.go
Original file line number Diff line number Diff line change
@@ -120,7 +120,52 @@ gauge_name{name_1="val with\nnew line",name_2="val with \\backslash and \"quotes
gauge_name{name_1="Björn",name_2="佖佥"} 3.14e+42
`,
},
// 2: Untyped, no help, one sample with no labels and -Inf as value, another sample with one label.
// 2: Gauge, utf8, +Inf as value, multi-byte characters in label values.
{
in: &dto.MetricFamily{
Name: proto.String("gauge.name"),
Help: proto.String("gauge\ndoc\nstr\"ing"),
Type: dto.MetricType_GAUGE.Enum(),
Metric: []*dto.Metric{
{
Label: []*dto.LabelPair{
{
Name: proto.String("name.1"),
Value: proto.String("val with\nnew line"),
},
{
Name: proto.String("name*2"),
Value: proto.String("val with \\backslash and \"quotes\""),
},
},
Gauge: &dto.Gauge{
Value: proto.Float64(math.Inf(+1)),
},
},
{
Label: []*dto.LabelPair{
{
Name: proto.String("name.1"),
Value: proto.String("Björn"),
},
{
Name: proto.String("name*2"),
Value: proto.String("佖佥"),
},
},
Gauge: &dto.Gauge{
Value: proto.Float64(3.14e42),
},
},
},
},
out: `# HELP "gauge.name" gauge\ndoc\nstr"ing
# TYPE "gauge.name" gauge
{"gauge.name","name.1"="val with\nnew line","name*2"="val with \\backslash and \"quotes\""} +Inf
{"gauge.name","name.1"="Björn","name*2"="佖佥"} 3.14e+42
`,
},
// 3: Untyped, no help, one sample with no labels and -Inf as value, another sample with one label.
{
in: &dto.MetricFamily{
Name: proto.String("untyped_name"),
@@ -149,7 +194,7 @@ untyped_name -Inf
untyped_name{name_1="value 1"} -1.23e-45
`,
},
// 3: Summary.
// 4: Summary.
{
in: &dto.MetricFamily{
Name: proto.String("summary_name"),
@@ -222,7 +267,7 @@ summary_name_sum{name_1="value 1",name_2="value 2"} 2010.1971
summary_name_count{name_1="value 1",name_2="value 2"} 4711
`,
},
// 4: Histogram
// 5: Histogram
{
in: &dto.MetricFamily{
Name: proto.String("request_duration_microseconds"),
@@ -270,7 +315,7 @@ request_duration_microseconds_sum 1.7560473e+06
request_duration_microseconds_count 2693
`,
},
// 5: Histogram with missing +Inf bucket.
// 6: Histogram with missing +Inf bucket.
{
in: &dto.MetricFamily{
Name: proto.String("request_duration_microseconds"),
@@ -314,7 +359,7 @@ request_duration_microseconds_sum 1.7560473e+06
request_duration_microseconds_count 2693
`,
},
// 6: No metric type, should result in default type Counter.
// 7: No metric type, should result in default type Counter.
{
in: &dto.MetricFamily{
Name: proto.String("name"),
20 changes: 14 additions & 6 deletions model/labels.go
Original file line number Diff line number Diff line change
@@ -97,17 +97,25 @@ var LabelNameRE = regexp.MustCompile("^[a-zA-Z_][a-zA-Z0-9_]*$")
// therewith.
type LabelName string

// IsValid is true iff the label name matches the pattern of LabelNameRE. This
// method, however, does not use LabelNameRE for the check but a much faster
// hardcoded implementation.
// IsValid returns true iff name matches the pattern of LabelNameRE for legacy
// names, and iff it's valid UTF-8 if NameValidationScheme is set to
// UTF8Validation. For the legacy matching, it does not use LabelNameRE for the
// check but a much faster hardcoded implementation.
func (ln LabelName) IsValid() bool {
if len(ln) == 0 {
return false
}
for i, b := range ln {
if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || (b >= '0' && b <= '9' && i > 0)) {
return false
switch NameValidationScheme {
case LegacyValidation:
for i, b := range ln {
if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || (b >= '0' && b <= '9' && i > 0)) {
return false
}
}
case UTF8Validation:
return utf8.ValidString(string(ln))
default:
panic(fmt.Sprintf("Invalid name validation scheme requested: %d", NameValidationScheme))
}
return true
}
63 changes: 41 additions & 22 deletions model/labels_test.go
Original file line number Diff line number Diff line change
@@ -92,49 +92,68 @@ func BenchmarkLabelValues(b *testing.B) {

func TestLabelNameIsValid(t *testing.T) {
scenarios := []struct {
ln LabelName
valid bool
ln LabelName
legacyValid bool
utf8Valid bool
}{
{
ln: "Avalid_23name",
valid: true,
ln: "Avalid_23name",
legacyValid: true,
utf8Valid: true,
},
{
ln: "_Avalid_23name",
valid: true,
ln: "_Avalid_23name",
legacyValid: true,
utf8Valid: true,
},
{
ln: "1valid_23name",
valid: false,
ln: "1valid_23name",
legacyValid: false,
utf8Valid: true,
},
{
ln: "avalid_23name",
valid: true,
ln: "avalid_23name",
legacyValid: true,
utf8Valid: true,
},
{
ln: "Ava:lid_23name",
valid: false,
ln: "Ava:lid_23name",
legacyValid: false,
utf8Valid: true,
},
{
ln: "a lid_23name",
valid: false,
ln: "a lid_23name",
legacyValid: false,
utf8Valid: true,
},
{
ln: ":leading_colon",
valid: false,
ln: ":leading_colon",
legacyValid: false,
utf8Valid: true,
},
{
ln: "colon:in:the:middle",
valid: false,
ln: "colon:in:the:middle",
legacyValid: false,
utf8Valid: true,
},
{
ln: "a\xc5z",
legacyValid: false,
utf8Valid: false,
},
}

for _, s := range scenarios {
if s.ln.IsValid() != s.valid {
t.Errorf("Expected %v for %q using IsValid method", s.valid, s.ln)
NameValidationScheme = LegacyValidation
if s.ln.IsValid() != s.legacyValid {
t.Errorf("Expected %v for %q using legacy IsValid method", s.legacyValid, s.ln)
}
if LabelNameRE.MatchString(string(s.ln)) != s.legacyValid {
t.Errorf("Expected %v for %q using legacy regexp match", s.legacyValid, s.ln)
}
if LabelNameRE.MatchString(string(s.ln)) != s.valid {
t.Errorf("Expected %v for %q using regexp match", s.valid, s.ln)
NameValidationScheme = UTF8Validation
if s.ln.IsValid() != s.utf8Valid {
t.Errorf("Expected %v for %q using UTF8 IsValid method", s.legacyValid, s.ln)
}
}
}
1 change: 1 addition & 0 deletions model/labelset_test.go
Original file line number Diff line number Diff line change
@@ -52,6 +52,7 @@ func TestUnmarshalJSONLabelSet(t *testing.T) {
}
}`

NameValidationScheme = LegacyValidation
err = json.Unmarshal([]byte(invalidlabelSetJSON), &c)
expectedErr := `"1nvalid_23name" is not a valid label name`
if err == nil || err.Error() != expectedErr {
57 changes: 51 additions & 6 deletions model/metric.go
Original file line number Diff line number Diff line change
@@ -18,12 +18,39 @@ import (
"regexp"
"sort"
"strings"
"unicode/utf8"
)

// MetricNameRE is a regular expression matching valid metric
// names. Note that the IsValidMetricName function performs the same
// check but faster than a match with this regular expression.
var MetricNameRE = regexp.MustCompile(`^[a-zA-Z_:][a-zA-Z0-9_:]*$`)
// ValidationScheme is a Go enum for determining how metric and label names will
// be validated by this library.
type ValidationScheme int

const (
// LegacyValidation is a setting that requires that metric and label names
// conform to the original Prometheus character requirements described by
// MetricNameRE and LabelNameRE.
LegacyValidation ValidationScheme = iota

// UTF8Validation only requires that metric and label names be valid UTF8
// strings.
UTF8Validation
)

var (
// NameValidationScheme determines the method of name validation to be used by
// all calls to IsValidMetricName() and LabelName IsValid(). Setting UTF8 mode
// in isolation from other components that don't support UTF8 may result in
// bugs or other undefined behavior. This value is intended to be set by
// UTF8-aware binaries as part of their startup. To avoid need for locking,
// this value should be set once, ideally in an init(), before multiple
// goroutines are started.
NameValidationScheme = LegacyValidation

// MetricNameRE is a regular expression matching valid metric
// names. Note that the IsValidMetricName function performs the same
// check but faster than a match with this regular expression.
MetricNameRE = regexp.MustCompile(`^[a-zA-Z_:][a-zA-Z0-9_:]*$`)
)

// A Metric is similar to a LabelSet, but the key difference is that a Metric is
// a singleton and refers to one and only one stream of samples.
@@ -84,10 +111,28 @@ func (m Metric) FastFingerprint() Fingerprint {
return LabelSet(m).FastFingerprint()
}

// IsValidMetricName returns true iff name matches the pattern of MetricNameRE.
// IsValidMetricName reports whether n is an acceptable metric name under the
// currently selected NameValidationScheme. With LegacyValidation the decision
// is delegated to IsValidLegacyMetricName. With UTF8Validation the name must
// be non-empty and valid UTF-8. Any other scheme value causes a panic.
func IsValidMetricName(n LabelValue) bool {
	if NameValidationScheme == LegacyValidation {
		return IsValidLegacyMetricName(n)
	}
	if NameValidationScheme == UTF8Validation {
		// An empty name is never valid, even though it is valid UTF-8.
		return len(n) != 0 && utf8.ValidString(string(n))
	}
	panic(fmt.Sprintf("Invalid name validation scheme requested: %d", NameValidationScheme))
}

// IsValidLegacyMetricName is similar to IsValidMetricName but always uses the
// legacy validation scheme regardless of the value of NameValidationScheme.
// This function, however, does not use MetricNameRE for the check but a much
// faster hardcoded implementation.
func IsValidMetricName(n LabelValue) bool {
func IsValidLegacyMetricName(n LabelValue) bool {
if len(n) == 0 {
return false
}
70 changes: 45 additions & 25 deletions model/metric_test.go
Original file line number Diff line number Diff line change
@@ -82,55 +82,75 @@ func BenchmarkMetric(b *testing.B) {
}
}

func TestMetricNameIsValid(t *testing.T) {
func TestMetricNameIsLegacyValid(t *testing.T) {
scenarios := []struct {
mn LabelValue
valid bool
mn LabelValue
legacyValid bool
utf8Valid bool
}{
{
mn: "Avalid_23name",
valid: true,
mn: "Avalid_23name",
legacyValid: true,
utf8Valid: true,
},
{
mn: "_Avalid_23name",
valid: true,
mn: "_Avalid_23name",
legacyValid: true,
utf8Valid: true,
},
{
mn: "1valid_23name",
valid: false,
mn: "1valid_23name",
legacyValid: false,
utf8Valid: true,
},
{
mn: "avalid_23name",
valid: true,
mn: "avalid_23name",
legacyValid: true,
utf8Valid: true,
},
{
mn: "Ava:lid_23name",
valid: true,
mn: "Ava:lid_23name",
legacyValid: true,
utf8Valid: true,
},
{
mn: "a lid_23name",
valid: false,
mn: "a lid_23name",
legacyValid: false,
utf8Valid: true,
},
{
mn: ":leading_colon",
valid: true,
mn: ":leading_colon",
legacyValid: true,
utf8Valid: true,
},
{
mn: "colon:in:the:middle",
valid: true,
mn: "colon:in:the:middle",
legacyValid: true,
utf8Valid: true,
},
{
mn: "",
valid: false,
mn: "",
legacyValid: false,
utf8Valid: false,
},
{
mn: "a\xc5z",
legacyValid: false,
utf8Valid: false,
},
}

for _, s := range scenarios {
if IsValidMetricName(s.mn) != s.valid {
t.Errorf("Expected %v for %q using IsValidMetricName function", s.valid, s.mn)
NameValidationScheme = LegacyValidation
if IsValidMetricName(s.mn) != s.legacyValid {
t.Errorf("Expected %v for %q using legacy IsValidMetricName method", s.legacyValid, s.mn)
}
if MetricNameRE.MatchString(string(s.mn)) != s.legacyValid {
t.Errorf("Expected %v for %q using regexp matching", s.legacyValid, s.mn)
}
if MetricNameRE.MatchString(string(s.mn)) != s.valid {
t.Errorf("Expected %v for %q using regexp matching", s.valid, s.mn)
NameValidationScheme = UTF8Validation
if IsValidMetricName(s.mn) != s.utf8Valid {
t.Errorf("Expected %v for %q using utf8 IsValidMetricName method", s.legacyValid, s.mn)
}
}
}
60 changes: 45 additions & 15 deletions model/silence_test.go
Original file line number Diff line number Diff line change
@@ -21,8 +21,9 @@ import (

func TestMatcherValidate(t *testing.T) {
cases := []struct {
matcher *Matcher
err string
matcher *Matcher
legacyErr string
utf8Err string
}{
{
matcher: &Matcher{
@@ -42,46 +43,74 @@ func TestMatcherValidate(t *testing.T) {
Name: "name!",
Value: "value",
},
err: "invalid name",
legacyErr: "invalid name",
},
{
matcher: &Matcher{
Name: "",
Value: "value",
},
err: "invalid name",
legacyErr: "invalid name",
utf8Err: "invalid name",
},
{
matcher: &Matcher{
Name: "name",
Value: "value\xff",
},
err: "invalid value",
legacyErr: "invalid value",
utf8Err: "invalid value",
},
{
matcher: &Matcher{
Name: "name",
Value: "",
},
err: "invalid value",
legacyErr: "invalid value",
utf8Err: "invalid value",
},
{
matcher: &Matcher{
Name: "a\xc5z",
Value: "",
},
legacyErr: "invalid name",
utf8Err: "invalid name",
},
}

for i, c := range cases {
err := c.matcher.Validate()
if err == nil {
if c.err == "" {
NameValidationScheme = LegacyValidation
legacyErr := c.matcher.Validate()
NameValidationScheme = UTF8Validation
utf8Err := c.matcher.Validate()
if legacyErr == nil && utf8Err == nil {
if c.legacyErr == "" && c.utf8Err == "" {
continue
}
t.Errorf("%d. Expected error %q but got none", i, c.err)
if c.legacyErr != "" {
t.Errorf("%d. Expected error for legacy validation %q but got none", i, c.legacyErr)
}
if c.utf8Err != "" {
t.Errorf("%d. Expected error for utf8 validation %q but got none", i, c.utf8Err)
}
continue
}
if c.err == "" {
t.Errorf("%d. Expected no error but got %q", i, err)
continue
if legacyErr != nil {
if c.legacyErr == "" {
t.Errorf("%d. Expected no legacy validation error but got %q", i, legacyErr)
} else if !strings.Contains(legacyErr.Error(), c.legacyErr) {
t.Errorf("%d. Expected error to contain %q but got %q", i, c.legacyErr, legacyErr)
}
}
if !strings.Contains(err.Error(), c.err) {
t.Errorf("%d. Expected error to contain %q but got %q", i, c.err, err)
if utf8Err != nil {
if c.utf8Err == "" {
t.Errorf("%d. Expected no utf8 validation error but got %q", i, utf8Err)
continue
}
if !strings.Contains(utf8Err.Error(), c.utf8Err) {
t.Errorf("%d. Expected error to contain %q but got %q", i, c.utf8Err, utf8Err)
}
}
}
}
@@ -219,6 +248,7 @@ func TestSilenceValidate(t *testing.T) {
}

for i, c := range cases {
NameValidationScheme = LegacyValidation
err := c.sil.Validate()
if err == nil {
if c.err == "" {