This repository has been archived by the owner on Jul 29, 2024. It is now read-only.
forked from mozilla-services/mozilla-pipeline-schemas
-
Notifications
You must be signed in to change notification settings - Fork 1
/
metaschema.1.schema.json
101 lines (101 loc) · 4.5 KB
/
metaschema.1.schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
{
"$schema": "http://json-schema.org/draft-04/schema#",
"id": "http://jsonschema.net",
"type": "object",
"description": "Metaschema for validating the structure of the per-doctype schemas in this repository",
"properties": {
"mozPipelineMetadata": {
"type": "object",
"description": "Container for per-doctype metadata that can affect how pings are processed in the pipeline",
"additionalProperties": false,
"properties": {
"$comment": {
"type": "string",
"description": "Optional comment about the pipeline handling for this doctype"
},
"bq_table": {
"type": "string",
"description": "NOT YET IMPLEMENTED: The name of the destination BigQuery table"
},
"bq_dataset_family": {
"type": "string",
"description": "NOT YET IMPLEMENTED: The base name for the destination BigQuery dataset; if this is set to 'telemetry', the pipeline will write into 'telemetry_live'"
},
"bq_metadata_format": {
"type": "string",
"description": "The logical format for the metadata struct in the destination BigQuery table",
"enum": ["structured", "telemetry", "pioneer"]
},
"submission_timestamp_granularity": {
"type": "string",
"description": "If specified, the submission_timestamp field will be truncated to the specified granularity in the pipeline before being output to BigQuery; this can be used to reduce the potential for using time-based attacks to correlate datasets using different client-level identifiers; see Java's ChronoUnit for additional granularities that could be considered for inclusion; implemented for bug 1742172",
"enum": ["millis", "seconds", "minutes", "hours", "days"]
},
"expiration_policy": {
"type": "object",
"description": "Various options controlling data lifecycle",
"additionalProperties": false,
"properties": {
"delete_after_days": {
"type": "integer",
"description": "If present, a time_partitioning_expiration policy will be set on the destination stable table in BigQuery"
},
"collect_through_date": {
"type": "string",
"pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$",
"description": "If present, the pipeline will reject new data with submission_timestamp after the given date, sending it to error output"
}
}
},
"override_attributes": {
"type": "array",
"description": "Mappings of Pub/Sub attribute names to static values; these are applied in the Decoder immediately before incorporating metadata into the payload, so can be used to overwrite values calculated in the pipeline; a null value will cause the pipeline to drop the named attribute; some attribute names differ from the nested metadata format in BigQuery, so for example you must use \"geo_city\" here in order to manipulate the value that shows up as metadata.geo.city; implemented for bug 1742172",
"items": {
"type": "object",
"additionalProperties": false,
"properties": {
"name":{
"type": "string",
"enum": [
"geo_city",
"geo_subdivision1",
"geo_subdivision2",
"normalized_channel"
]
},
"value":{
"type": ["string", "null"]
}
},
"required": [
"name",
"value"
]
}
},
"jwe_mappings": {
"type": "array",
"description": "Mappings of encrypted JWE field paths to destinations where the value decrypted by the pipeline should be placed; initial use case is Account Ecosystem Telemetry; paths must be in [JSON Pointer format](https://tools.ietf.org/html/rfc6901) like '/payload/ecosystemAnonId'",
"items": {
"type": "object",
"additionalProperties": false,
"properties": {
"source_field_path":{
"type": "string",
"pattern": "^/.*$"
},
"decrypted_field_path":{
"type": "string",
"pattern": "^/.*$"
}
},
"required": [
"source_field_path",
"decrypted_field_path"
]
}
}
}
}
}
}