diff --git a/zschema/__main__.py b/zschema/__main__.py
index 321a35f..558622b 100644
--- a/zschema/__main__.py
+++ b/zschema/__main__.py
@@ -11,8 +11,9 @@
def usage():
sys.stderr.write("USAGE: %s command schema [file].\n" % sys.argv[0].split("/")[-1])
- sys.stderr.write("Valid commands: bigquery, elasticsearch, json, text, html, censys-html, flat, validate.\n")
- sys.stderr.write("schema should be defined as file.py:record\n")
+ sys.stderr.write("Valid commands: bigquery, elasticsearch, docs-es, docs-bq, json, flat, validate.\n")
+ sys.stderr.write("Schema should be passed as file.py:record\n")
+ sys.stderr.write("The optional 'file' argument is used only as the test file for the 'validate' command.\n")
sys.stderr.write("VERSION: %s\n" % zschema.__version__)
sys.exit(1)
@@ -27,26 +28,15 @@ def main():
print json.dumps(record.to_bigquery())
elif command == "elasticsearch":
print json.dumps(record.to_es(recname))
+ elif command == "docs-es":
+ print json.dumps(record.docs_es(recname))
+ elif command == "docs-bq":
+ print json.dumps(record.docs_bq(recname))
elif command == "json":
print record.to_json()
- elif command == "html":
- for r in record.to_flat():
- type_ = r.get("es_type", "")
- print "
%s | %s |
" % (r["name"], type_)
- elif command == "text":
- print record.to_text()
elif command == "flat":
for r in record.to_flat():
print json.dumps(r)
- elif command == "censys-html":
- for r in record.to_flat():
- type_ = r.get("es_type", None)
- len_ = r["name"].count(".")
- style = 'style="padding-left: %ipx"' % (15 * len_ + 5)
- if not type_:
- print '%s | %s |
' % (style, r["name"], "")
- else:
- print "%s | %s |
" % (style, r["name"], type_)
elif command == "validate":
if not os.path.exists(sys.argv[3]):
sys.stderr.write("Invalid test file. %s does not exist.\n" % sys.argv[3])
diff --git a/zschema/compounds.py b/zschema/compounds.py
index d107b60..cf3a245 100644
--- a/zschema/compounds.py
+++ b/zschema/compounds.py
@@ -10,9 +10,11 @@ def _is_valid_object(name, object_):
class ListOf(Keyable):
- def __init__(self, object_, max_items=10):
+ def __init__(self, object_, max_items=10, doc=None, category=None):
self.object_ = object_
self.max_items = max_items
+ self.category = category
+ self.doc = doc
_is_valid_object("Anonymous ListOf", object_)
@property
@@ -33,9 +35,27 @@ def to_bigquery(self, name):
retv["mode"] = "REPEATED"
return retv
+    def docs_bq(self, parent_category=None):  # BigQuery docs: the element's entry, flagged as repeated
+        retv = self.object_.docs_bq()
+        # Prefer the list's (or parent's) category; never clobber the element's own with None.
+        retv["category"] = self.category or parent_category or retv.get("category")
+        retv["repeated"] = True
+        if self.doc:
+            retv["doc"] = self.doc
+        return retv
+
def to_es(self):
return self.object_.to_es()
+    def docs_es(self, parent_category=None):  # Elasticsearch docs: the element's entry, flagged as repeated
+        retv = self.object_.docs_es()
+        # Prefer the list's (or parent's) category; never clobber the element's own with None.
+        retv["category"] = self.category or parent_category or retv.get("category")
+        retv["repeated"] = True
+        if self.doc:
+            retv["doc"] = self.doc
+        return retv
+
def validate(self, name, value):
if type(value) != list:
raise DataValidationException("%s: %s is not a list",
@@ -59,11 +79,13 @@ def __init__(self,
doc=None,
extends=None,
allow_unknown=False,
- exclude=None):
+ exclude=None,
+ category=None):
self.definition = definition
self.required = required
self.allow_unknown = allow_unknown
self.doc = doc
+ self.category = category
self._exclude = set(exclude) if exclude else set([])
# merge
if extends:
@@ -113,14 +135,25 @@ def merge(self, other):
return self
def to_bigquery(self, name):
- fields = [v.to_bigquery(k) for (k,v) in sorted(self.definition.iteritems()) if \
- not v.exclude_bigquery]
- return {
+ fields = [v.to_bigquery(k) \
+ for (k,v) in sorted(self.definition.iteritems()) \
+ if not v.exclude_bigquery
+ ]
+ retv = {
"name":self.key_to_bq(name),
"type":"RECORD",
"fields":fields,
"mode":"REQUIRED" if self.required else "NULLABLE"
}
+ return retv
+
+    def docs_bq(self, parent_category=None):
+        # Describe this sub-record and every child not excluded from BigQuery, keyed by BigQuery name.
+        described = self._docs_common(parent_category=parent_category)
+        described["fields"] = {self.key_to_bq(key): child.docs_bq()
+                               for key, child in sorted(self.definition.iteritems())
+                               if not child.exclude_bigquery}
+        return described
def print_indent_string(self, name, indent):
tabs = "\t" * indent if indent else ""
@@ -129,10 +162,28 @@ def print_indent_string(self, name, indent):
value.print_indent_string(name, indent+1)
def to_es(self):
- p = {self.key_to_es(k): v.to_es() for k, v in sorted(self.definition.iteritems()) \
+ p = {self.key_to_es(k): v.to_es() \
+ for k, v in sorted(self.definition.iteritems()) \
if not v.exclude_elasticsearch}
return {"properties": p}
+    def _docs_common(self, parent_category):
+        # Metadata shared by the Elasticsearch and BigQuery documentation output.
+        # A category set on this record wins over the one handed down by the parent.
+        return {
+            "category": self.category or parent_category,
+            "doc": self.doc,
+            "type": self.__class__.__name__,
+            "required": self.required,
+        }
+
+    def docs_es(self, parent_category=None):
+        # Document this sub-record plus each child that is not excluded from Elasticsearch.
+        described = self._docs_common(parent_category=parent_category)
+        kept = [(k, v) for k, v in sorted(self.definition.iteritems()) if not v.exclude_elasticsearch]
+        described["fields"] = {self.key_to_es(k): v.docs_es() for k, v in kept}
+        return described
+
def to_dict(self):
source = sorted(self.definition.iteritems())
p = {self.key_to_es(k): v.to_dict() for k, v in source}
@@ -152,8 +203,8 @@ def validate(self, name, value):
class NestedListOf(ListOf):
- def __init__(self, object_, subrecord_name, max_items=10):
- ListOf.__init__(self, object_, max_items)
+ def __init__(self, object_, subrecord_name, max_items=10, doc=None, category=None):
+ ListOf.__init__(self, object_, max_items, doc=doc, category=category)
self.subrecord_name = subrecord_name
def to_bigquery(self, name):
@@ -162,6 +213,19 @@ def to_bigquery(self, name):
})
retv = subr.to_bigquery(self.key_to_bq(name))
retv["mode"] = "REPEATED"
+ if self.doc:
+ retv["doc"] = self.doc
+ return retv
+
+ def docs_bq(self, parent_category=None):  # BigQuery documentation entry for the nested list
+ subr = SubRecord({  # wrap the list in a one-field SubRecord keyed by subrecord_name
+ self.subrecord_name: ListOf(self.object_)
+ })
+ category = self.category or parent_category  # own category wins over the parent's
+ retv = subr.docs_bq(parent_category=category)
+ retv["repeated"] = True
+ if self.doc:  # attach the doc string only when one was supplied
+ retv["doc"] = self.doc
return retv
@@ -170,24 +234,25 @@ class Record(SubRecord):
def to_es(self, name):
return {name:SubRecord.to_es(self)}
+    def docs_es(self, name, parent_category=None):
+        # Nest the record's Elasticsearch documentation under the caller-supplied name.
+        return {name: SubRecord.docs_es(self, parent_category=self.category or parent_category)}
+
def to_bigquery(self):
source = sorted(self.definition.iteritems())
- return [s.to_bigquery(name) for (name, s) in source \
- if not s.exclude_bigquery]
-
- def to_html(self):
- pass
+ return [s.to_bigquery(name) \
+ for (name, s) in source \
+ if not s.exclude_bigquery
+ ]
- def to_documented_html(self):
- pass
+    def docs_bq(self, name, parent_category=None):
+        # Nest the record's BigQuery documentation under the caller-supplied name.
+        return {name: SubRecord.docs_bq(self, parent_category=self.category or parent_category)}
def print_indent_string(self):
for name, field in sorted(self.definition.iteritems()):
field.print_indent_string(name, 0)
- def to_dotted_text(self):
- pass
-
def validate(self, value):
if type(value) != dict:
raise DataValidationException("record is not a dict", str(value))
@@ -212,5 +277,3 @@ def to_flat(self):
@classmethod
def from_json(cls, j):
return cls({(k, __encode(v)) for k, v in sorted(j.iteritems())})
-
-
diff --git a/zschema/leaves.py b/zschema/leaves.py
index f5829ce..b079630 100644
--- a/zschema/leaves.py
+++ b/zschema/leaves.py
@@ -15,12 +15,11 @@ def __init__(self,
es_index=None,
es_analyzer=None,
doc=None,
+ examples=None,
es_include_raw=None,
deprecated=False,
ignore=False,
- autocomplete_include=True,
- autocomplete_category=None,
- autocomplete_icon=None,
+ category=None,
exclude=None,
metadata=None,
units=None,
@@ -30,6 +29,7 @@ def __init__(self,
self.es_index = es_index
self.es_analyzer = es_analyzer
self.doc = doc
+ self.examples = examples if examples else []
if es_include_raw is not None:
self.es_include_raw = es_include_raw
else:
@@ -40,9 +40,7 @@ def __init__(self,
e = "WARN: %s is deprecated and will be removed in a "\
"future release\n" % self.__class__.__name__
sys.stderr.write(e)
- self.autocomplete_category = autocomplete_category
- self.autocomplete_category = autocomplete_category
- self.autocomplete_icon = autocomplete_icon
+ self.category = category
self._exclude = set(exclude) if exclude else set([])
self.metadata = metadata if metadata else {}
self.units = units
@@ -56,7 +54,8 @@ def to_dict(self):
"type":self.__class__.__name__,
"es_type":self.ES_TYPE,
"bq_type":self.BQ_TYPE,
- "metadata":self.metadata
+ "metadata":self.metadata,
+ "examples": self.examples,
}
if self.units is not None:
retv["units"] = self.units
@@ -72,13 +71,36 @@ def to_es(self):
self.add_es_var(retv, "analyzer", "es_analyzer", "ES_ANALYZER")
self.add_es_var(retv, "search_analyzer", "es_search_analyzer",
"ES_SEARCH_ANALYZER")
-
if self.es_include_raw:
retv["fields"] = {
"raw":{"type":"keyword"}
}
return retv
+    def _docs_common(self, parent_category):
+        # Fields shared by docs_es and docs_bq; a non-empty values_s replaces the examples list.
+        common = {
+            "detail_type": self.__class__.__name__,
+            "category": self.category or parent_category,
+            "doc": self.doc,
+            "required": self.required,
+        }
+        enumerated = hasattr(self, "values_s") and len(self.values_s) > 0
+        key, value = ("values", list(self.values_s)) if enumerated else ("examples", self.examples)
+        common[key] = value
+        return common
+
+ def docs_es(self, parent_category=None):  # Elasticsearch documentation entry for this leaf
+ retv = self._docs_common(parent_category)
+ self.add_es_var(retv, "analyzer", "es_analyzer", "ES_ANALYZER")  # NOTE(review): presumably sets retv["analyzer"] when an analyzer is configured -- confirm against add_es_var
+ retv["type"] = self.ES_TYPE  # value of the ES_TYPE attribute
+ return retv
+
+    def docs_bq(self, parent_category=None):
+        # Shared leaf documentation plus the BigQuery column type.
+        described = self._docs_common(parent_category)
+        return dict(described, type=self.BQ_TYPE)
+
def to_bigquery(self, name):
if not self._check_valid_name(name):
raise Exception("Invalid field name: %s" % name)
@@ -118,9 +140,6 @@ def to_flat(self, parent, name, repeated=False):
"mode":mode
}
- def to_autocomplete(self, parent, name, repated=False):
- pass
-
def print_indent_string(self, name, indent):
val = self.key_to_string(name)
if indent:
diff --git a/zschema/tests.py b/zschema/tests.py
index 85855d6..b222f7b 100644
--- a/zschema/tests.py
+++ b/zschema/tests.py
@@ -67,6 +67,178 @@ def test_invalid(self):
}
}
+VALID_DOCS_OUTPUT_FOR_ES_FIELDS = {
+ "host": {
+ "category": None,
+ "doc": None,
+ "fields": {
+ "443": {
+ "category": "heartbleed",
+ "doc": None,
+ "fields": {
+ "heartbleed": {
+ "category": None,
+ "doc": None,
+ "fields": {
+ "heartbeat_support": {
+ "category": None,
+ "detail_type": "Boolean",
+ "doc": None,
+ "examples": [],
+ "required": False,
+ "type": "boolean"
+ },
+ "heartbleed_vulnerable": {
+ "category": "Vulnerabilities",
+ "detail_type": "Boolean",
+ "doc": None,
+ "examples": [],
+ "required": False,
+ "type": "boolean"
+ },
+ "timestamp": {
+ "category": None,
+ "detail_type": "DateTime",
+ "doc": None,
+ "examples": [],
+ "required": False,
+ "type": "date"
+ }
+ },
+ "required": False,
+ "type": "SubRecord"
+ },
+ "tls": {
+ "category": None,
+ "detail_type": "String",
+ "doc": None,
+ "examples": [],
+ "required": False,
+ "type": "keyword"
+ }
+ },
+ "required": False,
+ "type": "SubRecord"
+ },
+ "ip": {
+ "category": None,
+ "detail_type": "Long",
+ "doc": "The IP Address of the host",
+ "examples": [],
+ "required": False,
+ "type": "long"
+ },
+ "ipstr": {
+ "category": None,
+ "detail_type": "IPv4Address",
+ "doc": None,
+ "examples": [
+ "8.8.8.8"
+ ],
+ "required": True,
+ "type": "ip"
+ },
+ "tags": {
+ "category": None,
+ "detail_type": "String",
+ "doc": None,
+ "examples": [],
+ "repeated": True,
+ "required": False,
+ "type": "keyword"
+ }
+ },
+ "required": False,
+ "type": "Record"
+ }
+}
+
+VALID_DOCS_OUTPUT_FOR_BIG_QUERY_FIELDS = {
+ "host": {
+ "category": None,
+ "doc": None,
+ "fields": {
+ "ip": {
+ "category": None,
+ "detail_type": "Long",
+ "doc": "The IP Address of the host",
+ "examples": [],
+ "required": False,
+ "type": "INTEGER"
+ },
+ "ipstr": {
+ "category": None,
+ "detail_type": "IPv4Address",
+ "doc": None,
+ "examples": [
+ "8.8.8.8"
+ ],
+ "required": True,
+ "type": "STRING"
+ },
+ "p443": {
+ "category": "heartbleed",
+ "doc": None,
+ "fields": {
+ "heartbleed": {
+ "category": None,
+ "doc": None,
+ "fields": {
+ "heartbeat_support": {
+ "category": None,
+ "detail_type": "Boolean",
+ "doc": None,
+ "examples": [],
+ "required": False,
+ "type": "BOOLEAN"
+ },
+ "heartbleed_vulnerable": {
+ "category": "Vulnerabilities",
+ "detail_type": "Boolean",
+ "doc": None,
+ "examples": [],
+ "required": False,
+ "type": "BOOLEAN"
+ },
+ "timestamp": {
+ "category": None,
+ "detail_type": "DateTime",
+ "doc": None,
+ "examples": [],
+ "required": False,
+ "type": "DATETIME"
+ }
+ },
+ "required": False,
+ "type": "SubRecord"
+ },
+ "tls": {
+ "category": None,
+ "detail_type": "String",
+ "doc": None,
+ "examples": [],
+ "required": False,
+ "type": "STRING"
+ }
+ },
+ "required": False,
+ "type": "SubRecord"
+ },
+ "tags": {
+ "category": None,
+ "detail_type": "String",
+ "doc": None,
+ "examples": [],
+ "repeated": True,
+ "required": False,
+ "type": "STRING"
+ }
+ },
+ "required": False,
+ "type": "Record"
+ }
+}
+
VALID_BIG_QUERY = [
{
"fields": [
@@ -115,6 +287,7 @@ def test_invalid(self):
{
"type": "INTEGER",
"name": "ip",
+ "doc": "The IP Address of the host",
"mode": "NULLABLE"
},
]
@@ -154,16 +327,16 @@ def setUp(self):
heartbleed = SubRecord({
"heartbeat_support":Boolean(),
- "heartbleed_vulnerable":Boolean(),
+ "heartbleed_vulnerable":Boolean(category="Vulnerabilities"),
"timestamp":DateTime()
})
self.host = Record({
- "ipstr":IPv4Address(required=True),
- "ip":Long(),
+ "ipstr":IPv4Address(required=True, examples=["8.8.8.8"]),
+ "ip":Long(doc="The IP Address of the host"),
Port(443):SubRecord({
"tls":String(),
"heartbleed":heartbleed
- }),
+ }, category="heartbleed"),
"tags":ListOf(String())
})
@@ -177,6 +350,15 @@ def test_elasticsearch(self):
r = self.host.to_es("host")
self.assertEqual(r, VALID_ELASTIC_SEARCH)
+    def test_docs_output(self):
+        # The expected-output constants are only read here, so no `global` declaration is needed.
+        es_docs = self.host.docs_es("host")
+        self.assertEqual(es_docs, VALID_DOCS_OUTPUT_FOR_ES_FIELDS)
+
+        # Same schema rendered as BigQuery documentation.
+        bq_docs = self.host.docs_bq("host")
+        self.assertEqual(bq_docs, VALID_DOCS_OUTPUT_FOR_BIG_QUERY_FIELDS)
+
def test_validation_known_good(self):
test = {
"ipstr":"141.212.120.1",