|
| 1 | +from analyzer_interface import ( |
| 2 | + AnalyzerInput, |
| 3 | + AnalyzerInterface, |
| 4 | + AnalyzerOutput, |
| 5 | + InputColumn, |
| 6 | + OutputColumn, |
| 7 | +) |
| 8 | + |
# Declarative description of the example analyzer: its identity, the input
# columns it asks the user to map, and the outputs it produces.
interface = AnalyzerInterface(
    # Should be globally unique.
    id="__example__",
    # We don't really use this yet, but specify something for now.
    version="0.1.0",
    # The name of the analyzer as shown in the UI.
    name="Example Analyzer",
    # These descriptions are shown to the user in the UI at some point during
    # the analysis selection process.
    short_description="Example Analyzer (Character Count)",
    long_description="""
This is an example analyzer that counts the number of characters in each message.
    """,
    input=AnalyzerInput(
        columns=[
            InputColumn(
                # This is the column name that you will use in your data
                # analysis code.
                name="message_id",
                # This is the human readable name that will be displayed in
                # the user interface.
                human_readable_name="Unique Message ID",
                # Refer to the complete set of data types by following the
                # type definition.
                data_type="identifier",
                # This is a description of the column that will be displayed
                # in the user interface during column matching.
                description="The unique identifier of the message",
                # These name hints give the application soft heuristics for
                # matching this column to the right data. The user will be
                # able to override the suggestion if it is incorrect.
                #
                # You don't need to provide all possible hints, but the more
                # you provide, the better the suggestions will be.
                name_hints=[
                    "post",
                    "message",
                    "comment",
                    "text",
                    "retweet id",
                    "tweet",
                ],
            ),
            InputColumn(
                name="message_text",
                human_readable_name="Message Text",
                data_type="text",
                description="The text content of the message",
                name_hints=[
                    "message",
                    "text",
                    "comment",
                    "post",
                    "body",
                    "content",
                    "tweet",
                ],
            ),
        ]
    ),
    outputs=[
        AnalyzerOutput(
            # This should be locally unique to the analyzer.
            # Remember this -- you will need it to refer to this output in
            # your implementation. It will also form part of the exported
            # output's file name, so choose something that's intuitive.
            id="character_count",
            # This is the human readable name that will be displayed in the
            # user interface. Only used if this is exportable. You can leave
            # it out and it will fall back to the id.
            name="Character Count Per Message",
            # Mark this as internal, so that it is not shown in the list of
            # exported outputs.
            internal=True,
            columns=[
                OutputColumn(
                    # This is the column name that you will use in your data
                    # analysis code when saving the output.
                    name="message_id",
                    # This is the human readable name that will be used in
                    # the exported output.
                    human_readable_name="Unique Message ID",
                    # Declared with the same type as the "message_id" input
                    # column so the two declarations agree; identifiers are
                    # not guaranteed to be numeric.
                    data_type="identifier",
                ),
                OutputColumn(name="character_count", data_type="integer"),
            ],
        )
    ],
)
0 commit comments