-
Notifications
You must be signed in to change notification settings - Fork 34
/
Copy pathsbom.spdx3.json
281 lines (281 loc) · 14.3 KB
/
sbom.spdx3.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
{
"@context": "https://spdx.org/rdf/3.0.1/spdx-context.jsonld",
"@graph": [
{
"@id": "_:creationinfo",
"created": "2024-11-06T19:30:00Z",
"createdBy": [
"https://orcid.org/0000-0002-9698-1899"
],
"specVersion": "3.0.1",
"type": "CreationInfo"
},
{
"creationInfo": "_:creationinfo",
"externalIdentifier": [
{
"externalIdentifierType": "other",
"identifier": "bact",
"identifierLocator": [
"https://github.com/bact/"
],
"issuingAuthority": "GitHub",
"type": "ExternalIdentifier"
}
],
"name": "Arthit Suriyawongkul",
"spdxId": "https://orcid.org/0000-0002-9698-1899",
"type": "Person"
},
{
"creationInfo": "_:creationinfo",
"externalIdentifier": [
{
"externalIdentifierType": "other",
"identifier": "PyThaiNLP",
"identifierLocator": [
"https://github.com/PyThaiNLP/"
],
"issuingAuthority": "GitHub",
"type": "ExternalIdentifier"
}
],
"name": "PyThaiNLP",
"spdxId": "https://pythainlp.org/",
"type": "Organization"
},
{
"creationInfo": "_:creationinfo",
"externalIdentifier": [
{
"externalIdentifierType": "other",
"identifier": "wisesight",
"identifierLocator": [
"https://github.com/wisesight/"
],
"issuingAuthority": "GitHub",
"type": "ExternalIdentifier"
}
],
"name": "Wisesight (Thailand) Co., Ltd.",
"spdxId": "https://wisesight.com/",
"type": "Organization"
},
{
"creationInfo": "_:creationinfo",
"profileConformance": [
"core",
"dataset"
],
"rootElement": [
"https://spdx.org/spdxdocs/Bom/01-5b1aeec4-be22-44dc-ae37-4594e38e5119"
],
"spdxId": "https://spdx.org/spdxdocs/SpdxDocument/01-969f4a32-c67c-4824-8afa-d9ac8e634b75",
"type": "SpdxDocument"
},
{
"creationInfo": "_:creationinfo",
"profileConformance": [
"core",
"dataset"
],
"rootElement": [
"https://spdx.org/spdxdocs/DatasetPackage/01-c69d31e2-43cd-4918-ab26-71f198a85cd1"
],
"spdxId": "https://spdx.org/spdxdocs/Bom/01-5b1aeec4-be22-44dc-ae37-4594e38e5119",
"type": "Bom"
},
{
"builtTime": "2024-11-06T17:30:00Z",
"comment": "See more at: https://github.com/PyThaiNLP/wisesight-sentiment/blob/master/README.md",
"creationInfo": "_:creationinfo",
"dataset_anonymizationMethodUsed": [
"masking",
"removal"
],
"dataset_confidentialityLevel": "clear",
"dataset_dataCollectionProcess": "Data was collected from approximately 2016 to early 2019, with a small amount from other periods. Collection was made only from messages that made available to the public on the internet (websites, blogs, social network sites). For Facebook, this means the public comments (everyone can see) that made on a public page. Private/protected messages and messages in groups, chat, and inbox are not included.",
"dataset_dataPreprocessing": [
"Large amount of messages are not in their original form. Personal data are removed or masked.",
"Usernames and non-public figure names are removed.",
"Phone numbers are masked (e.g., 088-888-8888, 09-9999-9999, 0-2222-2222).",
"Duplicated, leading, and trailing whitespaces are removed. Other punctuations, symbols, and emojis are kept intact.",
"(Mis)spellings are kept intact.",
"Messages longer than 2,000 characters are removed.",
"Long non-Thai messages are removed. Duplicated message (exact match) are removed.",
"Sentiment value annotation is added, using the following methodology.",
"Sentiment values are assigned by human annotators.",
"A human annotator put his/her best effort to assign just one label, out of four, to a message.",
"A message can be ambiguous. When possible, the judgement will be based solely on the text itself.",
"In some situation, like when the context is missing, the annotator may have to rely on his/her own world knowledge and just guess.",
"In some cases, the human annotator may have access to the message's context, like an image. These additional information are not included as part of this corpus.",
"Agreement, enjoyment, and satisfaction are positive. Disagreement, sadness, and disappointment are negative.",
"Showing interest in a topic or in a product is counted as positive.",
"In this sense, a question about a particular product could have a positive sentiment value, if it shows the interest in the product.",
"Saying that other product or service is better is counted as negative.",
"General information or news title tend to be counted as neutral."
],
"dataset_datasetAvailability": "directDownload",
"dataset_datasetSize": 6325249,
"dataset_datasetType": [
"categorical",
"text"
],
"dataset_datasetUpdateMechanism": "manual",
"dataset_hasSensitivePersonalInformation": "no",
"dataset_intendedUse": "For training and evaluation of sentiment analysis models.",
"dataset_knownBias": [
"The corpus does not statistically represent the language register or the proportion of diverse Thai dialects.",
"The primary language of this corpus is Central Thai.",
"Due to the nature of social media, the language style tends to be informal and conversational.",
"However, it also contains some more formal language from news headlines and advertisements.",
"The domains are mixed, with the majority being consumer products and services (restaurants, cosmetics, drinks, cars, hotels), and a smaller portion consisting of current affairs."
],
"description": "Social media messages in Thai language with sentiment label (positive, neutral, negative, question). Contains 26,737 messages. Released to public domain under Creative Commons Zero v1.0 Universal license. More characteristics of the data can be explored by this notebook: https://github.com/PyThaiNLP/wisesight-sentiment/blob/master/exploration.ipynb.",
"name": "PyThaiNLP/Wisesight Sentiment Corpus with Word Tokenization Label",
"originatedBy": [
"https://pythainlp.org/",
"https://wisesight.com/"
],
"releaseTime": "2024-11-06T19:30:00Z",
"software_copyrightText": "Dedicated to the public domain under CC0 1.0 Universal by Wisesight (Thailand) Co., Ltd. and PyThaiNLP project.",
"software_downloadLocation": "https://github.com/PyThaiNLP/wisesight-sentiment/releases",
"software_homePage": "https://github.com/PyThaiNLP/wisesight-sentiment/",
"software_packageVersion": "1.1",
"software_primaryPurpose": "data",
"spdxId": "https://spdx.org/spdxdocs/DatasetPackage/01-c69d31e2-43cd-4918-ab26-71f198a85cd1",
"summary": "Social media messages in Thai language with sentiment label. Contains 26,737 messages.",
"type": "dataset_DatasetPackage"
},
{
"contentType": "text/plain;charset=UTF-8",
"creationInfo": "_:creationinfo",
"name": "LICENSE",
"originatedBy": [
"https://pythainlp.org/"
],
"releaseTime": "2020-05-20T20:00:00Z",
"software_primaryPurpose": "documentation",
"spdxId": "https://spdx.org/spdxdocs/File/01-0710b717-a9c1-41de-af42-f41827c30ccc",
"summary": "License file for the dataset, which is CC0 1.0 Universal.",
"type": "software_File"
},
{
"contentType": "text/plain;charset=UTF-8",
"creationInfo": "_:creationinfo",
"name": "neg.txt",
"originatedBy": [
"https://wisesight.com/"
],
"releaseTime": "2019-03-31T20:00:00Z",
"software_primaryPurpose": "data",
"spdxId": "https://spdx.org/spdxdocs/File/02-32d6319e-e619-42f8-8834-15792c780e7c",
"summary": "Negative sentiment messages. 14,561 messages.",
"type": "software_File"
},
{
"contentType": "text/plain;charset=UTF-8",
"creationInfo": "_:creationinfo",
"name": "neu.txt",
"originatedBy": [
"https://wisesight.com/"
],
"releaseTime": "2019-03-31T20:00:00Z",
"software_primaryPurpose": "data",
"spdxId": "https://spdx.org/spdxdocs/File/03-c2ba0c57-d1fd-4fad-8daa-227f86e62d70",
"summary": "Neutral sentiment messages. 6,823 messages.",
"type": "software_File"
},
{
"contentType": "text/plain;charset=UTF-8",
"creationInfo": "_:creationinfo",
"name": "pos.txt",
"originatedBy": [
"https://wisesight.com/"
],
"releaseTime": "2019-03-31T20:00:00Z",
"software_primaryPurpose": "data",
"spdxId": "https://spdx.org/spdxdocs/File/04-743ce7f8-b4a6-476f-bf3a-eb10bea3d3b2",
"summary": "Positive sentiment messages. 4,778 messages",
"type": "software_File"
},
{
"contentType": "text/plain;charset=UTF-8",
"creationInfo": "_:creationinfo",
"name": "q.txt",
"originatedBy": [
"https://wisesight.com/"
],
"releaseTime": "2019-03-31T20:00:00Z",
"software_primaryPurpose": "data",
"spdxId": "https://spdx.org/spdxdocs/File/05-c9bfbf0d-8993-4a5c-ab98-22bd2e1044b6",
"summary": "Question messages. 575 messages.",
"type": "software_File"
},
{
"contentType": "text/plain;charset=UTF-8",
"creationInfo": "_:creationinfo",
"name": "README.md",
"originatedBy": [
"https://pythainlp.org/"
],
"releaseTime": "2024-11-06T19:30:00Z",
"software_primaryPurpose": "documentation",
"spdxId": "https://spdx.org/spdxdocs/File/06-c79a169b-df75-4a01-aa8b-713a5dc86441",
"summary": "README file for the dataset. Contains information about the dataset, its collection process, and its annotation methodology.",
"type": "software_File"
},
{
"creationInfo": "_:creationinfo",
"from": "https://spdx.org/spdxdocs/DatasetPackage/01-c69d31e2-43cd-4918-ab26-71f198a85cd1",
"relationshipType": "contains",
"spdxId": "https://spdx.org/spdxdocs/Relationship/contains-01-805ea98f-16a4-4b37-b706-387403d39d2d",
"summary": "DatasetPackage/01 contains LICENSE, neg.txt, neu.txt, pos.txt, q.txt, and README.md.",
"to": [
"https://spdx.org/spdxdocs/File/01-0710b717-a9c1-41de-af42-f41827c30ccc",
"https://spdx.org/spdxdocs/File/02-32d6319e-e619-42f8-8834-15792c780e7c",
"https://spdx.org/spdxdocs/File/03-c2ba0c57-d1fd-4fad-8daa-227f86e62d70",
"https://spdx.org/spdxdocs/File/04-743ce7f8-b4a6-476f-bf3a-eb10bea3d3b2",
"https://spdx.org/spdxdocs/File/05-c9bfbf0d-8993-4a5c-ab98-22bd2e1044b6",
"https://spdx.org/spdxdocs/File/06-c79a169b-df75-4a01-aa8b-713a5dc86441"
],
"type": "Relationship"
},
{
"creationInfo": "_:creationinfo",
"from": "https://spdx.org/spdxdocs/File/06-c79a169b-df75-4a01-aa8b-713a5dc86441",
"relationshipType": "describes",
"spdxId": "https://spdx.org/spdxdocs/Relationship/describes-01-d8785f35-497b-4e0a-8fac-d4eede982e4e",
"summary": "README.md describes neg.txt, neu.txt, pos.txt, and q.txt.",
"to": [
"https://spdx.org/spdxdocs/File/02-32d6319e-e619-42f8-8834-15792c780e7c",
"https://spdx.org/spdxdocs/File/03-c2ba0c57-d1fd-4fad-8daa-227f86e62d70",
"https://spdx.org/spdxdocs/File/04-743ce7f8-b4a6-476f-bf3a-eb10bea3d3b2",
"https://spdx.org/spdxdocs/File/05-c9bfbf0d-8993-4a5c-ab98-22bd2e1044b6"
],
"type": "Relationship"
},
{
"creationInfo": "_:creationinfo",
"from": "https://spdx.org/spdxdocs/DatasetPackage/01-c69d31e2-43cd-4918-ab26-71f198a85cd1",
"relationshipType": "hasConcludedLicense",
"spdxId": "https://spdx.org/spdxdocs/Relationship/concludedLicense-01-3cfe0c7d-1a2d-4609-b468-0b2c2ed50501",
"summary": "DatasetPackage/01 has a concluded license as CC0-1.0.",
"to": [
"https://spdx.org/licenses/CC0-1.0"
],
"type": "Relationship"
},
{
"creationInfo": "_:creationinfo",
"from": "https://spdx.org/spdxdocs/DatasetPackage/01-c69d31e2-43cd-4918-ab26-71f198a85cd1",
"relationshipType": "hasDeclaredLicense",
"spdxId": "https://spdx.org/spdxdocs/Relationship/declaredLicense-01-fbc5f8d0-e7ab-45a2-b5b5-1cabe6e697ac",
"summary": "DatasetPackage/01 has a declared license as CC0-1.0.",
"to": [
"https://spdx.org/licenses/CC0-1.0"
],
"type": "Relationship"
}
]
}