Skip to content

Commit 9522c3d

Browse files
authored
Merge pull request #16 from UniversalDataTool/custom-regex
Introduce Custom Regex for Tokenizing Words
2 parents 18d998a + 69e9f52 commit 9522c3d

File tree

10 files changed

+287
-152
lines changed

10 files changed

+287
-152
lines changed

src/components/Document/index.js

+30-127
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,14 @@ import type {
66
Relationship
77
} from "../../types"
88
import { styled } from "@material-ui/styles"
9-
import stringToSequence from "../../string-to-sequence.js"
10-
import Tooltip from "@material-ui/core/Tooltip"
119
import RelationshipArrows from "../RelationshipArrows"
1210
import colors from "../../colors"
1311
import ArrowToMouse from "../ArrowToMouse"
1412
import { useTimeout, useWindowSize } from "react-use"
13+
import SequenceItem from "../SequenceItem"
1514
import classNames from "classnames"
15+
import stringToSequence from "../../string-to-sequence"
16+
import useEventCallback from "use-event-callback"
1617

1718
const Container = styled("div")(({ relationshipsOn }) => ({
1819
lineHeight: 1.5,
@@ -21,50 +22,6 @@ const Container = styled("div")(({ relationshipsOn }) => ({
2122
flexWrap: "wrap"
2223
}))
2324

24-
const SequenceItem = styled("span")(({ color, relationshipsOn }) => ({
25-
display: "inline-flex",
26-
cursor: "pointer",
27-
backgroundColor: color,
28-
color: "#fff",
29-
padding: 4,
30-
margin: 4,
31-
marginBottom: relationshipsOn ? 64 : 4,
32-
paddingLeft: 10,
33-
paddingRight: 10,
34-
borderRadius: 4,
35-
userSelect: "none",
36-
boxSizing: "border-box",
37-
"&.unlabeled": {
38-
color: "#333",
39-
paddingTop: 4,
40-
paddingBottom: 4,
41-
paddingLeft: 2,
42-
paddingRight: 2,
43-
".notSpace:hover": {
44-
paddingTop: 2,
45-
paddingBottom: 2,
46-
paddingLeft: 0,
47-
paddingRight: 0,
48-
border: `2px dashed #ccc`
49-
}
50-
}
51-
}))
52-
53-
const LabeledText = styled("div")({
54-
display: "inline-flex",
55-
cursor: "pointer",
56-
alignSelf: "center",
57-
fontSize: 11,
58-
width: 18,
59-
height: 18,
60-
alignItems: "center",
61-
justifyContent: "center",
62-
marginLeft: 4,
63-
borderRadius: 9,
64-
color: "#fff",
65-
backgroundColor: "rgba(0,0,0,0.2)"
66-
})
67-
6825
type Props = {
6926
sequence: Array<SequenceItemData>,
7027
relationships: Array<Relationship>,
@@ -126,6 +83,16 @@ export default function Document({
12683
highlightedItems.push(i)
12784
}
12885

86+
const onRemoveLabel = useEventCallback(sequenceItemIndex => {
87+
onSequenceChange(
88+
sequence
89+
.flatMap((s, i) =>
90+
i !== sequenceItemIndex ? s : stringToSequence(s.text)
91+
)
92+
.filter(s => s.text.length > 0)
93+
)
94+
})
95+
12996
return (
13097
<Container
13198
relationshipsOn={Boolean(relationships)}
@@ -142,89 +109,25 @@ export default function Document({
142109
>
143110
{sequence.map((seq, i) => (
144111
<SequenceItem
145-
key={seq.textId || i}
146-
ref={elm => {
147-
if (!elm) return
148-
sequenceItemPositionsRef.current[seq.textId] = {
149-
offset: {
150-
left: elm.offsetLeft,
151-
top: elm.offsetTop,
152-
width: elm.offsetWidth,
153-
height: elm.offsetHeight
154-
}
155-
}
156-
}}
112+
{...seq}
113+
sequenceItemIndex={i}
114+
sequenceItemPositionsRef={sequenceItemPositionsRef}
157115
relationshipsOn={Boolean(relationships)}
158-
onMouseUp={e => {
159-
if (!createRelationshipsMode) return
160-
if (!secondSequenceItem) {
161-
setFirstSequenceItem(null)
162-
setSecondSequenceItem(null)
163-
onCreateEmptyRelationship([firstSequenceItem, seq.textId])
164-
} else {
165-
setFirstSequenceItem(null)
166-
setSecondSequenceItem(null)
167-
}
168-
}}
169-
onMouseDown={() => {
170-
if (createRelationshipsMode) {
171-
if (!firstSequenceItem) {
172-
setFirstSequenceItem(seq.textId)
173-
}
174-
} else {
175-
if (seq.label) return
176-
changeHighlightedRange([i, i])
177-
}
178-
}}
179-
onMouseMove={() => {
180-
if (!mouseDown) return
181-
if (!createRelationshipsMode) {
182-
if (seq.label) return
183-
if (i !== lastSelected) {
184-
changeHighlightedRange([
185-
firstSelected === null ? i : firstSelected,
186-
i
187-
])
188-
}
189-
}
190-
}}
191-
className={classNames(
192-
seq.label ? "label" : "unlabeled",
193-
seq.text.trim().length > 0 && "notSpace"
194-
)}
195-
color={
196-
seq.label
197-
? seq.color || colorLabelMap[seq.label] || "#333"
198-
: !createRelationshipsMode &&
199-
seq.text !== " " &&
200-
highlightedItems.includes(i)
201-
? "#ccc"
202-
: "inherit"
203-
}
116+
createRelationshipsMode={createRelationshipsMode}
117+
onChangeFirstSequenceItem={setFirstSequenceItem}
118+
onChangeSecondSequenceItem={setSecondSequenceItem}
119+
onCreateEmptyRelationship={onCreateEmptyRelationship}
120+
onChangeHighlightedRange={changeHighlightedRange}
121+
firstSequenceItem={firstSequenceItem}
122+
secondSequenceItem={secondSequenceItem}
123+
mouseDown={mouseDown}
124+
firstSelected={firstSelected}
125+
lastSelected={lastSelected}
126+
isHighlighted={highlightedItems.includes(i)}
127+
onRemoveLabel={onRemoveLabel}
128+
color={seq.color || colorLabelMap[seq.label]}
204129
key={i}
205-
>
206-
{seq.label ? (
207-
<Tooltip title={seq.label} placement="bottom">
208-
<div>{seq.text}</div>
209-
</Tooltip>
210-
) : (
211-
<div>{seq.text}</div>
212-
)}
213-
{seq.label && !createRelationshipsMode && (
214-
<LabeledText
215-
onClick={e => {
216-
e.stopPropagation()
217-
onSequenceChange(
218-
sequence
219-
.flatMap(s => (s !== seq ? s : stringToSequence(s.text)))
220-
.filter(s => s.text.length > 0)
221-
)
222-
}}
223-
>
224-
<span>{"\u2716"}</span>
225-
</LabeledText>
226-
)}
227-
</SequenceItem>
130+
/>
228131
))}
229132
{firstSequenceItem && !secondSequenceItem && (
230133
<ArrowToMouse

src/components/Document/index.story.js

+29-17
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,14 @@ storiesOf("Document", module)
1818
Math.random() < 0.9
1919
? { text: text + " " }
2020
: {
21-
text: text + " ",
22-
label:
23-
"somelabel" +
24-
Math.random()
25-
.toString()
26-
.slice(-4),
27-
color: "#9638F9"
28-
}
21+
text: text + " ",
22+
label:
23+
"somelabel" +
24+
Math.random()
25+
.toString()
26+
.slice(-4),
27+
color: "#9638F9"
28+
}
2929
)}
3030
/>
3131
))
@@ -41,15 +41,15 @@ storiesOf("Document", module)
4141
Math.random() < 0.9
4242
? { text: text + " ", textId: `l${i}` }
4343
: {
44-
text: text + " ",
45-
textId: `l${i}`,
46-
label:
47-
"somelabel" +
48-
Math.random()
49-
.toString()
50-
.slice(-4),
51-
color: "#9638F9"
52-
}
44+
text: text + " ",
45+
textId: `l${i}`,
46+
label:
47+
"somelabel" +
48+
Math.random()
49+
.toString()
50+
.slice(-4),
51+
color: "#9638F9"
52+
}
5353
)}
5454
relationships={[
5555
{
@@ -60,3 +60,15 @@ storiesOf("Document", module)
6060
]}
6161
/>
6262
))
63+
.add("Character Sequence", () => (
64+
<Document
65+
onSequenceChange={action("onSequenceChange")}
66+
onHighlightedChanged={action("onHighlightedChanged")}
67+
sequence={`Barack Hussein Obama II (born August 4, 1961) is an American attorney and politician who served as the 44th President of the United States from January 20, 2009, to January 20, 2017. A member of the Democratic Party, he was the first African American to serve as president. He was previously a United States Senator from Illinois and a member of the Illinois State Senate.`
68+
.split("")
69+
.map(c => ({
70+
text: c
71+
}))
72+
}
73+
/>
74+
))

src/components/DocumentLabeler/index.js

+9-4
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,10 @@ export default function DocumentLabeler(props: LabelDocumentProps) {
1212
const [selectedLabels, changeSelectedLabels] = useState(
1313
props.initialLabels || (props.initialLabel ? [props.initialLabel] : [])
1414
)
15-
const sequence = useMemo(() => stringToSequence(props.document), [
16-
props.document
17-
])
15+
const sequence = useMemo(
16+
() => stringToSequence(props.document, props.separatorRegex),
17+
[props.document]
18+
)
1819
return (
1920
<div>
2021
<div>
@@ -58,7 +59,11 @@ export default function DocumentLabeler(props: LabelDocumentProps) {
5859
)
5960
})}
6061
</div>
61-
<Document nothingHighlighted sequence={sequence} />
62+
<Document
63+
nothingHighlighted
64+
sequence={sequence}
65+
separatorRegex={props.separatorRegex}
66+
/>
6267
</div>
6368
</div>
6469
)

src/components/NLPAnnotator/index.story.js

+24
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,30 @@ storiesOf("NLPAnnotator", module)
3232
]}
3333
/>
3434
))
35+
.add("Sequence Labeler with Custom Regex", () => (
36+
<NLPAnnotator
37+
hotkeysEnabled
38+
onChange={action("onChange")}
39+
onFinish={action("onFinish")}
40+
onNext={action("onNext")}
41+
onPrev={action("onPrev")}
42+
type="label-sequence"
43+
document={`Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis pharetra ipsum tristique ligula venenatis placerat. Interdum et malesuada fames ac ante ipsum primis in faucibus. Fusce mollis velit nec tellus sollicitudin aliquam. In velit erat, iaculis id consectetur et, tincidunt sit amet mauris. Quisque ultricies, purus eleifend congue malesuada, ipsum erat molestie dolor, in pellentesque lacus purus vel nisl. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nulla sed vestibulum magna. Quisque ut lorem imperdiet, aliquam velit nec, dictum felis.`}
44+
labels={[
45+
{
46+
color: colors[0],
47+
id: "noun",
48+
displayName: "Noun"
49+
},
50+
{
51+
color: colors[1],
52+
id: "proper-noun",
53+
displayName: "Proper Noun"
54+
}
55+
]}
56+
separatorRegex="."
57+
/>
58+
))
3559
.add("Document Labeler", () => (
3660
<NLPAnnotator
3761
hotkeysEnabled

src/components/RelationshipAnnotator/index.js

+1
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,7 @@ export default function RelationshipAnnotator(
133133
</LabelSelectorContainer>
134134
<div style={{ borderTop: "1px solid #ccc", marginTop: 8, paddingTop: 5 }}>
135135
<Document
136+
separatorRegex={props.separatorRegex}
136137
colorLabelMap={colorLabelMap}
137138
nothingHighlighted={highlightedItems.length === 0}
138139
onCreateEmptyRelationship={([first, second]) => {

src/components/SequenceAnnotator/index.js

+2-1
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ export default function SequenceAnnotator(props: SequenceAnnotatorProps) {
1818
? [entity]
1919
: stringToSequence(entity.text, props.separatorRegex)
2020
)
21-
: stringToSequence(props.document)
21+
: stringToSequence(props.document, props.separatorRegex)
2222
)
2323
const colorLabelMap = useMemo(
2424
() =>
@@ -71,6 +71,7 @@ export default function SequenceAnnotator(props: SequenceAnnotatorProps) {
7171
</div>
7272
<div style={{ borderTop: "1px solid #ccc", marginTop: 8, paddingTop: 5 }}>
7373
<Document
74+
separatorRegex={props.separatorRegex}
7475
colorLabelMap={colorLabelMap}
7576
nothingHighlighted={highlightedItems.length === 0}
7677
onHighlightedChanged={highlightedItems =>

0 commit comments

Comments
 (0)