4
4
import org .jsoup .integration .ParseTest ;
5
5
import org .jsoup .nodes .Document ;
6
6
import org .jsoup .nodes .Element ;
7
+ import org .jsoup .nodes .Node ;
7
8
import org .jsoup .select .Elements ;
8
- import org .jspecify .annotations .NullMarked ;
9
9
import org .junit .jupiter .api .Test ;
10
10
11
11
import java .io .BufferedReader ;
12
12
import java .io .File ;
13
- import java .io .FileReader ;
14
13
import java .io .IOException ;
15
14
import java .io .InputStreamReader ;
16
15
import java .nio .charset .StandardCharsets ;
17
16
import java .nio .file .Files ;
18
17
import java .util .Iterator ;
18
+ import java .util .List ;
19
19
import java .util .NoSuchElementException ;
20
20
21
21
import static org .junit .jupiter .api .Assertions .*;
@@ -37,6 +37,18 @@ void canStream() {
37
37
}
38
38
}
39
39
40
+ @ Test
41
+ void canStreamXml () {
42
+ String html = "<outmost><DIV id=1>D1</DIV><div id=2>D2<p id=3><span>P One</p><p id=4>P Two</p></div><div id=5>D3<p id=6>P three</p>" ;
43
+ try (StreamParser parser = new StreamParser (Parser .xmlParser ()).parse (html , "" )) {
44
+ StringBuilder seen ;
45
+ seen = new StringBuilder ();
46
+ parser .stream ().forEachOrdered (el -> trackSeen (el , seen ));
47
+ assertEquals ("DIV#1[D1]+;span[P One];p#3+;p#4[P Two];div#2[D2]+;p#6[P three];div#5[D3];outmost;" , seen .toString ());
48
+ // checks expected order, and the + indicates that element had a next sibling at time of emission
49
+ }
50
+ }
51
+
40
52
@ Test void canIterate () {
41
53
// same as stream, just a different interface
42
54
String html = "<title>Test</title></head><div id=1>D1</div><div id=2>D2<p id=3><span>P One</p><p id=4>P Two</p></div><div id=5>D3<p id=6>P three</p>" ;
@@ -327,4 +339,94 @@ private static CharacterReader getReader(StreamParser streamer) {
327
339
// the reader should be closed as streamer is closed on completion of read
328
340
assertTrue (isClosed (streamer ));
329
341
}
342
+
343
+ // Fragments
344
+
345
+ @ Test
346
+ void canStreamFragment () {
347
+ String html = "<tr id=1><td>One</td><tr id=2><td>Two</td></tr><tr id=3><td>Three</td></tr>" ;
348
+ Element context = new Element ("table" );
349
+
350
+ try (StreamParser parser = new StreamParser (Parser .htmlParser ()).parseFragment (html , context , "" )) {
351
+ StringBuilder seen = new StringBuilder ();
352
+ parser .stream ().forEachOrdered (el -> trackSeen (el , seen ));
353
+ assertEquals ("td[One];tr#1+;td[Two];tr#2+;td[Three];tr#3;tbody;table;" , seen .toString ());
354
+ // checks expected order, and the + indicates that element had a next sibling at time of emission
355
+ // note that we don't get a full doc, just the fragment (and the context at the end of the stack)
356
+
357
+ assertTrue (isClosed (parser )); // as read to completion
358
+ }
359
+ }
360
+
361
+ @ Test void canIterateFragment () {
362
+ // same as stream, just a different interface
363
+ String html = "<tr id=1><td>One</td><tr id=2><td>Two</td></tr><tr id=3><td>Three</td></tr>" ; // missing </tr>, following <tr> infers it
364
+ Element context = new Element ("table" );
365
+
366
+ try (StreamParser parser = new StreamParser (Parser .htmlParser ()).parseFragment (html , context , "" )) {
367
+ StringBuilder seen = new StringBuilder ();
368
+
369
+ Iterator <Element > it = parser .iterator ();
370
+ while (it .hasNext ()) {
371
+ trackSeen (it .next (), seen );
372
+ }
373
+
374
+ assertEquals ("td[One];tr#1+;td[Two];tr#2+;td[Three];tr#3;tbody;table;" , seen .toString ());
375
+ // checks expected order, and the + indicates that element had a next sibling at time of emission
376
+ // note that we don't get a full doc, just the fragment (and the context at the end of the stack)
377
+
378
+ assertTrue (isClosed (parser )); // as read to completion
379
+ }
380
+ }
381
+
382
+ @ Test
383
+ void canSelectAndCompleteFragment () throws IOException {
384
+ String html = "<tr id=1><td>One</td><tr id=2><td>Two</td></tr><tr id=3><td>Three</td></tr>" ;
385
+ Element context = new Element ("table" );
386
+
387
+ try (StreamParser parser = new StreamParser (Parser .htmlParser ()).parseFragment (html , context , "" )) {
388
+ Element first = parser .expectNext ("td" );
389
+ assertEquals ("One" , first .ownText ());
390
+
391
+ Element el = parser .expectNext ("td" );
392
+ assertEquals ("Two" , el .ownText ());
393
+
394
+ el = parser .expectNext ("td" );
395
+ assertEquals ("Three" , el .ownText ());
396
+
397
+ el = parser .selectNext ("td" );
398
+ assertNull (el );
399
+
400
+ List <Node > nodes = parser .completeFragment ();
401
+ assertEquals (1 , nodes .size ()); // should be the inferred tbody
402
+ Node tbody = nodes .get (0 );
403
+ assertEquals ("tbody" , tbody .nodeName ());
404
+ List <Node > trs = tbody .childNodes ();
405
+ assertEquals (3 , trs .size ()); // should be the three TRs
406
+ assertSame (trs .get (0 ).childNode (0 ), first ); // tr -> td
407
+
408
+ assertSame (parser .document (), first .ownerDocument ()); // the shell document for this fragment
409
+ }
410
+ }
411
+
412
+ @ Test
413
+ void canStreamFragmentXml () throws IOException {
414
+ String html = "<tr id=1><td>One</td></tr><tr id=2><td>Two</td></tr><tr id=3><td>Three</td></tr>" ;
415
+ Element context = new Element ("Other" );
416
+
417
+ try (StreamParser parser = new StreamParser (Parser .xmlParser ()).parseFragment (html , context , "" )) {
418
+ StringBuilder seen = new StringBuilder ();
419
+ parser .stream ().forEachOrdered (el -> trackSeen (el , seen ));
420
+ assertEquals ("td[One];tr#1+;td[Two];tr#2+;td[Three];tr#3;" , seen .toString ());
421
+ // checks expected order, and the + indicates that element had a next sibling at time of emission
422
+ // note that we don't get a full doc, just the fragment
423
+
424
+ assertTrue (isClosed (parser )); // as read to completion
425
+
426
+ List <Node > nodes = parser .completeFragment ();
427
+ assertEquals (3 , nodes .size ());
428
+ assertEquals ("tr" , nodes .get (0 ).nodeName ());
429
+ }
430
+ }
431
+
330
432
}
0 commit comments