File tree 1 file changed +20
-3
lines changed
1 file changed +20
-3
lines changed Original file line number Diff line number Diff line change @@ -28,14 +28,31 @@ public function __construct(Extractor $extractor)
28
28
29
29
$ encoding = null ;
30
30
$ contentType = $ extractor ->getResponse ()->getHeaderLine ('content-type ' );
31
- preg_match ('/charset=" ?(.*?)(?=$|\s|;|")/i ' , $ contentType , $ match );
31
+ preg_match ('/charset=(?:"| \' ) ?(.*?)(?=$|\s|;|"| \' |> )/i ' , $ contentType , $ match );
32
32
if (!empty ($ match [1 ])) {
33
33
$ encoding = trim ($ match [1 ], ', ' );
34
- } elseif (!empty ($ html )) {
35
- preg_match ('/charset="?(.*?)(?=$|\s|;|")/i ' , $ html , $ match );
34
+ try {
35
+ $ ret = mb_encoding_aliases ($ encoding );
36
+ if ($ ret === false ) {
37
+ $ encoding = null ;
38
+ }
39
+ } catch (\ValueError $ exception ) {
40
+ $ encoding = null ;
41
+ }
42
+ }
43
+ if (is_null ($ encoding ) && !empty ($ html )) {
44
+ preg_match ('/charset=(?:"| \')?(.*?)(?=$|\s|;|"| \'|>)/i ' , $ html , $ match );
36
45
if (!empty ($ match [1 ])) {
37
46
$ encoding = trim ($ match [1 ], ', ' );
38
47
}
48
+ try {
49
+ $ ret = mb_encoding_aliases ($ encoding );
50
+ if ($ ret === false ) {
51
+ $ encoding = null ;
52
+ }
53
+ } catch (\ValueError $ exception ) {
54
+ $ encoding = null ;
55
+ }
39
56
}
40
57
$ this ->document = !empty ($ html ) ? Parser::parse ($ html , $ encoding ) : new DOMDocument ();
41
58
$ this ->initXPath ();
You can’t perform that action at this time.
0 commit comments