package com.WingWatch.WebScraping;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Locale;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Static helpers for issuing HTTP GET requests through jsoup.
 *
 * <p>The class is {@code abstract} and exposes only static members, so it is
 * used as a namespace rather than instantiated.
 */
public abstract class Requests {
    /** Browser-like User-Agent sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/121.0.0.0 Safari/537.36";
    private static final String ACCEPT_LANGUAGE = "en-US,en;q=0.9";
    private static final String ACCEPT_ENCODING = "gzip, deflate";
    private static final String REFERER = "https://example.com";
    /** Single jsoup timeout, in milliseconds (covers connect and read). */
    private static final int TIMEOUT_MILLIS = 10000;
    /** Charset applied to the output settings when serialising parsed markup. */
    private static final String OUTPUT_CHARSET = "UTF-8";

    /**
     * Performs an HTTP GET against {@code targetUrl} and returns the response as text.
     *
     * <p>Markup responses (HTML / XML content types, or no content type at all) are
     * parsed by jsoup and returned as the document's outer HTML. Any other content
     * type (JSON, plain text, ...) is returned as the raw response body, so it is not
     * wrapped in {@code <html>/<body>} elements or entity-escaped by the HTML parser.
     *
     * @param targetUrl the URL to fetch
     * @return the serialised document for markup responses, otherwise the raw body
     * @throws IOException if the request or reading the response fails
     * @throws URISyntaxException never thrown by this implementation; kept in the
     *         signature so existing callers that catch it continue to compile
     */
    public static String requestGetString(String targetUrl) throws IOException, URISyntaxException {
        Connection.Response response = Jsoup.connect(targetUrl)
                .userAgent(USER_AGENT)
                .header("Accept-Language", ACCEPT_LANGUAGE)
                .header("Accept-Encoding", ACCEPT_ENCODING)
                .header("Referer", REFERER)
                .timeout(TIMEOUT_MILLIS)
                .ignoreContentType(true) // accept JSON / plain text as well as HTML
                .execute();              // GET is the connection's default method

        if (!isMarkup(response.contentType())) {
            // Running JSON or plain text through the HTML parser would wrap and
            // escape it; hand the payload back untouched instead.
            return response.body();
        }

        Document document = response.parse();
        document.outputSettings().charset(OUTPUT_CHARSET);
        return document.outerHtml();
    }

    /**
     * Decides whether a response should go through the markup parser.
     *
     * @param contentType the Content-Type header value, possibly {@code null}
     * @return {@code true} when the header is absent or names an HTML/XML type
     */
    private static boolean isMarkup(String contentType) {
        if (contentType == null) {
            // No header to go on: keep treating the body as HTML.
            return true;
        }
        String lowered = contentType.toLowerCase(Locale.ROOT);
        return lowered.contains("html") || lowered.contains("xml");
    }
}