-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfilter.php
51 lines (43 loc) · 1.26 KB
/
filter.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
<?php
// Serve the response as plain text rather than PHP's default HTML content type.
header("Content-Type: text/plain");
include 'simple_html_dom.php';

// Parse data.html into a DOM using file_get_html() from simple_html_dom.
$html_dom = file_get_html('data.html');
if ($html_dom === false) {
    // NOTE(review): simple_html_dom's file_get_html() is expected to return
    // false when the file cannot be read or is empty/too large - confirm
    // against the bundled library version. Failing here gives a clear message
    // instead of a fatal "call to a member function find() on bool" below.
    throw new RuntimeException('Unable to load or parse data.html');
}

// Collect every table cell (<td>) in document order. Despite the variable
// name, these are individual cells, not rows.
$table_rows = $html_dom->find('td');

// Exclusive upper bound for the start index of a record: the extraction loop
// reads the cells at $i, $i + 1 and $i + 2, so $i must stay below count - 2.
$total = count($table_rows) - 2;
/**
 * Normalise the whitespace of a piece of text.
 *
 * Every run of whitespace characters (tabs, newlines, carriage returns,
 * spaces, ...) is replaced by one single space, and whitespace at the start
 * and end of the result is stripped.
 *
 * @param string $text_to_clean_up Raw text to normalise.
 * @return string The text with collapsed and trimmed whitespace.
 */
function cleanText($text_to_clean_up)
{
    // First squeeze each whitespace run down to a single space...
    $collapsed = preg_replace('/[\t\n\r\s]+/', ' ', $text_to_clean_up);

    // ...then drop whatever whitespace is left at either end.
    return trim($collapsed);
}
// Build a flat list of records, each shaped as
// [section header, cell 1, cell 2, cell 3].
$data = [];

// The first cell is used as the initial section header. Fall back to an empty
// string when the document contains no cells, instead of reading a missing
// offset.
$header = isset($table_rows[0]) ? cleanText($table_rows[0]->plaintext) : '';

// Index 0 was consumed as the header, so records start at index 1.
$i = 1;
while ($i < $total) {
    // A cell with class "bottom_head" starts a new section: remember its text
    // as the current header and step past it to the first data cell.
    // Strict comparison so that a non-string attribute result (e.g. a
    // valueless attribute) is never treated as a match.
    if ($table_rows[$i]->getAttribute('class') === 'bottom_head') {
        // Normalise whitespace the same way as the initial header so every
        // record carries a consistently formatted header.
        $header = cleanText($table_rows[$i]->plaintext);
        $i++;

        // Skipping the header cell may leave fewer than three cells; stop
        // rather than read past the end of $table_rows.
        if ($i >= $total) {
            break;
        }
    }

    // One record is the current header followed by three consecutive cells.
    $data[] = [
        $header,
        cleanText($table_rows[$i]->plaintext),
        cleanText($table_rows[$i + 1]->plaintext),
        cleanText($table_rows[$i + 2]->plaintext),
    ];
    $i += 3;
}

// JSON_THROW_ON_ERROR raises a JsonException on encoding failure instead of
// returning false, which would otherwise be written out as an empty file.
$output = json_encode($data, JSON_THROW_ON_ERROR);
if (file_put_contents('output.json', $output) === false) {
    throw new RuntimeException('Unable to write output.json');
}
echo "\n";