-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfinaltest.py
More file actions
27 lines (21 loc) · 1.17 KB
/
finaltest.py
File metadata and controls
27 lines (21 loc) · 1.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import pdfplumber
def final_attempt_parser(pdf_path):
with pdfplumber.open(pdf_path) as pdf:
for i, page in enumerate(pdf.pages):
print(f"\n--- Page {i+1} Map ---")
words = page.extract_words()
bars = [r for r in page.rects if r['width'] < 100]
for bar in bars:
# Find the 'closest' word by calculating distance to the bar's center
bar_center_x = (bar['x0'] + bar['x1']) / 2
bar_center_y = (bar['y0'] + bar['y1']) / 2
# Sort all words on the page by how close they are to this specific bar
closest_words = sorted(words, key=lambda w:
abs((w['x0'] + w['x1'])/2 - bar_center_x) +
abs((w['top'] + w['bottom'])/2 - bar_center_y)
)
# Get the top 2 closest words (one might be the value, one the label)
neighbors = [w['text'] for w in closest_words[:2]]
print(f"Bar at X={round(bar['x0'])} | Height: {round(bar['height'], 2)} | Likely Labels: {neighbors}")
# Replace with your actual filename
final_attempt_parser("123.pdf")