diff --git a/nougat/dataset/create_index.py b/nougat/dataset/create_index.py index 68bb68b..c2b5b91 100644 --- a/nougat/dataset/create_index.py +++ b/nougat/dataset/create_index.py @@ -41,7 +41,7 @@ def read_metadata(data: Dict) -> List[List[Dict]]: out = [[] for _ in range(N)] # pdffigures2 meta data if "pdffigures" in data and data["pdffigures"]: - for item in data["pdffigures"]: + for item in data["pdffigures"]["figures"]: p = item.pop("page", None) if p is None or p >= N: continue