For llama cloud parser, can the API endpoint deal with batch job? #14780
Unanswered
timothylimyl
asked this question in
Q&A
Replies: 1 comment 4 replies
-
Yes, the Llama Cloud Parser API can handle batch jobs. You can modify your script to process multiple documents simultaneously, as shown below:
import os
import requests
import time
import threading
from functools import wraps
API_KEY = 'llx-pleasedonotrobme'
BASE_URL = 'https://api.cloud.llamaindex.ai/api/parsing'
def rate_limit(*, allowed_per_second: int):
max_period = 1.0 / allowed_per_second
last_call = [time.perf_counter()]
lock = threading.Lock()
def decorate(func):
@wraps(func)
def limit(*args, **kwargs):
with lock:
elapsed = time.perf_counter() - last_call[0]
hold = max_period - elapsed
if hold > 0:
time.sleep(hold)
result = func(*args, **kwargs)
last_call[0] = time.perf_counter()
return result
return limit
return decorate
@rate_limit(allowed_per_second=15)
def upload_file(file_path):
url = f'{BASE_URL}/upload'
headers = {
'accept': 'application/json',
'Authorization': f'Bearer {API_KEY}',
}
files = {
'file': (file_path, open(file_path, 'rb'), 'application/pdf')
}
response = requests.post(url, headers=headers, files=files)
try:
if response.status_code == 200:
print('File uploaded successfully')
return response.json()
else:
print('Failed to upload file')
print('Status code:', response.status_code)
print('Response:', response.text)
finally:
for file_entry in files:
file_entry[1][1].close()
def check_parsing_job_status(job_id):
url = f'{BASE_URL}/job/{job_id}'
headers = {
'accept': 'application/json',
'Authorization': f'Bearer {API_KEY}'
}
response = requests.get(url, headers=headers)
if response.status_code == 200:
return response.json()
else:
print('Failed to retrieve job status')
print('Status code:', response.status_code)
print('Response:', response.text)
def get_parsing_job_result_in_markdown(job_id):
url = f'{BASE_URL}/job/{job_id}/result/markdown'
headers = {
'accept': 'application/json',
'Authorization': f'Bearer {API_KEY}'
}
response = requests.get(url, headers=headers)
if response.status_code == 200:
return response.json()
else:
print('Failed to retrieve job result in Markdown')
print('Status code:', response.status_code)
print('Response:', response.text)
if __name__ == '__main__':
directory_path = '/home/Obama/Downloads/test_pdf'
file_paths = [os.path.join(directory_path, file) for file in os.listdir(directory_path)]
job_ids = []
for file_path in file_paths:
status = upload_file(file_path)
if status:
job_ids.append(status.get("id", None))
for job_id in job_ids:
while True:
job_status = check_parsing_job_status(job_id)
if job_status and job_status["status"] == "SUCCESS":
result = get_parsing_job_result_in_markdown(job_id)
markdown_text = result.get("markdown", None)
with open(f'{file_path}.md', 'w') as f:
f.write(markdown_text)
break
            time.sleep(5)
This script includes a rate-limiting decorator to manage the API's rate limits and processes multiple files in a batch.
Beta Was this translation helpful? Give feedback.
4 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
-
Currently, based on the endpoint given in your documentation (https://docs.cloud.llamaindex.ai/llamaparse/getting_started/api), can the API handle batch jobs as suggested in the Dosu bot reply above?
Beta Was this translation helpful? Give feedback.
All reactions