"""Upload a PDF to Chunkify, run the processing job, and save the result ZIP."""

import argparse
import json
import os
import sys

import requests

PROD_URL = "https://app.chunkify.io"

# (connect, read) timeouts in seconds passed to every `requests` call.
# requests applies no timeout by default, so a stalled connection would
# otherwise hang the tool forever.
_API_TIMEOUT = (10, 60)
# Uploading and processing can legitimately take a long time, so only the
# connection attempt is bounded; the read side waits as long as needed.
_TRANSFER_TIMEOUT = (10, None)


def _error_detail(response):
    """Return the ``message`` field of a JSON error body, else the raw text."""
    try:
        body = response.json()
    except ValueError:
        # Every JSON decode error requests can raise is a ValueError subclass.
        return response.text
    if isinstance(body, dict):
        return body.get("message", response.text)
    # Valid JSON, but not an object (e.g. a list or a bare string).
    return response.text


def _delete_document(base_url, headers, doc_id):
    """Best-effort removal of a document record whose upload failed.

    Never raises: a failed cleanup is reported and otherwise ignored so it
    cannot mask the upload error that triggered it.
    """
    print("Attempting to delete document record...")
    delete_url = f"{base_url}/api/docs/delete"
    try:
        delete_response = requests.post(
            delete_url,
            headers=headers,
            json={"docID": doc_id},
            timeout=_API_TIMEOUT,
        )
        delete_response.raise_for_status()
    except requests.exceptions.RequestException as err:
        print(f"⚠️ Could not delete document record: {err}")


def upload_file_and_process(filename, api_key, base_url, outdir, chunkify_dir=None):
    """
    Manages the full upload and processing workflow.

    The workflow has three steps: request an upload URL and document ID,
    PUT the file to that URL, then start processing and save the returned
    ZIP as ``<outdir>/<docID>.zip``. Progress and errors are printed.

    Args:
        filename: Path of the PDF file to upload.
        api_key: API key sent as a Bearer token.
        base_url: Base URL of the service, without a trailing slash.
        outdir: Directory for the ZIP file; created if it does not exist.
        chunkify_dir: Optional value sent as ``chunkifyPath`` in the upload
            request.

    Returns:
        True if the ZIP file was saved, False if any step failed.
    """
    if not os.path.exists(filename):
        print(f"❌ Error: File not found at '{filename}'")
        return False

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # 1. Get the upload URL
    print(f"Step 1: Requesting upload URL from {base_url}...")
    try:
        upload_details_url = f"{base_url}/api/v1/upload"
        payload = {"file": os.path.basename(filename), "type": "application/pdf"}
        if chunkify_dir:
            payload['chunkifyPath'] = chunkify_dir
            print(f"ℹ️ Sending with chunkifyPath: {chunkify_dir}")

        response = requests.post(
            upload_details_url,
            headers=headers,
            json=payload,
            timeout=_API_TIMEOUT,
        )
        response.raise_for_status()

        upload_data = response.json()
        doc_id = upload_data.get("docID")
        upload_url = upload_data.get("uploadURL")
    except requests.exceptions.HTTPError as err:
        print(
            f"❌ Error fetching upload details. Status: {err.response.status_code}, "
            f"Response: {_error_detail(err.response)}"
        )
        return False
    except Exception as e:
        print(f"❌ An unexpected error occurred: {e}")
        return False

    # Without both values the remaining steps cannot work; stop here rather
    # than failing later with a confusing error about a None URL.
    if not doc_id or not upload_url:
        print("❌ Error: Upload response did not include both 'docID' and 'uploadURL'.")
        return False
    print(f"✅ Received DocID: {doc_id}")

    # 2. Upload the file to the signed URL
    print("\nStep 2: Uploading file content...")
    try:
        with open(filename, 'rb') as f:
            upload_response = requests.put(
                upload_url,
                data=f,
                headers={"Content-Type": "application/pdf"},
                timeout=_TRANSFER_TIMEOUT,
            )
        upload_response.raise_for_status()
        print("✅ File content uploaded successfully.")
    except requests.exceptions.HTTPError as err:
        print(
            f"❌ File upload failed. Status: {err.response.status_code}, "
            f"Details: {err.response.text}"
        )
        _delete_document(base_url, headers, doc_id)
        return False
    except Exception as e:
        print(f"❌ An unexpected error occurred during file upload: {e}")
        # The record was already created in step 1, so remove it here too.
        _delete_document(base_url, headers, doc_id)
        return False

    # 3. Start the processing job and save the ZIP file directly
    print("\nStep 3: Starting processing job...")
    try:
        process_url = f"{base_url}/api/v1/process"
        process_response = requests.post(
            process_url,
            headers=headers,
            json={"docID": doc_id},
            timeout=_TRANSFER_TIMEOUT,
        )
        process_response.raise_for_status()
        print("✅ Processing job finished. Saving ZIP file...")

        # Get the page count from the response header
        num_pages = process_response.headers.get('X-Page-Count')
        if num_pages:
            print(f"📄 Billing info: Document has {num_pages} pages.")

        # Ensure the output directory exists
        os.makedirs(outdir, exist_ok=True)
        zip_filepath = os.path.join(outdir, f"{doc_id}.zip")

        # Write the binary content of the response directly to a file
        with open(zip_filepath, 'wb') as f:
            f.write(process_response.content)

        print(f"✅ Successfully saved ZIP file: {zip_filepath}")
    except requests.exceptions.HTTPError as err:
        print(
            f"❌ Processing failed. Status: {err.response.status_code}, "
            f"Details: {err.response.text}"
        )
        return False
    except Exception as e:
        print(f"❌ An unexpected error occurred during processing: {e}")
        return False

    return True


def main():
    """Main function to parse arguments and start the process.

    Exits with status 1 when the workflow fails so that shells and scripts
    can detect the failure.
    """
    parser = argparse.ArgumentParser(
        description="Upload a PDF, process it, and download the results as a ZIP file containing DITA.",
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument("filename", help="The path to the PDF file to upload.")
    parser.add_argument("--api-key", required=True, help="Your API key for authentication.")
    parser.add_argument(
        "--outdir",
        default=".",
        help="Optional: Relative path to save the output ZIP file.\nDefaults to the current directory."
    )
    parser.add_argument(
        "--chunkify-dir",
        help="Optional: A virtual directory path to specify segments (e.g., 'max/test')."
    )

    args = parser.parse_args()

    base_url = PROD_URL
    print(f"Target environment: {base_url}")

    succeeded = upload_file_and_process(
        args.filename, args.api_key, base_url, args.outdir, args.chunkify_dir
    )
    if not succeeded:
        sys.exit(1)


if __name__ == "__main__":
    main()