From 1550fc759913c33a304292fd95b26e79f53a401f Mon Sep 17 00:00:00 2001 From: HaimKortovich Date: Fri, 24 Apr 2026 15:16:36 -0500 Subject: [PATCH] dont use _ in metadata --- app/routers/documents.py | 97 ++++++++++++++++++++-------------------- app/s3.py | 4 -- 2 files changed, 48 insertions(+), 53 deletions(-) diff --git a/app/routers/documents.py b/app/routers/documents.py index 5549c65..eeb6b67 100644 --- a/app/routers/documents.py +++ b/app/routers/documents.py @@ -3,7 +3,6 @@ from fastapi import APIRouter, HTTPException, UploadFile, File, Request from datetime import datetime from app import s3, pdf, utils -from app.config import settings from app.enums import DocumentType from app.models import DocumentMetadata, UploadResponse, DownloadUrlResponse, FieldsResponse from app.logger import get_logger @@ -44,13 +43,13 @@ async def upload_document( sanitized_filename = utils.sanitize_filename(file.filename) s3_key = utils.document_s3_key(org_id, document_id, sanitized_filename) - # Prepare metadata + # Prepare metadata metadata_dict = { - "org_id": org_id, - "document_type": document_type.value, - "filename": sanitized_filename, - "file_size": str(file_size), - "created_at": datetime.utcnow().isoformat() + "org-id": org_id, + "document-type": document_type.value, + "filename": sanitized_filename, + "file-size": str(file_size), + "created-at": datetime.utcnow().isoformat() } # Upload to S3 @@ -66,15 +65,15 @@ async def upload_document( # Create metadata response metadata = DocumentMetadata( - document_id=document_id, - org_id=org_id, - document_type=document_type, - filename=sanitized_filename, - content_type=detected_content_type, - file_size=file_size, - s3_key=s3_key, - created_at=datetime.utcnow(), - updated_at=datetime.utcnow() + document_id=document_id, + org_id=org_id, + document_type=document_type, + filename=sanitized_filename, + content_type=detected_content_type, + file_size=file_size, + s3_key=s3_key, + created_at=datetime.utcnow(), + updated_at=datetime.utcnow() ) logger.info(f"Upload completed - document_id: {document_id}") @@ -118,17 +117,17 @@ async def rewrite_document( # Verify org_id matches existing_metadata = s3.get_file_metadata(s3_key) - if existing_metadata.get("org_id") != org_id: + if existing_metadata.get("org-id") != org_id: logger.error(f"Organization mismatch for document: {document_id}") raise HTTPException(status_code=403, detail="Organization mismatch") - # Prepare metadata + # Prepare metadata metadata_dict = { - "org_id": org_id, - "document_type": document_type.value, - "filename": sanitized_filename, - "file_size": str(file_size), - "updated_at": datetime.utcnow().isoformat() + "org-id": org_id, + "document-type": document_type.value, + "filename": sanitized_filename, + "file-size": str(file_size), + "updated-at": datetime.utcnow().isoformat() } # Upload to S3 (overwrites existing) @@ -142,16 +141,16 @@ async def rewrite_document( # Generate download URL download_url = s3.presigned_download_url(s3_key) - # Create metadata response + # Create metadata response metadata = DocumentMetadata( - document_id=document_id, - org_id=org_id, - document_type=document_type, - filename=sanitized_filename, - content_type=detected_content_type, - file_size=file_size, - s3_key=s3_key, - created_at=datetime.fromisoformat(existing_metadata.get("created_at", datetime.utcnow().isoformat())), + document_id=document_id, + org_id=org_id, + document_type=document_type, + filename=sanitized_filename, + content_type=detected_content_type, + file_size=file_size, + s3_key=s3_key, + created_at=datetime.fromisoformat(existing_metadata.get("created-at", datetime.utcnow().isoformat())), updated_at=datetime.utcnow() ) @@ -170,7 +169,7 @@ async def get_document(request: Request, document_id: str): try: response = client.list_objects_v2( - Bucket=settings.s3_bucket, + Bucket=s3.get_bucket_name(), Prefix=prefix, MaxKeys=1 ) @@ -188,13 +187,13 @@ async def get_document(request: Request, document_id: str): s3_metadata = s3.get_file_metadata(s3_key) # Verify org_id matches - if s3_metadata.get("org_id") != org_id: + if s3_metadata.get("org-id") != org_id: logger.error(f"Organization mismatch for document: {document_id}") raise HTTPException(status_code=403, detail="Organization mismatch") # Get object info try: - object_info = client.head_object(Bucket=settings.s3_bucket, Key=s3_key) + object_info = client.head_object(Bucket=s3.get_bucket_name(), Key=s3_key) except Exception as e: logger.error(f"Failed to get object info: {e}") raise HTTPException(status_code=500, detail="Failed to retrieve document") @@ -202,14 +201,14 @@ async def get_document(request: Request, document_id: str): # Create metadata response metadata = DocumentMetadata( document_id=document_id, - org_id=s3_metadata.get("org_id"), - document_type=DocumentType(s3_metadata.get("document_type")), + org_id=s3_metadata.get("org-id"), + document_type=DocumentType(s3_metadata.get("document-type")), filename=s3_metadata.get("filename"), content_type=object_info.get("ContentType"), - file_size=int(s3_metadata.get("file_size", object_info.get("ContentLength", 0))), + file_size=int(s3_metadata.get("file-size", object_info.get("ContentLength", 0))), s3_key=s3_key, - created_at=datetime.fromisoformat(s3_metadata.get("created_at", datetime.utcnow().isoformat())), - updated_at=datetime.fromisoformat(s3_metadata.get("updated_at", datetime.utcnow().isoformat())) + created_at=datetime.fromisoformat(s3_metadata.get("created-at", datetime.utcnow().isoformat())), + updated_at=datetime.fromisoformat(s3_metadata.get("updated-at", datetime.utcnow().isoformat())) ) logger.info(f"Get document completed - document_id: {document_id}") @@ -227,7 +226,7 @@ async def get_download_url(request: Request, document_id: str, expires_in: int = try: response = client.list_objects_v2( - Bucket=settings.s3_bucket, + Bucket=s3.get_bucket_name(), Prefix=prefix, MaxKeys=1 ) @@ -243,7 +242,7 @@ async def get_download_url(request: Request, document_id: str, expires_in: int = # Verify org_id matches s3_metadata = s3.get_file_metadata(s3_key) - if s3_metadata.get("org_id") != org_id: + if s3_metadata.get("org-id") != org_id: logger.error(f"Organization mismatch for document: {document_id}") raise HTTPException(status_code=403, detail="Organization mismatch") @@ -265,7 +264,7 @@ async def get_document_fields(request: Request, document_id: str): try: response = client.list_objects_v2( - Bucket=settings.s3_bucket, + Bucket=s3.get_bucket_name(), Prefix=prefix, MaxKeys=1 ) @@ -283,12 +282,12 @@ async def get_document_fields(request: Request, document_id: str): s3_metadata = s3.get_file_metadata(s3_key) # Verify org_id matches - if s3_metadata.get("org_id") != org_id: + if s3_metadata.get("org-id") != org_id: logger.error(f"Organization mismatch for document: {document_id}") raise HTTPException(status_code=403, detail="Organization mismatch") # Check if PDF - document_type = s3_metadata.get("document_type") + document_type = s3_metadata.get("document-type") if document_type != DocumentType.PDF.value: logger.error(f"Document is not PDF: {document_type}") raise HTTPException(status_code=400, detail="Field discovery only supported for PDF documents") @@ -323,7 +322,7 @@ async def delete_document(request: Request, document_id: str): try: response = client.list_objects_v2( - Bucket=settings.s3_bucket, + Bucket=s3.get_bucket_name(), Prefix=prefix, MaxKeys=1 ) @@ -339,7 +338,7 @@ async def delete_document(request: Request, document_id: str): # Verify org_id matches s3_metadata = s3.get_file_metadata(s3_key) - if s3_metadata.get("org_id") != org_id: + if s3_metadata.get("org-id") != org_id: logger.error(f"Organization mismatch for document: {document_id}") raise HTTPException(status_code=403, detail="Organization mismatch") @@ -348,7 +347,7 @@ async def delete_document(request: Request, document_id: str): s3.delete_file(s3_key) logger.info(f"Document deleted - document_id: {document_id}") except Exception as e: - logger.error(f"Failed to delete document: {e}") - raise HTTPException(status_code=500, detail=f"Failed to delete document: {e}") + logger.error(f"Failed to delete file: {e}") + raise HTTPException(status_code=500, detail=f"Failed to delete file: {e}") return {"message": "Document deleted successfully"} diff --git a/app/s3.py b/app/s3.py index b2d7f2d..724a49c 100644 --- a/app/s3.py +++ b/app/s3.py @@ -90,10 +90,6 @@ def upload_file(file: UploadFile, s3_key: str, content_type: str, metadata: dict file_content = file.file.read() file.file.seek(0) - extra_args = {"ContentType": content_type} - if metadata: - extra_args["Metadata"] = metadata - client.put_object( Bucket=bucket_name, Key=s3_key,