Don't use `_` in metadata
All checks were successful
Build and Publish / build-release (push) Successful in 49s

This commit is contained in:
2026-04-24 15:16:36 -05:00
parent c952063d7b
commit 1550fc7599
2 changed files with 48 additions and 53 deletions

View File

@@ -3,7 +3,6 @@ from fastapi import APIRouter, HTTPException, UploadFile, File, Request
from datetime import datetime
from app import s3, pdf, utils
from app.config import settings
from app.enums import DocumentType
from app.models import DocumentMetadata, UploadResponse, DownloadUrlResponse, FieldsResponse
from app.logger import get_logger
@@ -44,13 +43,13 @@ async def upload_document(
sanitized_filename = utils.sanitize_filename(file.filename)
s3_key = utils.document_s3_key(org_id, document_id, sanitized_filename)
# Prepare metadata
# Prepare metadata
metadata_dict = {
"org_id": org_id,
"document_type": document_type.value,
"filename": sanitized_filename,
"file_size": str(file_size),
"created_at": datetime.utcnow().isoformat()
"org-id": org_id,
"document-type": document_type.value,
"filename": sanitized_filename,
"file-size": str(file_size),
"created-at": datetime.utcnow().isoformat()
}
# Upload to S3
@@ -66,15 +65,15 @@ async def upload_document(
# Create metadata response
metadata = DocumentMetadata(
document_id=document_id,
org_id=org_id,
document_type=document_type,
filename=sanitized_filename,
content_type=detected_content_type,
file_size=file_size,
s3_key=s3_key,
created_at=datetime.utcnow(),
updated_at=datetime.utcnow()
document_id=document_id,
org_id=org_id,
document_type=document_type,
filename=sanitized_filename,
content_type=detected_content_type,
file_size=file_size,
s3_key=s3_key,
created_at=datetime.utcnow(),
updated_at=datetime.utcnow()
)
logger.info(f"Upload completed - document_id: {document_id}")
@@ -118,17 +117,17 @@ async def rewrite_document(
# Verify org_id matches
existing_metadata = s3.get_file_metadata(s3_key)
if existing_metadata.get("org_id") != org_id:
if existing_metadata.get("org-id") != org_id:
logger.error(f"Organization mismatch for document: {document_id}")
raise HTTPException(status_code=403, detail="Organization mismatch")
# Prepare metadata
# Prepare metadata
metadata_dict = {
"org_id": org_id,
"document_type": document_type.value,
"filename": sanitized_filename,
"file_size": str(file_size),
"updated_at": datetime.utcnow().isoformat()
"org-id": org_id,
"document-type": document_type.value,
"filename": sanitized_filename,
"file-size": str(file_size),
"updated-at": datetime.utcnow().isoformat()
}
# Upload to S3 (overwrites existing)
@@ -142,16 +141,16 @@ async def rewrite_document(
# Generate download URL
download_url = s3.presigned_download_url(s3_key)
# Create metadata response
# Create metadata response
metadata = DocumentMetadata(
document_id=document_id,
org_id=org_id,
document_type=document_type,
filename=sanitized_filename,
content_type=detected_content_type,
file_size=file_size,
s3_key=s3_key,
created_at=datetime.fromisoformat(existing_metadata.get("created_at", datetime.utcnow().isoformat())),
document_id=document_id,
org_id=org_id,
document_type=document_type,
filename=sanitized_filename,
content_type=detected_content_type,
file_size=file_size,
s3_key=s3_key,
created_at=datetime.fromisoformat(existing_metadata.get("created-at", datetime.utcnow().isoformat())),
updated_at=datetime.utcnow()
)
@@ -170,7 +169,7 @@ async def get_document(request: Request, document_id: str):
try:
response = client.list_objects_v2(
Bucket=settings.s3_bucket,
Bucket=s3.get_bucket_name(),
Prefix=prefix,
MaxKeys=1
)
@@ -188,13 +187,13 @@ async def get_document(request: Request, document_id: str):
s3_metadata = s3.get_file_metadata(s3_key)
# Verify org_id matches
if s3_metadata.get("org_id") != org_id:
if s3_metadata.get("org-id") != org_id:
logger.error(f"Organization mismatch for document: {document_id}")
raise HTTPException(status_code=403, detail="Organization mismatch")
# Get object info
try:
object_info = client.head_object(Bucket=settings.s3_bucket, Key=s3_key)
object_info = client.head_object(Bucket=s3.get_bucket_name(), Key=s3_key)
except Exception as e:
logger.error(f"Failed to get object info: {e}")
raise HTTPException(status_code=500, detail="Failed to retrieve document")
@@ -202,14 +201,14 @@ async def get_document(request: Request, document_id: str):
# Create metadata response
metadata = DocumentMetadata(
document_id=document_id,
org_id=s3_metadata.get("org_id"),
document_type=DocumentType(s3_metadata.get("document_type")),
org_id=s3_metadata.get("org-id"),
document_type=DocumentType(s3_metadata.get("document-type")),
filename=s3_metadata.get("filename"),
content_type=object_info.get("ContentType"),
file_size=int(s3_metadata.get("file_size", object_info.get("ContentLength", 0))),
file_size=int(s3_metadata.get("file-size", object_info.get("ContentLength", 0))),
s3_key=s3_key,
created_at=datetime.fromisoformat(s3_metadata.get("created_at", datetime.utcnow().isoformat())),
updated_at=datetime.fromisoformat(s3_metadata.get("updated_at", datetime.utcnow().isoformat()))
created_at=datetime.fromisoformat(s3_metadata.get("created-at", datetime.utcnow().isoformat())),
updated_at=datetime.fromisoformat(s3_metadata.get("updated-at", datetime.utcnow().isoformat()))
)
logger.info(f"Get document completed - document_id: {document_id}")
@@ -227,7 +226,7 @@ async def get_download_url(request: Request, document_id: str, expires_in: int =
try:
response = client.list_objects_v2(
Bucket=settings.s3_bucket,
Bucket=s3.get_bucket_name(),
Prefix=prefix,
MaxKeys=1
)
@@ -243,7 +242,7 @@ async def get_download_url(request: Request, document_id: str, expires_in: int =
# Verify org_id matches
s3_metadata = s3.get_file_metadata(s3_key)
if s3_metadata.get("org_id") != org_id:
if s3_metadata.get("org-id") != org_id:
logger.error(f"Organization mismatch for document: {document_id}")
raise HTTPException(status_code=403, detail="Organization mismatch")
@@ -265,7 +264,7 @@ async def get_document_fields(request: Request, document_id: str):
try:
response = client.list_objects_v2(
Bucket=settings.s3_bucket,
Bucket=s3.get_bucket_name(),
Prefix=prefix,
MaxKeys=1
)
@@ -283,12 +282,12 @@ async def get_document_fields(request: Request, document_id: str):
s3_metadata = s3.get_file_metadata(s3_key)
# Verify org_id matches
if s3_metadata.get("org_id") != org_id:
if s3_metadata.get("org-id") != org_id:
logger.error(f"Organization mismatch for document: {document_id}")
raise HTTPException(status_code=403, detail="Organization mismatch")
# Check if PDF
document_type = s3_metadata.get("document_type")
document_type = s3_metadata.get("document-type")
if document_type != DocumentType.PDF.value:
logger.error(f"Document is not PDF: {document_type}")
raise HTTPException(status_code=400, detail="Field discovery only supported for PDF documents")
@@ -323,7 +322,7 @@ async def delete_document(request: Request, document_id: str):
try:
response = client.list_objects_v2(
Bucket=settings.s3_bucket,
Bucket=s3.get_bucket_name(),
Prefix=prefix,
MaxKeys=1
)
@@ -339,7 +338,7 @@ async def delete_document(request: Request, document_id: str):
# Verify org_id matches
s3_metadata = s3.get_file_metadata(s3_key)
if s3_metadata.get("org_id") != org_id:
if s3_metadata.get("org-id") != org_id:
logger.error(f"Organization mismatch for document: {document_id}")
raise HTTPException(status_code=403, detail="Organization mismatch")
@@ -348,7 +347,7 @@ async def delete_document(request: Request, document_id: str):
s3.delete_file(s3_key)
logger.info(f"Document deleted - document_id: {document_id}")
except Exception as e:
logger.error(f"Failed to delete document: {e}")
raise HTTPException(status_code=500, detail=f"Failed to delete document: {e}")
logger.error(f"Failed to delete file: {e}")
raise HTTPException(status_code=500, detail=f"Failed to delete file: {e}")
return {"message": "Document deleted successfully"}