Don't use underscores in metadata keys (use hyphens instead)
All checks were successful
Build and Publish / build-release (push) Successful in 49s
All checks were successful
Build and Publish / build-release (push) Successful in 49s
This commit is contained in:
@@ -3,7 +3,6 @@ from fastapi import APIRouter, HTTPException, UploadFile, File, Request
|
||||
from datetime import datetime
|
||||
|
||||
from app import s3, pdf, utils
|
||||
from app.config import settings
|
||||
from app.enums import DocumentType
|
||||
from app.models import DocumentMetadata, UploadResponse, DownloadUrlResponse, FieldsResponse
|
||||
from app.logger import get_logger
|
||||
@@ -46,11 +45,11 @@ async def upload_document(
|
||||
|
||||
# Prepare metadata
|
||||
metadata_dict = {
|
||||
"org_id": org_id,
|
||||
"document_type": document_type.value,
|
||||
"org-id": org_id,
|
||||
"document-type": document_type.value,
|
||||
"filename": sanitized_filename,
|
||||
"file_size": str(file_size),
|
||||
"created_at": datetime.utcnow().isoformat()
|
||||
"file-size": str(file_size),
|
||||
"created-at": datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
# Upload to S3
|
||||
@@ -118,17 +117,17 @@ async def rewrite_document(
|
||||
|
||||
# Verify org_id matches
|
||||
existing_metadata = s3.get_file_metadata(s3_key)
|
||||
if existing_metadata.get("org_id") != org_id:
|
||||
if existing_metadata.get("org-id") != org_id:
|
||||
logger.error(f"Organization mismatch for document: {document_id}")
|
||||
raise HTTPException(status_code=403, detail="Organization mismatch")
|
||||
|
||||
# Prepare metadata
|
||||
metadata_dict = {
|
||||
"org_id": org_id,
|
||||
"document_type": document_type.value,
|
||||
"org-id": org_id,
|
||||
"document-type": document_type.value,
|
||||
"filename": sanitized_filename,
|
||||
"file_size": str(file_size),
|
||||
"updated_at": datetime.utcnow().isoformat()
|
||||
"file-size": str(file_size),
|
||||
"updated-at": datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
# Upload to S3 (overwrites existing)
|
||||
@@ -151,7 +150,7 @@ async def rewrite_document(
|
||||
content_type=detected_content_type,
|
||||
file_size=file_size,
|
||||
s3_key=s3_key,
|
||||
created_at=datetime.fromisoformat(existing_metadata.get("created_at", datetime.utcnow().isoformat())),
|
||||
created_at=datetime.fromisoformat(existing_metadata.get("created-at", datetime.utcnow().isoformat())),
|
||||
updated_at=datetime.utcnow()
|
||||
)
|
||||
|
||||
@@ -170,7 +169,7 @@ async def get_document(request: Request, document_id: str):
|
||||
|
||||
try:
|
||||
response = client.list_objects_v2(
|
||||
Bucket=settings.s3_bucket,
|
||||
Bucket=s3.get_bucket_name(),
|
||||
Prefix=prefix,
|
||||
MaxKeys=1
|
||||
)
|
||||
@@ -188,13 +187,13 @@ async def get_document(request: Request, document_id: str):
|
||||
s3_metadata = s3.get_file_metadata(s3_key)
|
||||
|
||||
# Verify org_id matches
|
||||
if s3_metadata.get("org_id") != org_id:
|
||||
if s3_metadata.get("org-id") != org_id:
|
||||
logger.error(f"Organization mismatch for document: {document_id}")
|
||||
raise HTTPException(status_code=403, detail="Organization mismatch")
|
||||
|
||||
# Get object info
|
||||
try:
|
||||
object_info = client.head_object(Bucket=settings.s3_bucket, Key=s3_key)
|
||||
object_info = client.head_object(Bucket=s3.get_bucket_name(), Key=s3_key)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get object info: {e}")
|
||||
raise HTTPException(status_code=500, detail="Failed to retrieve document")
|
||||
@@ -202,14 +201,14 @@ async def get_document(request: Request, document_id: str):
|
||||
# Create metadata response
|
||||
metadata = DocumentMetadata(
|
||||
document_id=document_id,
|
||||
org_id=s3_metadata.get("org_id"),
|
||||
document_type=DocumentType(s3_metadata.get("document_type")),
|
||||
org_id=s3_metadata.get("org-id"),
|
||||
document_type=DocumentType(s3_metadata.get("document-type")),
|
||||
filename=s3_metadata.get("filename"),
|
||||
content_type=object_info.get("ContentType"),
|
||||
file_size=int(s3_metadata.get("file_size", object_info.get("ContentLength", 0))),
|
||||
file_size=int(s3_metadata.get("file-size", object_info.get("ContentLength", 0))),
|
||||
s3_key=s3_key,
|
||||
created_at=datetime.fromisoformat(s3_metadata.get("created_at", datetime.utcnow().isoformat())),
|
||||
updated_at=datetime.fromisoformat(s3_metadata.get("updated_at", datetime.utcnow().isoformat()))
|
||||
created_at=datetime.fromisoformat(s3_metadata.get("created-at", datetime.utcnow().isoformat())),
|
||||
updated_at=datetime.fromisoformat(s3_metadata.get("updated-at", datetime.utcnow().isoformat()))
|
||||
)
|
||||
|
||||
logger.info(f"Get document completed - document_id: {document_id}")
|
||||
@@ -227,7 +226,7 @@ async def get_download_url(request: Request, document_id: str, expires_in: int =
|
||||
|
||||
try:
|
||||
response = client.list_objects_v2(
|
||||
Bucket=settings.s3_bucket,
|
||||
Bucket=s3.get_bucket_name(),
|
||||
Prefix=prefix,
|
||||
MaxKeys=1
|
||||
)
|
||||
@@ -243,7 +242,7 @@ async def get_download_url(request: Request, document_id: str, expires_in: int =
|
||||
|
||||
# Verify org_id matches
|
||||
s3_metadata = s3.get_file_metadata(s3_key)
|
||||
if s3_metadata.get("org_id") != org_id:
|
||||
if s3_metadata.get("org-id") != org_id:
|
||||
logger.error(f"Organization mismatch for document: {document_id}")
|
||||
raise HTTPException(status_code=403, detail="Organization mismatch")
|
||||
|
||||
@@ -265,7 +264,7 @@ async def get_document_fields(request: Request, document_id: str):
|
||||
|
||||
try:
|
||||
response = client.list_objects_v2(
|
||||
Bucket=settings.s3_bucket,
|
||||
Bucket=s3.get_bucket_name(),
|
||||
Prefix=prefix,
|
||||
MaxKeys=1
|
||||
)
|
||||
@@ -283,12 +282,12 @@ async def get_document_fields(request: Request, document_id: str):
|
||||
s3_metadata = s3.get_file_metadata(s3_key)
|
||||
|
||||
# Verify org_id matches
|
||||
if s3_metadata.get("org_id") != org_id:
|
||||
if s3_metadata.get("org-id") != org_id:
|
||||
logger.error(f"Organization mismatch for document: {document_id}")
|
||||
raise HTTPException(status_code=403, detail="Organization mismatch")
|
||||
|
||||
# Check if PDF
|
||||
document_type = s3_metadata.get("document_type")
|
||||
document_type = s3_metadata.get("document-type")
|
||||
if document_type != DocumentType.PDF.value:
|
||||
logger.error(f"Document is not PDF: {document_type}")
|
||||
raise HTTPException(status_code=400, detail="Field discovery only supported for PDF documents")
|
||||
@@ -323,7 +322,7 @@ async def delete_document(request: Request, document_id: str):
|
||||
|
||||
try:
|
||||
response = client.list_objects_v2(
|
||||
Bucket=settings.s3_bucket,
|
||||
Bucket=s3.get_bucket_name(),
|
||||
Prefix=prefix,
|
||||
MaxKeys=1
|
||||
)
|
||||
@@ -339,7 +338,7 @@ async def delete_document(request: Request, document_id: str):
|
||||
|
||||
# Verify org_id matches
|
||||
s3_metadata = s3.get_file_metadata(s3_key)
|
||||
if s3_metadata.get("org_id") != org_id:
|
||||
if s3_metadata.get("org-id") != org_id:
|
||||
logger.error(f"Organization mismatch for document: {document_id}")
|
||||
raise HTTPException(status_code=403, detail="Organization mismatch")
|
||||
|
||||
@@ -348,7 +347,7 @@ async def delete_document(request: Request, document_id: str):
|
||||
s3.delete_file(s3_key)
|
||||
logger.info(f"Document deleted - document_id: {document_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete document: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to delete document: {e}")
|
||||
logger.error(f"Failed to delete file: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to delete file: {e}")
|
||||
|
||||
return {"message": "Document deleted successfully"}
|
||||
|
||||
@@ -90,10 +90,6 @@ def upload_file(file: UploadFile, s3_key: str, content_type: str, metadata: dict
|
||||
file_content = file.file.read()
|
||||
file.file.seek(0)
|
||||
|
||||
extra_args = {"ContentType": content_type}
|
||||
if metadata:
|
||||
extra_args["Metadata"] = metadata
|
||||
|
||||
client.put_object(
|
||||
Bucket=bucket_name,
|
||||
Key=s3_key,
|
||||
|
||||
Reference in New Issue
Block a user