Don't use underscores in metadata keys (use hyphens instead)
All checks were successful
Build and Publish / build-release (push) Successful in 49s
All checks were successful
Build and Publish / build-release (push) Successful in 49s
This commit is contained in:
@@ -3,7 +3,6 @@ from fastapi import APIRouter, HTTPException, UploadFile, File, Request
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from app import s3, pdf, utils
|
from app import s3, pdf, utils
|
||||||
from app.config import settings
|
|
||||||
from app.enums import DocumentType
|
from app.enums import DocumentType
|
||||||
from app.models import DocumentMetadata, UploadResponse, DownloadUrlResponse, FieldsResponse
|
from app.models import DocumentMetadata, UploadResponse, DownloadUrlResponse, FieldsResponse
|
||||||
from app.logger import get_logger
|
from app.logger import get_logger
|
||||||
@@ -44,13 +43,13 @@ async def upload_document(
|
|||||||
sanitized_filename = utils.sanitize_filename(file.filename)
|
sanitized_filename = utils.sanitize_filename(file.filename)
|
||||||
s3_key = utils.document_s3_key(org_id, document_id, sanitized_filename)
|
s3_key = utils.document_s3_key(org_id, document_id, sanitized_filename)
|
||||||
|
|
||||||
# Prepare metadata
|
# Prepare metadata
|
||||||
metadata_dict = {
|
metadata_dict = {
|
||||||
"org_id": org_id,
|
"org-id": org_id,
|
||||||
"document_type": document_type.value,
|
"document-type": document_type.value,
|
||||||
"filename": sanitized_filename,
|
"filename": sanitized_filename,
|
||||||
"file_size": str(file_size),
|
"file-size": str(file_size),
|
||||||
"created_at": datetime.utcnow().isoformat()
|
"created-at": datetime.utcnow().isoformat()
|
||||||
}
|
}
|
||||||
|
|
||||||
# Upload to S3
|
# Upload to S3
|
||||||
@@ -66,15 +65,15 @@ async def upload_document(
|
|||||||
|
|
||||||
# Create metadata response
|
# Create metadata response
|
||||||
metadata = DocumentMetadata(
|
metadata = DocumentMetadata(
|
||||||
document_id=document_id,
|
document_id=document_id,
|
||||||
org_id=org_id,
|
org_id=org_id,
|
||||||
document_type=document_type,
|
document_type=document_type,
|
||||||
filename=sanitized_filename,
|
filename=sanitized_filename,
|
||||||
content_type=detected_content_type,
|
content_type=detected_content_type,
|
||||||
file_size=file_size,
|
file_size=file_size,
|
||||||
s3_key=s3_key,
|
s3_key=s3_key,
|
||||||
created_at=datetime.utcnow(),
|
created_at=datetime.utcnow(),
|
||||||
updated_at=datetime.utcnow()
|
updated_at=datetime.utcnow()
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f"Upload completed - document_id: {document_id}")
|
logger.info(f"Upload completed - document_id: {document_id}")
|
||||||
@@ -118,17 +117,17 @@ async def rewrite_document(
|
|||||||
|
|
||||||
# Verify org_id matches
|
# Verify org_id matches
|
||||||
existing_metadata = s3.get_file_metadata(s3_key)
|
existing_metadata = s3.get_file_metadata(s3_key)
|
||||||
if existing_metadata.get("org_id") != org_id:
|
if existing_metadata.get("org-id") != org_id:
|
||||||
logger.error(f"Organization mismatch for document: {document_id}")
|
logger.error(f"Organization mismatch for document: {document_id}")
|
||||||
raise HTTPException(status_code=403, detail="Organization mismatch")
|
raise HTTPException(status_code=403, detail="Organization mismatch")
|
||||||
|
|
||||||
# Prepare metadata
|
# Prepare metadata
|
||||||
metadata_dict = {
|
metadata_dict = {
|
||||||
"org_id": org_id,
|
"org-id": org_id,
|
||||||
"document_type": document_type.value,
|
"document-type": document_type.value,
|
||||||
"filename": sanitized_filename,
|
"filename": sanitized_filename,
|
||||||
"file_size": str(file_size),
|
"file-size": str(file_size),
|
||||||
"updated_at": datetime.utcnow().isoformat()
|
"updated-at": datetime.utcnow().isoformat()
|
||||||
}
|
}
|
||||||
|
|
||||||
# Upload to S3 (overwrites existing)
|
# Upload to S3 (overwrites existing)
|
||||||
@@ -142,16 +141,16 @@ async def rewrite_document(
|
|||||||
# Generate download URL
|
# Generate download URL
|
||||||
download_url = s3.presigned_download_url(s3_key)
|
download_url = s3.presigned_download_url(s3_key)
|
||||||
|
|
||||||
# Create metadata response
|
# Create metadata response
|
||||||
metadata = DocumentMetadata(
|
metadata = DocumentMetadata(
|
||||||
document_id=document_id,
|
document_id=document_id,
|
||||||
org_id=org_id,
|
org_id=org_id,
|
||||||
document_type=document_type,
|
document_type=document_type,
|
||||||
filename=sanitized_filename,
|
filename=sanitized_filename,
|
||||||
content_type=detected_content_type,
|
content_type=detected_content_type,
|
||||||
file_size=file_size,
|
file_size=file_size,
|
||||||
s3_key=s3_key,
|
s3_key=s3_key,
|
||||||
created_at=datetime.fromisoformat(existing_metadata.get("created_at", datetime.utcnow().isoformat())),
|
created_at=datetime.fromisoformat(existing_metadata.get("created-at", datetime.utcnow().isoformat())),
|
||||||
updated_at=datetime.utcnow()
|
updated_at=datetime.utcnow()
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -170,7 +169,7 @@ async def get_document(request: Request, document_id: str):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
response = client.list_objects_v2(
|
response = client.list_objects_v2(
|
||||||
Bucket=settings.s3_bucket,
|
Bucket=s3.get_bucket_name(),
|
||||||
Prefix=prefix,
|
Prefix=prefix,
|
||||||
MaxKeys=1
|
MaxKeys=1
|
||||||
)
|
)
|
||||||
@@ -188,13 +187,13 @@ async def get_document(request: Request, document_id: str):
|
|||||||
s3_metadata = s3.get_file_metadata(s3_key)
|
s3_metadata = s3.get_file_metadata(s3_key)
|
||||||
|
|
||||||
# Verify org_id matches
|
# Verify org_id matches
|
||||||
if s3_metadata.get("org_id") != org_id:
|
if s3_metadata.get("org-id") != org_id:
|
||||||
logger.error(f"Organization mismatch for document: {document_id}")
|
logger.error(f"Organization mismatch for document: {document_id}")
|
||||||
raise HTTPException(status_code=403, detail="Organization mismatch")
|
raise HTTPException(status_code=403, detail="Organization mismatch")
|
||||||
|
|
||||||
# Get object info
|
# Get object info
|
||||||
try:
|
try:
|
||||||
object_info = client.head_object(Bucket=settings.s3_bucket, Key=s3_key)
|
object_info = client.head_object(Bucket=s3.get_bucket_name(), Key=s3_key)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to get object info: {e}")
|
logger.error(f"Failed to get object info: {e}")
|
||||||
raise HTTPException(status_code=500, detail="Failed to retrieve document")
|
raise HTTPException(status_code=500, detail="Failed to retrieve document")
|
||||||
@@ -202,14 +201,14 @@ async def get_document(request: Request, document_id: str):
|
|||||||
# Create metadata response
|
# Create metadata response
|
||||||
metadata = DocumentMetadata(
|
metadata = DocumentMetadata(
|
||||||
document_id=document_id,
|
document_id=document_id,
|
||||||
org_id=s3_metadata.get("org_id"),
|
org_id=s3_metadata.get("org-id"),
|
||||||
document_type=DocumentType(s3_metadata.get("document_type")),
|
document_type=DocumentType(s3_metadata.get("document-type")),
|
||||||
filename=s3_metadata.get("filename"),
|
filename=s3_metadata.get("filename"),
|
||||||
content_type=object_info.get("ContentType"),
|
content_type=object_info.get("ContentType"),
|
||||||
file_size=int(s3_metadata.get("file_size", object_info.get("ContentLength", 0))),
|
file_size=int(s3_metadata.get("file-size", object_info.get("ContentLength", 0))),
|
||||||
s3_key=s3_key,
|
s3_key=s3_key,
|
||||||
created_at=datetime.fromisoformat(s3_metadata.get("created_at", datetime.utcnow().isoformat())),
|
created_at=datetime.fromisoformat(s3_metadata.get("created-at", datetime.utcnow().isoformat())),
|
||||||
updated_at=datetime.fromisoformat(s3_metadata.get("updated_at", datetime.utcnow().isoformat()))
|
updated_at=datetime.fromisoformat(s3_metadata.get("updated-at", datetime.utcnow().isoformat()))
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f"Get document completed - document_id: {document_id}")
|
logger.info(f"Get document completed - document_id: {document_id}")
|
||||||
@@ -227,7 +226,7 @@ async def get_download_url(request: Request, document_id: str, expires_in: int =
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
response = client.list_objects_v2(
|
response = client.list_objects_v2(
|
||||||
Bucket=settings.s3_bucket,
|
Bucket=s3.get_bucket_name(),
|
||||||
Prefix=prefix,
|
Prefix=prefix,
|
||||||
MaxKeys=1
|
MaxKeys=1
|
||||||
)
|
)
|
||||||
@@ -243,7 +242,7 @@ async def get_download_url(request: Request, document_id: str, expires_in: int =
|
|||||||
|
|
||||||
# Verify org_id matches
|
# Verify org_id matches
|
||||||
s3_metadata = s3.get_file_metadata(s3_key)
|
s3_metadata = s3.get_file_metadata(s3_key)
|
||||||
if s3_metadata.get("org_id") != org_id:
|
if s3_metadata.get("org-id") != org_id:
|
||||||
logger.error(f"Organization mismatch for document: {document_id}")
|
logger.error(f"Organization mismatch for document: {document_id}")
|
||||||
raise HTTPException(status_code=403, detail="Organization mismatch")
|
raise HTTPException(status_code=403, detail="Organization mismatch")
|
||||||
|
|
||||||
@@ -265,7 +264,7 @@ async def get_document_fields(request: Request, document_id: str):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
response = client.list_objects_v2(
|
response = client.list_objects_v2(
|
||||||
Bucket=settings.s3_bucket,
|
Bucket=s3.get_bucket_name(),
|
||||||
Prefix=prefix,
|
Prefix=prefix,
|
||||||
MaxKeys=1
|
MaxKeys=1
|
||||||
)
|
)
|
||||||
@@ -283,12 +282,12 @@ async def get_document_fields(request: Request, document_id: str):
|
|||||||
s3_metadata = s3.get_file_metadata(s3_key)
|
s3_metadata = s3.get_file_metadata(s3_key)
|
||||||
|
|
||||||
# Verify org_id matches
|
# Verify org_id matches
|
||||||
if s3_metadata.get("org_id") != org_id:
|
if s3_metadata.get("org-id") != org_id:
|
||||||
logger.error(f"Organization mismatch for document: {document_id}")
|
logger.error(f"Organization mismatch for document: {document_id}")
|
||||||
raise HTTPException(status_code=403, detail="Organization mismatch")
|
raise HTTPException(status_code=403, detail="Organization mismatch")
|
||||||
|
|
||||||
# Check if PDF
|
# Check if PDF
|
||||||
document_type = s3_metadata.get("document_type")
|
document_type = s3_metadata.get("document-type")
|
||||||
if document_type != DocumentType.PDF.value:
|
if document_type != DocumentType.PDF.value:
|
||||||
logger.error(f"Document is not PDF: {document_type}")
|
logger.error(f"Document is not PDF: {document_type}")
|
||||||
raise HTTPException(status_code=400, detail="Field discovery only supported for PDF documents")
|
raise HTTPException(status_code=400, detail="Field discovery only supported for PDF documents")
|
||||||
@@ -323,7 +322,7 @@ async def delete_document(request: Request, document_id: str):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
response = client.list_objects_v2(
|
response = client.list_objects_v2(
|
||||||
Bucket=settings.s3_bucket,
|
Bucket=s3.get_bucket_name(),
|
||||||
Prefix=prefix,
|
Prefix=prefix,
|
||||||
MaxKeys=1
|
MaxKeys=1
|
||||||
)
|
)
|
||||||
@@ -339,7 +338,7 @@ async def delete_document(request: Request, document_id: str):
|
|||||||
|
|
||||||
# Verify org_id matches
|
# Verify org_id matches
|
||||||
s3_metadata = s3.get_file_metadata(s3_key)
|
s3_metadata = s3.get_file_metadata(s3_key)
|
||||||
if s3_metadata.get("org_id") != org_id:
|
if s3_metadata.get("org-id") != org_id:
|
||||||
logger.error(f"Organization mismatch for document: {document_id}")
|
logger.error(f"Organization mismatch for document: {document_id}")
|
||||||
raise HTTPException(status_code=403, detail="Organization mismatch")
|
raise HTTPException(status_code=403, detail="Organization mismatch")
|
||||||
|
|
||||||
@@ -348,7 +347,7 @@ async def delete_document(request: Request, document_id: str):
|
|||||||
s3.delete_file(s3_key)
|
s3.delete_file(s3_key)
|
||||||
logger.info(f"Document deleted - document_id: {document_id}")
|
logger.info(f"Document deleted - document_id: {document_id}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to delete document: {e}")
|
logger.error(f"Failed to delete file: {e}")
|
||||||
raise HTTPException(status_code=500, detail=f"Failed to delete document: {e}")
|
raise HTTPException(status_code=500, detail=f"Failed to delete file: {e}")
|
||||||
|
|
||||||
return {"message": "Document deleted successfully"}
|
return {"message": "Document deleted successfully"}
|
||||||
|
|||||||
@@ -90,10 +90,6 @@ def upload_file(file: UploadFile, s3_key: str, content_type: str, metadata: dict
|
|||||||
file_content = file.file.read()
|
file_content = file.file.read()
|
||||||
file.file.seek(0)
|
file.file.seek(0)
|
||||||
|
|
||||||
extra_args = {"ContentType": content_type}
|
|
||||||
if metadata:
|
|
||||||
extra_args["Metadata"] = metadata
|
|
||||||
|
|
||||||
client.put_object(
|
client.put_object(
|
||||||
Bucket=bucket_name,
|
Bucket=bucket_name,
|
||||||
Key=s3_key,
|
Key=s3_key,
|
||||||
|
|||||||
Reference in New Issue
Block a user