Compare commits

..

15 Commits

Author SHA1 Message Date
9a58242a91 prefix route
All checks were successful
Build and Publish / build-release (push) Successful in 57s
2026-04-29 13:09:32 -05:00
1550fc7599 dont use _ in metadata
All checks were successful
Build and Publish / build-release (push) Successful in 49s
2026-04-24 15:16:36 -05:00
c952063d7b url encode filename
All checks were successful
Build and Publish / build-release (push) Successful in 48s
2026-04-24 14:52:06 -05:00
2018f5a5f6 add bucket info in values
All checks were successful
Build and Publish / build-release (push) Successful in 1m8s
2026-04-24 14:38:05 -05:00
c77845852e use credentialssecretname
All checks were successful
Build and Publish / build-release (push) Successful in 46s
2026-04-24 14:16:53 -05:00
64a6bbed68 fix suffix
All checks were successful
Build and Publish / build-release (push) Successful in 45s
2026-04-24 14:12:59 -05:00
036d2553be fix names
Some checks failed
Build and Publish / build-release (push) Has been cancelled
2026-04-24 14:12:42 -05:00
70691ad392 add access to bucket
All checks were successful
Build and Publish / build-release (push) Successful in 48s
2026-04-24 14:10:19 -05:00
ff3d93e933 add bucket in CRD
All checks were successful
Build and Publish / build-release (push) Successful in 44s
2026-04-24 14:05:28 -05:00
7e91a385f4 fix indent
All checks were successful
Build and Publish / build-release (push) Successful in 47s
2026-04-24 12:21:53 -05:00
ee4b1faea4 use sanitzed name
All checks were successful
Build and Publish / build-release (push) Successful in 45s
2026-04-24 12:18:06 -05:00
5ee4e847d0 prefix urls with api
All checks were successful
Build and Publish / build-release (push) Successful in 44s
2026-04-24 11:31:30 -05:00
49f2d1a97d fix openapi url
All checks were successful
Build and Publish / build-release (push) Successful in 59s
2026-04-24 11:25:09 -05:00
3bcc38b4ae fix access_key_id name
All checks were successful
Build and Publish / build-release (push) Successful in 43s
2026-04-23 17:02:36 -05:00
7d0561f2db remove resources
All checks were successful
Build and Publish / build-release (push) Successful in 45s
2026-04-23 16:56:45 -05:00
12 changed files with 190 additions and 140 deletions

View File

@@ -16,7 +16,7 @@ Generic document management service with S3 storage and PDF field discovery.
### Upload Document ### Upload Document
``` ```
POST /api/documents/upload POST /api/v1/documents/upload
Content-Type: multipart/form-data Content-Type: multipart/form-data
Authorization: Bearer <token> Authorization: Bearer <token>
@@ -34,7 +34,7 @@ Response:
### Rewrite Document ### Rewrite Document
``` ```
PUT /api/documents/{document_id} PUT /api/v1/documents/{document_id}
Content-Type: multipart/form-data Content-Type: multipart/form-data
Authorization: Bearer <token> Authorization: Bearer <token>
@@ -52,7 +52,7 @@ Response:
### Get Document Metadata ### Get Document Metadata
``` ```
GET /api/documents/{document_id} GET /api/v1/documents/{document_id}
Authorization: Bearer <token> Authorization: Bearer <token>
Response: Response:
@@ -72,7 +72,7 @@ Response:
### Get Download URL ### Get Download URL
``` ```
GET /api/documents/{document_id}/download-url?expires_in=3600 GET /api/v1/documents/{document_id}/download-url?expires_in=3600
Authorization: Bearer <token> Authorization: Bearer <token>
Response: Response:
@@ -85,7 +85,7 @@ Response:
### Get PDF Fields ### Get PDF Fields
``` ```
GET /api/documents/{document_id}/fields GET /api/v1/documents/{document_id}/fields
Authorization: Bearer <token> Authorization: Bearer <token>
Response: Response:
@@ -106,7 +106,7 @@ Response:
### Delete Document ### Delete Document
``` ```
DELETE /api/documents/{document_id} DELETE /api/v1/documents/{document_id}
Authorization: Bearer <token> Authorization: Bearer <token>
Response: Response:

View File

@@ -1,9 +1,12 @@
from pydantic_settings import BaseSettings from pydantic_settings import BaseSettings
class Settings(BaseSettings): class Settings(BaseSettings):
# S3 settings # COSI settings (preferred)
cosi_bucket_info_path: str = ""
# S3 settings (fallback)
s3_endpoint: str = "http://localhost:9000" s3_endpoint: str = "http://localhost:9000"
s3_access_key: str = "minioadmin" s3_access_key_id: str = "minioadmin"
s3_secret_key: str = "minioadmin" s3_secret_key: str = "minioadmin"
s3_bucket: str = "document-bucket" s3_bucket: str = "document-bucket"
s3_region: str = "us-east-1" s3_region: str = "us-east-1"

View File

@@ -2,7 +2,6 @@ from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from fastapi.openapi.utils import get_openapi from fastapi.openapi.utils import get_openapi
from app.routers import documents from app.routers import documents
from app.config import settings
from app.logger import setup_logging from app.logger import setup_logging
from app.middleware.auth import AuthMiddleware from app.middleware.auth import AuthMiddleware
@@ -13,9 +12,9 @@ app = FastAPI(
title="Document Service", title="Document Service",
version="1.0.0", version="1.0.0",
description="Generic document management service with S3 storage and PDF field discovery", description="Generic document management service with S3 storage and PDF field discovery",
openapi_url="/openapi3.json", openapi_url="/api/openapi",
docs_url="/docs", docs_url="/api/docs",
redoc_url="/redoc" redoc_url="/api/redoc"
) )
# Add auth middleware # Add auth middleware
@@ -23,7 +22,7 @@ app.add_middleware(AuthMiddleware)
app.add_middleware( app.add_middleware(
CORSMiddleware, CORSMiddleware,
allow_origins=["http://localhost:3000"], allow_origins=["*"],
allow_methods=["*"], allow_methods=["*"],
allow_headers=["*"] allow_headers=["*"]
) )

View File

@@ -1,6 +1,5 @@
from fastapi import Request from fastapi import Request
from starlette.middleware.base import BaseHTTPMiddleware from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import JSONResponse
from app.logger import get_logger from app.logger import get_logger
logger = get_logger(__name__) logger = get_logger(__name__)

View File

@@ -1,6 +1,5 @@
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from datetime import datetime from datetime import datetime
from typing import Optional
from app.enums import DocumentType from app.enums import DocumentType
class DocumentMetadata(BaseModel): class DocumentMetadata(BaseModel):

View File

@@ -1,6 +1,4 @@
import os
from pypdf import PdfReader from pypdf import PdfReader
from typing import Any
def discover_fields(pdf_path: str) -> list[dict]: def discover_fields(pdf_path: str) -> list[dict]:
""" """

View File

@@ -1 +1,3 @@
from app.routers import documents from app.routers import documents as documents
__all__ = ["documents"]

View File

@@ -1,15 +1,13 @@
import os import os
from fastapi import APIRouter, HTTPException, UploadFile, File, Form, Request from fastapi import APIRouter, HTTPException, UploadFile, File, Request
from typing import Optional
from datetime import datetime from datetime import datetime
from app import s3, pdf, utils from app import s3, pdf, utils
from app.config import settings
from app.enums import DocumentType from app.enums import DocumentType
from app.models import DocumentMetadata, UploadResponse, DownloadUrlResponse, FieldsResponse from app.models import DocumentMetadata, UploadResponse, DownloadUrlResponse, FieldsResponse
from app.logger import get_logger from app.logger import get_logger
router = APIRouter(prefix="/api/documents", tags=["documents"]) router = APIRouter(prefix="/api/v1/documents", tags=["documents"])
logger = get_logger(__name__) logger = get_logger(__name__)
@router.post("/upload", response_model=UploadResponse) @router.post("/upload", response_model=UploadResponse)
@@ -47,11 +45,11 @@ async def upload_document(
# Prepare metadata # Prepare metadata
metadata_dict = { metadata_dict = {
"org_id": org_id, "org-id": org_id,
"document_type": document_type.value, "document-type": document_type.value,
"filename": file.filename, "filename": sanitized_filename,
"file_size": str(file_size), "file-size": str(file_size),
"created_at": datetime.utcnow().isoformat() "created-at": datetime.utcnow().isoformat()
} }
# Upload to S3 # Upload to S3
@@ -70,7 +68,7 @@ async def upload_document(
document_id=document_id, document_id=document_id,
org_id=org_id, org_id=org_id,
document_type=document_type, document_type=document_type,
filename=file.filename, filename=sanitized_filename,
content_type=detected_content_type, content_type=detected_content_type,
file_size=file_size, file_size=file_size,
s3_key=s3_key, s3_key=s3_key,
@@ -119,17 +117,17 @@ async def rewrite_document(
# Verify org_id matches # Verify org_id matches
existing_metadata = s3.get_file_metadata(s3_key) existing_metadata = s3.get_file_metadata(s3_key)
if existing_metadata.get("org_id") != org_id: if existing_metadata.get("org-id") != org_id:
logger.error(f"Organization mismatch for document: {document_id}") logger.error(f"Organization mismatch for document: {document_id}")
raise HTTPException(status_code=403, detail="Organization mismatch") raise HTTPException(status_code=403, detail="Organization mismatch")
# Prepare metadata # Prepare metadata
metadata_dict = { metadata_dict = {
"org_id": org_id, "org-id": org_id,
"document_type": document_type.value, "document-type": document_type.value,
"filename": file.filename, "filename": sanitized_filename,
"file_size": str(file_size), "file-size": str(file_size),
"updated_at": datetime.utcnow().isoformat() "updated-at": datetime.utcnow().isoformat()
} }
# Upload to S3 (overwrites existing) # Upload to S3 (overwrites existing)
@@ -148,12 +146,12 @@ async def rewrite_document(
document_id=document_id, document_id=document_id,
org_id=org_id, org_id=org_id,
document_type=document_type, document_type=document_type,
filename=file.filename, filename=sanitized_filename,
content_type=detected_content_type, content_type=detected_content_type,
file_size=file_size, file_size=file_size,
s3_key=s3_key, s3_key=s3_key,
created_at=datetime.fromisoformat(existing_metadata.get("created_at", datetime.utcnow().isoformat())), created_at=datetime.fromisoformat(existing_metadata.get("created-at", datetime.utcnow().isoformat())),
updated_at=datetime.utcnow() updated_at=datetime.utcnow()
) )
logger.info(f"Rewrite completed - document_id: {document_id}") logger.info(f"Rewrite completed - document_id: {document_id}")
@@ -171,7 +169,7 @@ async def get_document(request: Request, document_id: str):
try: try:
response = client.list_objects_v2( response = client.list_objects_v2(
Bucket=settings.s3_bucket, Bucket=s3.get_bucket_name(),
Prefix=prefix, Prefix=prefix,
MaxKeys=1 MaxKeys=1
) )
@@ -189,13 +187,13 @@ async def get_document(request: Request, document_id: str):
s3_metadata = s3.get_file_metadata(s3_key) s3_metadata = s3.get_file_metadata(s3_key)
# Verify org_id matches # Verify org_id matches
if s3_metadata.get("org_id") != org_id: if s3_metadata.get("org-id") != org_id:
logger.error(f"Organization mismatch for document: {document_id}") logger.error(f"Organization mismatch for document: {document_id}")
raise HTTPException(status_code=403, detail="Organization mismatch") raise HTTPException(status_code=403, detail="Organization mismatch")
# Get object info # Get object info
try: try:
object_info = client.head_object(Bucket=settings.s3_bucket, Key=s3_key) object_info = client.head_object(Bucket=s3.get_bucket_name(), Key=s3_key)
except Exception as e: except Exception as e:
logger.error(f"Failed to get object info: {e}") logger.error(f"Failed to get object info: {e}")
raise HTTPException(status_code=500, detail="Failed to retrieve document") raise HTTPException(status_code=500, detail="Failed to retrieve document")
@@ -203,14 +201,14 @@ async def get_document(request: Request, document_id: str):
# Create metadata response # Create metadata response
metadata = DocumentMetadata( metadata = DocumentMetadata(
document_id=document_id, document_id=document_id,
org_id=s3_metadata.get("org_id"), org_id=s3_metadata.get("org-id"),
document_type=DocumentType(s3_metadata.get("document_type")), document_type=DocumentType(s3_metadata.get("document-type")),
filename=s3_metadata.get("filename"), filename=s3_metadata.get("filename"),
content_type=object_info.get("ContentType"), content_type=object_info.get("ContentType"),
file_size=int(s3_metadata.get("file_size", object_info.get("ContentLength", 0))), file_size=int(s3_metadata.get("file-size", object_info.get("ContentLength", 0))),
s3_key=s3_key, s3_key=s3_key,
created_at=datetime.fromisoformat(s3_metadata.get("created_at", datetime.utcnow().isoformat())), created_at=datetime.fromisoformat(s3_metadata.get("created-at", datetime.utcnow().isoformat())),
updated_at=datetime.fromisoformat(s3_metadata.get("updated_at", datetime.utcnow().isoformat())) updated_at=datetime.fromisoformat(s3_metadata.get("updated-at", datetime.utcnow().isoformat()))
) )
logger.info(f"Get document completed - document_id: {document_id}") logger.info(f"Get document completed - document_id: {document_id}")
@@ -228,7 +226,7 @@ async def get_download_url(request: Request, document_id: str, expires_in: int =
try: try:
response = client.list_objects_v2( response = client.list_objects_v2(
Bucket=settings.s3_bucket, Bucket=s3.get_bucket_name(),
Prefix=prefix, Prefix=prefix,
MaxKeys=1 MaxKeys=1
) )
@@ -244,7 +242,7 @@ async def get_download_url(request: Request, document_id: str, expires_in: int =
# Verify org_id matches # Verify org_id matches
s3_metadata = s3.get_file_metadata(s3_key) s3_metadata = s3.get_file_metadata(s3_key)
if s3_metadata.get("org_id") != org_id: if s3_metadata.get("org-id") != org_id:
logger.error(f"Organization mismatch for document: {document_id}") logger.error(f"Organization mismatch for document: {document_id}")
raise HTTPException(status_code=403, detail="Organization mismatch") raise HTTPException(status_code=403, detail="Organization mismatch")
@@ -266,7 +264,7 @@ async def get_document_fields(request: Request, document_id: str):
try: try:
response = client.list_objects_v2( response = client.list_objects_v2(
Bucket=settings.s3_bucket, Bucket=s3.get_bucket_name(),
Prefix=prefix, Prefix=prefix,
MaxKeys=1 MaxKeys=1
) )
@@ -284,12 +282,12 @@ async def get_document_fields(request: Request, document_id: str):
s3_metadata = s3.get_file_metadata(s3_key) s3_metadata = s3.get_file_metadata(s3_key)
# Verify org_id matches # Verify org_id matches
if s3_metadata.get("org_id") != org_id: if s3_metadata.get("org-id") != org_id:
logger.error(f"Organization mismatch for document: {document_id}") logger.error(f"Organization mismatch for document: {document_id}")
raise HTTPException(status_code=403, detail="Organization mismatch") raise HTTPException(status_code=403, detail="Organization mismatch")
# Check if PDF # Check if PDF
document_type = s3_metadata.get("document_type") document_type = s3_metadata.get("document-type")
if document_type != DocumentType.PDF.value: if document_type != DocumentType.PDF.value:
logger.error(f"Document is not PDF: {document_type}") logger.error(f"Document is not PDF: {document_type}")
raise HTTPException(status_code=400, detail="Field discovery only supported for PDF documents") raise HTTPException(status_code=400, detail="Field discovery only supported for PDF documents")
@@ -324,7 +322,7 @@ async def delete_document(request: Request, document_id: str):
try: try:
response = client.list_objects_v2( response = client.list_objects_v2(
Bucket=settings.s3_bucket, Bucket=s3.get_bucket_name(),
Prefix=prefix, Prefix=prefix,
MaxKeys=1 MaxKeys=1
) )
@@ -340,7 +338,7 @@ async def delete_document(request: Request, document_id: str):
# Verify org_id matches # Verify org_id matches
s3_metadata = s3.get_file_metadata(s3_key) s3_metadata = s3.get_file_metadata(s3_key)
if s3_metadata.get("org_id") != org_id: if s3_metadata.get("org-id") != org_id:
logger.error(f"Organization mismatch for document: {document_id}") logger.error(f"Organization mismatch for document: {document_id}")
raise HTTPException(status_code=403, detail="Organization mismatch") raise HTTPException(status_code=403, detail="Organization mismatch")
@@ -349,7 +347,7 @@ async def delete_document(request: Request, document_id: str):
s3.delete_file(s3_key) s3.delete_file(s3_key)
logger.info(f"Document deleted - document_id: {document_id}") logger.info(f"Document deleted - document_id: {document_id}")
except Exception as e: except Exception as e:
logger.error(f"Failed to delete document: {e}") logger.error(f"Failed to delete file: {e}")
raise HTTPException(status_code=500, detail=f"Failed to delete document: {e}") raise HTTPException(status_code=500, detail=f"Failed to delete file: {e}")
return {"message": "Document deleted successfully"} return {"message": "Document deleted successfully"}

107
app/s3.py
View File

@@ -1,6 +1,7 @@
import boto3 import boto3
import tempfile import json
import os import os
import tempfile
from botocore.client import Config from botocore.client import Config
from fastapi import UploadFile from fastapi import UploadFile
from app.config import settings from app.config import settings
@@ -8,94 +9,136 @@ from app.logger import get_logger
logger = get_logger(__name__) logger = get_logger(__name__)
def read_bucket_info() -> dict:
if not settings.cosi_bucket_info_path:
raise ValueError("COSI_BUCKET_INFO_PATH not set")
with open(settings.cosi_bucket_info_path, "r") as f:
return json.load(f)
def get_cosi_s3_config() -> dict:
bucket_info = read_bucket_info()
s3_conf = bucket_info["spec"]["secretS3"]
return {
"endpoint": s3_conf["endpoint"],
"access_key": s3_conf["accessKeyID"],
"secret_key": s3_conf["accessSecretKey"],
"bucket": bucket_info["spec"]["bucketName"]
}
def get_client(): def get_client():
if settings.cosi_bucket_info_path:
cosi_config = get_cosi_s3_config()
return boto3.client(
"s3",
endpoint_url=cosi_config["endpoint"],
aws_access_key_id=cosi_config["access_key"],
aws_secret_access_key=cosi_config["secret_key"],
config=Config(signature_version="s3v4"),
region_name="us-east-1"
)
return boto3.client( return boto3.client(
"s3", "s3",
endpoint_url=settings.s3_endpoint, endpoint_url=settings.s3_endpoint,
aws_access_key_id=settings.s3_access_key, aws_access_key_id=settings.s3_access_key_id,
aws_secret_access_key=settings.s3_secret_key, aws_secret_access_key=settings.s3_secret_key,
config=Config(signature_version="s3v4"), config=Config(signature_version="s3v4"),
region_name=settings.s3_region region_name=settings.s3_region
) )
def ensure_bucket_exists() -> None:
"""Ensure the S3 bucket exists, create it if it doesn't exist.
Raises: def get_bucket_name() -> str:
Exception: If bucket creation fails (service will fail to start) if settings.cosi_bucket_info_path:
""" return get_cosi_s3_config()["bucket"]
return settings.s3_bucket
def ensure_bucket_exists() -> None:
bucket_name = get_bucket_name()
client = get_client() client = get_client()
try: try:
client.head_bucket(Bucket=settings.s3_bucket) client.head_bucket(Bucket=bucket_name)
logger.info(f"Bucket '{settings.s3_bucket}' already exists") logger.info(f"Bucket '{bucket_name}' already exists")
except client.exceptions.ClientError as e: except client.exceptions.ClientError as e:
error_code = e.response['Error']['Code'] error_code = e.response['Error']['Code']
if error_code == '404': if error_code == '404':
try: try:
client.create_bucket( client.create_bucket(
Bucket=settings.s3_bucket, Bucket=bucket_name,
CreateBucketConfiguration={ CreateBucketConfiguration={
'LocationConstraint': settings.s3_region 'LocationConstraint': 'us-east-1'
} }
) )
logger.info(f"Created bucket '{settings.s3_bucket}'") logger.info(f"Created bucket '{bucket_name}'")
except Exception as create_error: except Exception as create_error:
logger.error(f"Failed to create bucket '{settings.s3_bucket}': {create_error}") logger.error(f"Failed to create bucket '{bucket_name}': {create_error}")
raise raise
else: else:
logger.error(f"Error checking bucket: {e}") logger.error(f"Error checking bucket: {e}")
raise raise
def upload_file(file: UploadFile, s3_key: str, content_type: str, metadata: dict = None) -> str: def upload_file(file: UploadFile, s3_key: str, content_type: str, metadata: dict = None) -> str:
"""Upload file to S3 with metadata""" bucket_name = get_bucket_name()
client = get_client() client = get_client()
extra_args = {"ContentType": content_type} file.file.seek(0, os.SEEK_END)
if metadata: file_size = file.file.tell()
extra_args["Metadata"] = metadata file.file.seek(0)
file_content = file.file.read()
file.file.seek(0)
client.upload_fileobj( client.put_object(
file.file, Bucket=bucket_name,
settings.s3_bucket, Key=s3_key,
s3_key, Body=file_content,
ExtraArgs=extra_args ContentLength=file_size,
ContentType=content_type,
Metadata=metadata
) )
return s3_key return s3_key
def delete_file(s3_key: str) -> None: def delete_file(s3_key: str) -> None:
"""Delete file from S3""" bucket_name = get_bucket_name()
client = get_client() client = get_client()
client.delete_object(Bucket=settings.s3_bucket, Key=s3_key) client.delete_object(Bucket=bucket_name, Key=s3_key)
def file_exists(s3_key: str) -> bool: def file_exists(s3_key: str) -> bool:
"""Check if file exists in S3""" bucket_name = get_bucket_name()
client = get_client() client = get_client()
try: try:
client.head_object(Bucket=settings.s3_bucket, Key=s3_key) client.head_object(Bucket=bucket_name, Key=s3_key)
return True return True
except client.exceptions.ClientError: except client.exceptions.ClientError:
return False return False
def get_file_metadata(s3_key: str) -> dict: def get_file_metadata(s3_key: str) -> dict:
"""Get file metadata from S3""" bucket_name = get_bucket_name()
client = get_client() client = get_client()
response = client.head_object(Bucket=settings.s3_bucket, Key=s3_key) response = client.head_object(Bucket=bucket_name, Key=s3_key)
return response.get("Metadata", {}) return response.get("Metadata", {})
def download_to_temp(s3_key: str) -> str: def download_to_temp(s3_key: str) -> str:
"""Download file from S3 to temp file""" bucket_name = get_bucket_name()
client = get_client() client = get_client()
suffix = os.path.splitext(s3_key)[-1] or ".tmp" suffix = os.path.splitext(s3_key)[-1] or ".tmp"
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix) tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
client.download_fileobj(settings.s3_bucket, s3_key, tmp) client.download_fileobj(bucket_name, s3_key, tmp)
tmp.close() tmp.close()
return tmp.name return tmp.name
def presigned_download_url(s3_key: str, expires_in: int = 3600) -> str: def presigned_download_url(s3_key: str, expires_in: int = 3600) -> str:
"""Generate presigned download URL""" bucket_name = get_bucket_name()
client = get_client() client = get_client()
return client.generate_presigned_url( return client.generate_presigned_url(
"get_object", "get_object",
Params={"Bucket": settings.s3_bucket, "Key": s3_key}, Params={"Bucket": bucket_name, "Key": s3_key},
ExpiresIn=expires_in ExpiresIn=expires_in
) )

View File

@@ -58,9 +58,6 @@ def document_s3_key(org_id: str, document_id: str, filename: str) -> str:
return f"{s3_path_prefix(org_id, document_id)}{filename}" return f"{s3_path_prefix(org_id, document_id)}{filename}"
def sanitize_filename(filename: str) -> str: def sanitize_filename(filename: str) -> str:
"""Sanitize filename for S3""" """URL encode filename for S3"""
# Remove path separators and special characters from urllib.parse import quote
filename = filename.replace("/", "_").replace("\\", "_") return quote(filename, safe="")
# Keep only safe characters
safe_chars = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_")
return "".join(c for c in filename if c in safe_chars)

View File

@@ -11,22 +11,8 @@ controllers:
env: env:
LOG_LEVEL: info LOG_LEVEL: info
PORT: "8082" PORT: "8082"
S3_ENDPOINT: COSI_BUCKET_INFO_PATH:
value: "https://dev.s3.corredorconect.com/" value: "/var/run/secrets/cosi/BucketInfo"
S3_ACCESS_KEY:
valueFrom:
secretKeyRef:
name: 'document-service-s3-credentials'
key: rootAccessKey
S3_SECRET_KEY:
valueFrom:
secretKeyRef:
name: 'document-service-s3-credentials'
key: rootSecretAccessKey
S3_BUCKET:
value: "document-bucket"
S3_REGION:
value: "us-east-1"
probes: probes:
liveness: liveness:
enabled: true enabled: true
@@ -46,13 +32,6 @@ controllers:
port: 8082 port: 8082
initialDelaySeconds: 5 initialDelaySeconds: 5
periodSeconds: 5 periodSeconds: 5
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
cpu: 500m
memory: 512Mi
service: service:
main: main:
@@ -63,3 +42,36 @@ service:
port: 8082 port: 8082
protocol: HTTP protocol: HTTP
persistence:
cosi-bucket-info:
enabled: true
type: secret
name: document-service-s3-credentials
globalMounts:
- path: /var/run/secrets/cosi
readOnly: true
rawResources:
bucket:
enabled: true
apiVersion: objectstorage.k8s.io/v1alpha1
kind: BucketClaim
suffix: bucket
spec:
spec:
bucketClassName: seaweedfs
protocols:
- s3
bucket-access:
enabled: true
apiVersion: objectstorage.k8s.io/v1alpha1
kind: BucketAccess
suffix: bucket-access
spec:
spec:
bucketAccessClassName: seaweedfs
bucketClaimName: '{{ include "bjw-s.common.lib.chart.names.fullname" $ }}-bucket'
credentialsSecretName: document-service-s3-credentials
protocol: s3

View File

@@ -39,7 +39,7 @@ class TestDocumentUpload:
headers = {"Authorization": sample_auth_token} headers = {"Authorization": sample_auth_token}
response = test_client.post( response = test_client.post(
"/api/documents/upload", "/api/v1/documents/upload",
files=files, files=files,
data=data, data=data,
headers=headers headers=headers
@@ -61,7 +61,7 @@ class TestDocumentUpload:
headers = {"Authorization": sample_auth_token} headers = {"Authorization": sample_auth_token}
response = test_client.post( response = test_client.post(
"/api/documents/upload", "/api/v1/documents/upload",
files=files, files=files,
data=data, data=data,
headers=headers headers=headers
@@ -80,7 +80,7 @@ class TestDocumentUpload:
headers = {"Authorization": sample_auth_token} headers = {"Authorization": sample_auth_token}
response = test_client.post( response = test_client.post(
"/api/documents/upload", "/api/v1/documents/upload",
files=files, files=files,
data=data, data=data,
headers=headers headers=headers
@@ -97,7 +97,7 @@ class TestDocumentUpload:
data = {"org_id": "test-org-123"} data = {"org_id": "test-org-123"}
response = test_client.post( response = test_client.post(
"/api/documents/upload", "/api/v1/documents/upload",
files=files, files=files,
data=data data=data
) )
@@ -113,7 +113,7 @@ class TestDocumentUpload:
headers = {"Authorization": "Invalid token"} headers = {"Authorization": "Invalid token"}
response = test_client.post( response = test_client.post(
"/api/documents/upload", "/api/v1/documents/upload",
files=files, files=files,
data=data, data=data,
headers=headers headers=headers
@@ -127,7 +127,7 @@ class TestDocumentUpload:
headers = {"Authorization": sample_auth_token} headers = {"Authorization": sample_auth_token}
response = test_client.post( response = test_client.post(
"/api/documents/upload", "/api/v1/documents/upload",
data=data, data=data,
headers=headers headers=headers
) )
@@ -145,7 +145,7 @@ class TestDocumentMetadata:
headers = {"Authorization": sample_auth_token} headers = {"Authorization": sample_auth_token}
response = test_client.get( response = test_client.get(
"/api/documents/test-doc-456", "/api/v1/documents/test-doc-456",
params={"org_id": "test-org-123"}, params={"org_id": "test-org-123"},
headers=headers headers=headers
) )
@@ -155,7 +155,7 @@ class TestDocumentMetadata:
def test_get_document_without_auth_returns_401(self, test_client): def test_get_document_without_auth_returns_401(self, test_client):
"""Test getting document without auth returns 401.""" """Test getting document without auth returns 401."""
response = test_client.get("/api/documents/test-doc-456") response = test_client.get("/api/v1/documents/test-doc-456")
assert response.status_code == 401 assert response.status_code == 401
@@ -168,7 +168,7 @@ class TestDownloadUrl:
headers = {"Authorization": sample_auth_token} headers = {"Authorization": sample_auth_token}
response = test_client.get( response = test_client.get(
"/api/documents/test-doc-456/download-url", "/api/v1/documents/test-doc-456/download-url",
params={"org_id": "test-org-123"}, params={"org_id": "test-org-123"},
headers=headers headers=headers
) )
@@ -178,7 +178,7 @@ class TestDownloadUrl:
def test_get_download_url_without_auth_returns_401(self, test_client): def test_get_download_url_without_auth_returns_401(self, test_client):
"""Test getting download URL without auth returns 401.""" """Test getting download URL without auth returns 401."""
response = test_client.get("/api/documents/test-doc-456/download-url") response = test_client.get("/api/v1/documents/test-doc-456/download-url")
assert response.status_code == 401 assert response.status_code == 401
@@ -195,7 +195,7 @@ class TestPDFFieldDiscovery:
headers = {"Authorization": sample_auth_token} headers = {"Authorization": sample_auth_token}
upload_response = test_client.post( upload_response = test_client.post(
"/api/documents/upload", "/api/v1/documents/upload",
files=files, files=files,
data=data, data=data,
headers=headers headers=headers
@@ -207,7 +207,7 @@ class TestPDFFieldDiscovery:
# Get fields # Get fields
headers = {"Authorization": sample_auth_token} headers = {"Authorization": sample_auth_token}
response = test_client.get( response = test_client.get(
f"/api/documents/{document_id}/fields", f"/api/v1/documents/{document_id}/fields",
params={"org_id": "test-org-123"}, params={"org_id": "test-org-123"},
headers=headers headers=headers
) )
@@ -235,7 +235,7 @@ class TestPDFFieldDiscovery:
headers = {"Authorization": sample_auth_token} headers = {"Authorization": sample_auth_token}
upload_response = test_client.post( upload_response = test_client.post(
"/api/documents/upload", "/api/v1/documents/upload",
files=files, files=files,
data=data, data=data,
headers=headers headers=headers
@@ -247,7 +247,7 @@ class TestPDFFieldDiscovery:
# Get fields # Get fields
headers = {"Authorization": sample_auth_token} headers = {"Authorization": sample_auth_token}
response = test_client.get( response = test_client.get(
f"/api/documents/{document_id}/fields", f"/api/v1/documents/{document_id}/fields",
params={"org_id": "test-org-123"}, params={"org_id": "test-org-123"},
headers=headers headers=headers
) )
@@ -266,7 +266,7 @@ class TestPDFFieldDiscovery:
headers = {"Authorization": sample_auth_token} headers = {"Authorization": sample_auth_token}
upload_response = test_client.post( upload_response = test_client.post(
"/api/documents/upload", "/api/v1/documents/upload",
files=files, files=files,
data=data, data=data,
headers=headers headers=headers
@@ -278,7 +278,7 @@ class TestPDFFieldDiscovery:
# Get fields # Get fields
headers = {"Authorization": sample_auth_token} headers = {"Authorization": sample_auth_token}
response = test_client.get( response = test_client.get(
f"/api/documents/{document_id}/fields", f"/api/v1/documents/{document_id}/fields",
params={"org_id": "test-org-123"}, params={"org_id": "test-org-123"},
headers=headers headers=headers
) )
@@ -290,7 +290,7 @@ class TestPDFFieldDiscovery:
def test_get_pdf_fields_without_auth_returns_401(self, test_client): def test_get_pdf_fields_without_auth_returns_401(self, test_client):
"""Test getting PDF fields without auth returns 401.""" """Test getting PDF fields without auth returns 401."""
response = test_client.get("/api/documents/test-doc-456/fields") response = test_client.get("/api/v1/documents/test-doc-456/fields")
assert response.status_code == 401 assert response.status_code == 401
@@ -303,7 +303,7 @@ class TestDocumentDeletion:
headers = {"Authorization": sample_auth_token} headers = {"Authorization": sample_auth_token}
response = test_client.delete( response = test_client.delete(
"/api/documents/test-doc-456", "/api/v1/documents/test-doc-456",
params={"org_id": "test-org-123"}, params={"org_id": "test-org-123"},
headers=headers headers=headers
) )
@@ -313,7 +313,7 @@ class TestDocumentDeletion:
def test_delete_document_without_auth_returns_401(self, test_client): def test_delete_document_without_auth_returns_401(self, test_client):
"""Test deleting document without auth returns 401.""" """Test deleting document without auth returns 401."""
response = test_client.delete("/api/documents/test-doc-456") response = test_client.delete("/api/v1/documents/test-doc-456")
assert response.status_code == 401 assert response.status_code == 401
@@ -418,7 +418,7 @@ class TestCompleteWorkflow:
headers = {"Authorization": sample_auth_token} headers = {"Authorization": sample_auth_token}
upload_response = test_client.post( upload_response = test_client.post(
"/api/documents/upload", "/api/v1/documents/upload",
files=files, files=files,
data=data, data=data,
headers=headers headers=headers
@@ -430,28 +430,28 @@ class TestCompleteWorkflow:
# Get metadata # Get metadata
headers = {"Authorization": sample_auth_token} headers = {"Authorization": sample_auth_token}
metadata_response = test_client.get( metadata_response = test_client.get(
f"/api/documents/{document_id}", f"/api/v1/documents/{document_id}",
params={"org_id": "test-org-123"}, params={"org_id": "test-org-123"},
headers=headers headers=headers
) )
# Get fields # Get fields
fields_response = test_client.get( fields_response = test_client.get(
f"/api/documents/{document_id}/fields", f"/api/v1/documents/{document_id}/fields",
params={"org_id": "test-org-123"}, params={"org_id": "test-org-123"},
headers=headers headers=headers
) )
# Get download URL # Get download URL
download_response = test_client.get( download_response = test_client.get(
f"/api/documents/{document_id}/download-url", f"/api/v1/documents/{document_id}/download-url",
params={"org_id": "test-org-123"}, params={"org_id": "test-org-123"},
headers=headers headers=headers
) )
# Delete document # Delete document
delete_response = test_client.delete( delete_response = test_client.delete(
f"/api/documents/{document_id}", f"/api/v1/documents/{document_id}",
params={"org_id": "test-org-123"}, params={"org_id": "test-org-123"},
headers=headers headers=headers
) )