Initial commit of document-service
This commit is contained in:
68
tests/conftest.py
Normal file
68
tests/conftest.py
Normal file
@@ -0,0 +1,68 @@
|
||||
"""
|
||||
Test configuration and fixtures for document-service tests.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import os
|
||||
from fastapi.testclient import TestClient
|
||||
from unittest.mock import Mock, patch
|
||||
from moto import mock_aws
|
||||
import boto3
|
||||
|
||||
from app.main import app
|
||||
|
||||
# Test data paths
|
||||
# Fixture PDFs live in the "fixtures" directory next to this module.
_TESTS_ROOT = os.path.dirname(__file__)
FIXTURES_DIR = os.path.join(_TESTS_ROOT, "fixtures")
|
||||
|
||||
@pytest.fixture
def test_client():
    """Provide a FastAPI ``TestClient`` bound to the application under test.

    The client wraps ``app`` as imported from ``app.main``; this fixture
    itself installs no authentication override.
    """
    client = TestClient(app)
    return client
|
||||
|
||||
@pytest.fixture
def sample_org_id():
    """Return the fixed organization identifier used by the tests."""
    org_id = "test-org-123"
    return org_id
|
||||
|
||||
@pytest.fixture
def sample_document_id():
    """Return the fixed document identifier used by the tests."""
    document_id = "test-doc-456"
    return document_id
|
||||
|
||||
@pytest.fixture
def test_pdf_files():
    """Map each fixture PDF's short name to its path under ``FIXTURES_DIR``."""
    # (lookup key, file name) pairs; order matches the resulting dict order.
    catalogue = (
        ("simple_form", "simple_form.pdf"),
        ("complex_form", "complex_form.pdf"),
        ("no_form", "no_form.pdf"),
        ("large_form", "large_form.pdf"),
    )
    return {
        key: os.path.join(FIXTURES_DIR, filename)
        for key, filename in catalogue
    }
|
||||
|
||||
@pytest.fixture
def mock_s3_client():
    """Yield a boto3 S3 client created inside moto's ``mock_aws`` context.

    The client is yielded from within the ``with`` block, so the mock stays
    active until the requesting test finishes. The bucket
    ``document-bucket`` is created before the client is handed out.
    """
    client_options = {
        "region_name": "us-east-1",
        "aws_access_key_id": "minioadmin",
        "aws_secret_access_key": "minioadmin",
    }
    with mock_aws():
        s3 = boto3.client("s3", **client_options)
        # Create the test bucket up front so tests start with it present.
        s3.create_bucket(Bucket="document-bucket")
        yield s3
|
||||
|
||||
@pytest.fixture
def auth_bypass_middleware(sample_org_id):
    """Return a callable that stamps a request with the test organization.

    The returned function sets ``request.state.org_id`` to the value of the
    ``sample_org_id`` fixture (instead of repeating the literal) and returns
    the same request object, so the two fixtures can never disagree.

    NOTE(review): this fixture only builds the callable; nothing here
    registers it with the application -- confirm how callers wire it in.
    """
    def bypass_auth(request):
        # Mimic what the real auth layer presumably records on the request.
        request.state.org_id = sample_org_id
        return request

    return bypass_auth
|
||||
|
||||
@pytest.fixture
def sample_auth_token():
    """Return a ``Bearer`` Authorization header value for test requests.

    The value has three dot-separated segments after the ``Bearer `` prefix;
    the last segment is the literal ``test`` rather than a real signature.
    """
    header_value = "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJvcmdfaWQiOiJ0ZXN0LW9yZy0xMjMifQ.test"
    return header_value
|
||||
304
tests/fixtures/complex_form.pdf
vendored
Normal file
304
tests/fixtures/complex_form.pdf
vendored
Normal file
@@ -0,0 +1,304 @@
|
||||
%PDF-1.3
|
||||
%âãÏÓ
|
||||
1 0 obj
|
||||
<<
|
||||
/Producer (pypdf)
|
||||
>>
|
||||
endobj
|
||||
2 0 obj
|
||||
<<
|
||||
/Type /Pages
|
||||
/Count 1
|
||||
/Kids [ 4 0 R ]
|
||||
>>
|
||||
endobj
|
||||
3 0 obj
|
||||
<<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
/AcroForm <<
|
||||
/Fields [ <<
|
||||
/FT /Tx
|
||||
/T (first\137name)
|
||||
/V ()
|
||||
/Rect [ 200 690 400 710 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Tx
|
||||
/T (last\137name)
|
||||
/V ()
|
||||
/Rect [ 200 640 400 660 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Tx
|
||||
/T (email)
|
||||
/V ()
|
||||
/Rect [ 200 590 400 610 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Tx
|
||||
/T (phone)
|
||||
/V ()
|
||||
/Rect [ 200 540 400 560 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Tx
|
||||
/T (address)
|
||||
/V ()
|
||||
/Rect [ 200 490 400 510 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Tx
|
||||
/T (city)
|
||||
/V ()
|
||||
/Rect [ 200 440 400 460 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Tx
|
||||
/T (state)
|
||||
/V ()
|
||||
/Rect [ 200 390 400 410 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Tx
|
||||
/T (zip\137code)
|
||||
/V ()
|
||||
/Rect [ 200 340 400 360 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Ch
|
||||
/T (country)
|
||||
/V ()
|
||||
/Opt [ (USA) (Canada) (UK) (Germany) (France) ]
|
||||
/Rect [ 200 290 400 310 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Btn
|
||||
/T (gender)
|
||||
/V (male)
|
||||
/Rect [ 200 240 220 260 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Btn
|
||||
/T (gender)
|
||||
/V (female)
|
||||
/Rect [ 300 240 320 260 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Btn
|
||||
/T (reading)
|
||||
/V /Off
|
||||
/Rect [ 200 190 220 210 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Btn
|
||||
/T (sports)
|
||||
/V /Off
|
||||
/Rect [ 200 160 220 180 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Btn
|
||||
/T (music)
|
||||
/V /Off
|
||||
/Rect [ 200 130 220 150 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Btn
|
||||
/T (travel)
|
||||
/V /Off
|
||||
/Rect [ 200 100 220 120 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Btn
|
||||
/T (agree\137terms)
|
||||
/V /Off
|
||||
/Rect [ 200 140 220 160 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Tx
|
||||
/T (signature)
|
||||
/V ()
|
||||
/Rect [ 200 90 400 110 ]
|
||||
/Ff 0
|
||||
>> ]
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
4 0 obj
|
||||
<<
|
||||
/Contents 5 0 R
|
||||
/MediaBox [ 0 0 612 792 ]
|
||||
/Resources <<
|
||||
/Font 6 0 R
|
||||
/ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
||||
>>
|
||||
/Rotate 0
|
||||
/Trans <<
|
||||
>>
|
||||
/Type /Page
|
||||
/Parent 2 0 R
|
||||
/Annots [ <<
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/T (first\137name)
|
||||
/V ()
|
||||
/Rect [ 200 690 400 710 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/T (last\137name)
|
||||
/V ()
|
||||
/Rect [ 200 640 400 660 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/T (email)
|
||||
/V ()
|
||||
/Rect [ 200 590 400 610 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/T (phone)
|
||||
/V ()
|
||||
/Rect [ 200 540 400 560 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/T (address)
|
||||
/V ()
|
||||
/Rect [ 200 490 400 510 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/T (city)
|
||||
/V ()
|
||||
/Rect [ 200 440 400 460 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/T (state)
|
||||
/V ()
|
||||
/Rect [ 200 390 400 410 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/T (zip\137code)
|
||||
/V ()
|
||||
/Rect [ 200 340 400 360 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Ch
|
||||
/T (country)
|
||||
/V ()
|
||||
/Rect [ 200 290 400 310 ]
|
||||
/Ff 0
|
||||
/Opt [ (USA) (Canada) (UK) (Germany) (France) ]
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Btn
|
||||
/T (gender)
|
||||
/V (male)
|
||||
/Rect [ 200 240 220 260 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Btn
|
||||
/T (gender)
|
||||
/V (female)
|
||||
/Rect [ 300 240 320 260 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Btn
|
||||
/T (reading)
|
||||
/V /Off
|
||||
/Rect [ 200 190 220 210 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Btn
|
||||
/T (sports)
|
||||
/V /Off
|
||||
/Rect [ 200 160 220 180 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Btn
|
||||
/T (music)
|
||||
/V /Off
|
||||
/Rect [ 200 130 220 150 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Btn
|
||||
/T (travel)
|
||||
/V /Off
|
||||
/Rect [ 200 100 220 120 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Btn
|
||||
/T (agree\137terms)
|
||||
/V /Off
|
||||
/Rect [ 200 140 220 160 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/T (signature)
|
||||
/V ()
|
||||
/Rect [ 200 90 400 110 ]
|
||||
/Ff 0
|
||||
>> ]
|
||||
>>
|
||||
endobj
|
||||
5 0 obj
|
||||
<<
|
||||
/Filter [ /ASCII85Decode /FlateDecode ]
|
||||
/Length 291
|
||||
>>
|
||||
stream
|
||||
GasbV_+Fea&;KY%MZ9UrC9m8.oN"UdKHc".Gmj%B,>D(A;p`!tWO(4\)'k<]nE'P8R95j8f]2oKJNJY1f"tI,Dm8oIL>-,'An-7/XP_7&hmsPV2$VZlJVuKljga3q-e_fL*;+[hpAoJXWqmrLU,"s52O'g'kTenY-)^6!E]<t>XGGKULRl:>id?'u8b4h!>BX;G^/rC%S5.uq%27\VHe*eP7/%>f=QN:Hc+'*-ihD-.,/'o(;:.X+4s[#!Dq5i9,$f'o&NC;.U."[j3.eA/Se#D\)eRtd.%ou~>
|
||||
endstream
|
||||
endobj
|
||||
6 0 obj
|
||||
<<
|
||||
/F1 7 0 R
|
||||
>>
|
||||
endobj
|
||||
7 0 obj
|
||||
<<
|
||||
/BaseFont /Helvetica
|
||||
/Encoding /WinAnsiEncoding
|
||||
/Name /F1
|
||||
/Subtype /Type1
|
||||
/Type /Font
|
||||
>>
|
||||
endobj
|
||||
xref
|
||||
0 8
|
||||
0000000000 65535 f
|
||||
0000000015 00000 n
|
||||
0000000054 00000 n
|
||||
0000000113 00000 n
|
||||
0000001378 00000 n
|
||||
0000003056 00000 n
|
||||
0000003438 00000 n
|
||||
0000003469 00000 n
|
||||
trailer
|
||||
<<
|
||||
/Size 8
|
||||
/Root 3 0 R
|
||||
/Info 1 0 R
|
||||
>>
|
||||
startxref
|
||||
3576
|
||||
%%EOF
|
||||
371
tests/fixtures/generate_test_pdfs.py
vendored
Normal file
371
tests/fixtures/generate_test_pdfs.py
vendored
Normal file
@@ -0,0 +1,371 @@
|
||||
"""
|
||||
Generate test PDF files for document-service testing.
|
||||
|
||||
This script creates the following test PDFs (only the two form PDFs carry actual AcroForm fields):
|
||||
- Simple form PDF with basic form fields
|
||||
- Complex form PDF with multiple field types
|
||||
- No form PDF without form fields
|
||||
- Large form PDF (multi-page labels only, no form fields) for size validation testing
|
||||
"""
|
||||
|
||||
import os
|
||||
from reportlab.pdfgen import canvas
|
||||
from reportlab.lib.pagesizes import letter
|
||||
from reportlab.lib import colors
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
from pypdf.generic import (
|
||||
NameObject,
|
||||
create_string_object,
|
||||
NumberObject,
|
||||
ArrayObject,
|
||||
DictionaryObject,
|
||||
BooleanObject,
|
||||
)
|
||||
|
||||
# Output directory
|
||||
# Generated PDFs are written into the directory that contains this script.
_SCRIPT_PATH = os.path.abspath(__file__)
OUTPUT_DIR = os.path.dirname(_SCRIPT_PATH)
|
||||
|
||||
def create_simple_form_pdf():
    """Write ``simple_form.pdf``: a one-page PDF with six basic form fields.

    ReportLab first draws the visible labels and saves the file. pypdf then
    re-reads it, attaches the field dictionaries both as widget annotations
    on the page and as the document's AcroForm ``/Fields``, and overwrites
    the same file in ``OUTPUT_DIR``.
    """
    target = os.path.join(OUTPUT_DIR, "simple_form.pdf")

    def rect(left, bottom, right, top):
        """Build a ``/Rect`` array from four integer coordinates."""
        return ArrayObject([
            NumberObject(left),
            NumberObject(bottom),
            NumberObject(right),
            NumberObject(top),
        ])

    def text_field(title, bottom):
        """Build an empty text field spanning x 200-400, 20 units tall."""
        return DictionaryObject({
            NameObject("/FT"): NameObject("/Tx"),
            NameObject("/T"): create_string_object(title),
            NameObject("/V"): create_string_object(""),
            NameObject("/Rect"): rect(200, bottom, 400, bottom + 20),
            NameObject("/Ff"): NumberObject(0),
        })

    # Step 1: draw the static page content with reportlab.
    pdf = canvas.Canvas(target, pagesize=letter)
    pdf.setFont("Helvetica", 16)
    pdf.drawString(100, 750, "Simple Form Test")

    pdf.setFont("Helvetica", 12)
    captions = (
        ("Name:", 700),
        ("Email:", 650),
        ("Phone:", 600),
        ("Country:", 550),
        ("Birth Date:", 500),
        ("Agree to Terms:", 450),
    )
    for caption, baseline in captions:
        pdf.drawString(100, baseline, caption)
    pdf.save()

    # Step 2: copy the single page into a pypdf writer.
    source = PdfReader(target)
    out = PdfWriter()
    out.add_page(source.pages[0])

    # Step 3: describe the fields, in the order they appear on the page.
    country_field = DictionaryObject({
        NameObject("/FT"): NameObject("/Ch"),
        NameObject("/T"): create_string_object("country"),
        NameObject("/V"): create_string_object(""),
        NameObject("/Opt"): ArrayObject([
            create_string_object("USA"),
            create_string_object("Canada"),
            create_string_object("UK"),
            create_string_object("Germany"),
            create_string_object("France"),
        ]),
        NameObject("/Rect"): rect(200, 540, 400, 560),
        NameObject("/Ff"): NumberObject(0),
    })
    agree_field = DictionaryObject({
        NameObject("/FT"): NameObject("/Btn"),
        NameObject("/T"): create_string_object("agree_terms"),
        NameObject("/V"): NameObject("/Off"),
        NameObject("/Rect"): rect(200, 440, 220, 460),
        NameObject("/Ff"): NumberObject(0),
    })
    form_fields = [
        text_field("name", 690),
        text_field("email", 640),
        text_field("phone", 590),
        country_field,
        text_field("birth_date", 490),
        agree_field,
    ]

    # Step 4: mirror every field as a widget annotation on the page.
    target_page = out.pages[0]
    if "/Annots" not in target_page:
        target_page[NameObject("/Annots")] = ArrayObject()
    annotations = target_page[NameObject("/Annots")]

    for spec in form_fields:
        widget = DictionaryObject()
        widget[NameObject("/Subtype")] = NameObject("/Widget")
        widget[NameObject("/FT")] = spec[NameObject("/FT")]
        widget[NameObject("/T")] = spec[NameObject("/T")]
        widget[NameObject("/V")] = spec.get(NameObject("/V"), NameObject(""))
        widget[NameObject("/Rect")] = spec[NameObject("/Rect")]
        widget[NameObject("/Ff")] = spec.get(NameObject("/Ff"), NumberObject(0))
        # Only choice fields carry an option list.
        if NameObject("/Opt") in spec:
            widget[NameObject("/Opt")] = spec[NameObject("/Opt")]
        annotations.append(widget)

    # Step 5: register the fields in the document catalog's AcroForm.
    out._root_object[NameObject("/AcroForm")] = DictionaryObject({
        NameObject("/Fields"): ArrayObject(form_fields),
    })

    # Step 6: overwrite the reportlab output with the form-enabled PDF.
    with open(target, "wb") as handle:
        out.write(handle)

    print(f"Created: {target}")
|
||||
|
||||
def create_complex_form_pdf():
    """Write ``complex_form.pdf``: one page mixing text, choice and button fields.

    ReportLab draws the labels and saves the file. pypdf then re-reads it,
    adds each field as a widget annotation on the page and as an entry of
    the AcroForm ``/Fields`` array, and overwrites the file in ``OUTPUT_DIR``.
    """
    target = os.path.join(OUTPUT_DIR, "complex_form.pdf")

    def rect(left, bottom, right, top):
        """Build a ``/Rect`` array from four integer coordinates."""
        return ArrayObject([
            NumberObject(left),
            NumberObject(bottom),
            NumberObject(right),
            NumberObject(top),
        ])

    def make_field(kind, title, value, box, options=None):
        """Build one field dictionary; ``/Opt`` is emitted only when given."""
        entries = {
            NameObject("/FT"): NameObject(kind),
            NameObject("/T"): create_string_object(title),
            NameObject("/V"): value,
        }
        if options is not None:
            entries[NameObject("/Opt")] = ArrayObject(
                [create_string_object(option) for option in options]
            )
        entries[NameObject("/Rect")] = box
        entries[NameObject("/Ff")] = NumberObject(0)
        return DictionaryObject(entries)

    # Step 1: draw the static page content with reportlab.
    pdf = canvas.Canvas(target, pagesize=letter)
    pdf.setFont("Helvetica", 16)
    pdf.drawString(100, 750, "Complex Form Test")

    pdf.setFont("Helvetica", 12)
    captions = (
        "First Name:",
        "Last Name:",
        "Email:",
        "Phone:",
        "Address:",
        "City:",
        "State:",
        "Zip Code:",
        "Country:",
        "Gender:",
        "Interests:",
        "Agree to Terms:",
        "Signature:",
    )
    # Labels run from y=700 down to y=100 in steps of 50.
    for caption, baseline in zip(captions, range(700, 99, -50)):
        pdf.drawString(100, baseline, caption)
    pdf.save()

    # Step 2: copy the single page into a pypdf writer.
    source = PdfReader(target)
    out = PdfWriter()
    out.add_page(source.pages[0])

    # Step 3: describe the fields, in page order.
    text_rows = (
        ("first_name", 690),
        ("last_name", 640),
        ("email", 590),
        ("phone", 540),
        ("address", 490),
        ("city", 440),
        ("state", 390),
        ("zip_code", 340),
    )
    form_fields = [
        make_field("/Tx", title, create_string_object(""), rect(200, bottom, 400, bottom + 20))
        for title, bottom in text_rows
    ]

    # Country dropdown
    form_fields.append(make_field(
        "/Ch",
        "country",
        create_string_object(""),
        rect(200, 290, 400, 310),
        options=("USA", "Canada", "UK", "Germany", "France"),
    ))

    # Gender buttons: two entries sharing the name "gender".
    for choice, left in (("male", 200), ("female", 300)):
        form_fields.append(make_field(
            "/Btn",
            "gender",
            create_string_object(choice),
            rect(left, 240, left + 20, 260),
        ))

    # Interest checkboxes, stacked downwards 30 units apart.
    # NOTE(review): the "music" and "travel" boxes overlap the agree_terms
    # and signature rectangles defined below; kept as-is so the generated
    # fixture stays unchanged.
    interests = ("reading", "sports", "music", "travel")
    for drop, interest in zip(range(0, 120, 30), interests):
        form_fields.append(make_field(
            "/Btn",
            interest,
            NameObject("/Off"),
            rect(200, 190 - drop, 220, 210 - drop),
        ))

    # Checkbox for agree terms
    form_fields.append(make_field(
        "/Btn",
        "agree_terms",
        NameObject("/Off"),
        rect(200, 140, 220, 160),
    ))

    # Signature field (a plain text field)
    form_fields.append(make_field(
        "/Tx",
        "signature",
        create_string_object(""),
        rect(200, 90, 400, 110),
    ))

    # Step 4: mirror every field as a widget annotation on the page.
    target_page = out.pages[0]
    if "/Annots" not in target_page:
        target_page[NameObject("/Annots")] = ArrayObject()
    annotations = target_page[NameObject("/Annots")]

    for spec in form_fields:
        widget = DictionaryObject()
        widget[NameObject("/Subtype")] = NameObject("/Widget")
        widget[NameObject("/FT")] = spec[NameObject("/FT")]
        widget[NameObject("/T")] = spec[NameObject("/T")]
        widget[NameObject("/V")] = spec.get(NameObject("/V"), NameObject(""))
        widget[NameObject("/Rect")] = spec[NameObject("/Rect")]
        widget[NameObject("/Ff")] = spec.get(NameObject("/Ff"), NumberObject(0))
        # Only choice fields carry an option list.
        if NameObject("/Opt") in spec:
            widget[NameObject("/Opt")] = spec[NameObject("/Opt")]
        annotations.append(widget)

    # Step 5: register the fields in the document catalog's AcroForm.
    out._root_object[NameObject("/AcroForm")] = DictionaryObject({
        NameObject("/Fields"): ArrayObject(form_fields),
    })

    # Step 6: overwrite the reportlab output with the form-enabled PDF.
    with open(target, "wb") as handle:
        out.write(handle)

    print(f"Created: {target}")
|
||||
|
||||
def create_no_form_pdf():
    """Write ``no_form.pdf``: a one-page PDF with text only and no form fields."""
    target = os.path.join(OUTPUT_DIR, "no_form.pdf")

    # Only reportlab is involved here; no AcroForm is ever attached.
    pdf = canvas.Canvas(target, pagesize=letter)
    pdf.setFont("Helvetica", 16)
    pdf.drawString(100, 750, "No Form Test")

    pdf.setFont("Helvetica", 12)
    body_lines = (
        (700, "This PDF has no form fields."),
        (650, "It is used for testing field discovery"),
        (600, "on documents without AcroForm fields."),
    )
    for baseline, text in body_lines:
        pdf.drawString(100, baseline, text)

    pdf.save()

    print(f"Created: {target}")
|
||||
|
||||
def create_large_form_pdf():
    """Write ``large_form.pdf``: fifty "Field N:" labels spread over several pages.

    Only labels are drawn; no AcroForm fields are added to this file.
    """
    target = os.path.join(OUTPUT_DIR, "large_form.pdf")

    pdf = canvas.Canvas(target, pagesize=letter)
    pdf.setFont("Helvetica", 16)
    pdf.drawString(100, 750, "Large Form Test")

    pdf.setFont("Helvetica", 12)
    cursor = 700
    for index in range(50):
        pdf.drawString(100, cursor, f"Field {index + 1}:")
        cursor -= 50
        if cursor >= 50:
            continue
        # Ran out of vertical room: start a new page from the top.
        pdf.showPage()
        cursor = 700

    pdf.save()

    print(f"Created: {target}")
|
||||
|
||||
def main():
    """Run every PDF generator in turn, printing progress around them."""
    print("Generating test PDF files...")
    print(f"Output directory: {OUTPUT_DIR}")
    print()

    # Same order as before: simple, complex, no-form, large.
    generators = (
        create_simple_form_pdf,
        create_complex_form_pdf,
        create_no_form_pdf,
        create_large_form_pdf,
    )
    for generate in generators:
        generate()

    print()
    print("All test PDF files generated successfully!")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
125
tests/fixtures/large_form.pdf
vendored
Normal file
125
tests/fixtures/large_form.pdf
vendored
Normal file
@@ -0,0 +1,125 @@
|
||||
%PDF-1.3
|
||||
%“Œ‹ž ReportLab Generated PDF document (opensource)
|
||||
1 0 obj
|
||||
<<
|
||||
/F1 2 0 R
|
||||
>>
|
||||
endobj
|
||||
2 0 obj
|
||||
<<
|
||||
/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
|
||||
>>
|
||||
endobj
|
||||
3 0 obj
|
||||
<<
|
||||
/Contents 10 0 R /MediaBox [ 0 0 612 792 ] /Parent 9 0 R /Resources <<
|
||||
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
||||
>> /Rotate 0 /Trans <<
|
||||
|
||||
>>
|
||||
/Type /Page
|
||||
>>
|
||||
endobj
|
||||
4 0 obj
|
||||
<<
|
||||
/Contents 11 0 R /MediaBox [ 0 0 612 792 ] /Parent 9 0 R /Resources <<
|
||||
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
||||
>> /Rotate 0 /Trans <<
|
||||
|
||||
>>
|
||||
/Type /Page
|
||||
>>
|
||||
endobj
|
||||
5 0 obj
|
||||
<<
|
||||
/Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 9 0 R /Resources <<
|
||||
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
||||
>> /Rotate 0 /Trans <<
|
||||
|
||||
>>
|
||||
/Type /Page
|
||||
>>
|
||||
endobj
|
||||
6 0 obj
|
||||
<<
|
||||
/Contents 13 0 R /MediaBox [ 0 0 612 792 ] /Parent 9 0 R /Resources <<
|
||||
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
||||
>> /Rotate 0 /Trans <<
|
||||
|
||||
>>
|
||||
/Type /Page
|
||||
>>
|
||||
endobj
|
||||
7 0 obj
|
||||
<<
|
||||
/PageMode /UseNone /Pages 9 0 R /Type /Catalog
|
||||
>>
|
||||
endobj
|
||||
8 0 obj
|
||||
<<
|
||||
/Author (anonymous) /CreationDate (D:19800101000000+00'00') /Creator (anonymous) /Keywords () /ModDate (D:19800101000000+00'00') /Producer (ReportLab PDF Library - \(opensource\))
|
||||
/Subject (unspecified) /Title (untitled) /Trapped /False
|
||||
>>
|
||||
endobj
|
||||
9 0 obj
|
||||
<<
|
||||
/Count 4 /Kids [ 3 0 R 4 0 R 5 0 R 6 0 R ] /Type /Pages
|
||||
>>
|
||||
endobj
|
||||
10 0 obj
|
||||
<<
|
||||
/Filter [ /ASCII85Decode /FlateDecode ] /Length 217
|
||||
>>
|
||||
stream
|
||||
Gas30YmS?5&;9"+:GJ\L`7rI@@Oq[]V;)ju4[h(2dJ$.fMDlYNi/6XZ9/-MBqIFpH"0bWR4+VY?&JE4dmBP4$H`s>o>Pd5_5(knN-9C@@=hbnO$/KG<T]uHC6SHeT%fQ2(61,2)kB&jPeh#ln*V7]`-(1#q7P]TrOr967OBGd6R>k'EA?N"sbgn1*RGt<48$Z/.<iqdC<HBN;BdXTjQboF?~>endstream
|
||||
endobj
|
||||
11 0 obj
|
||||
<<
|
||||
/Filter [ /ASCII85Decode /FlateDecode ] /Length 179
|
||||
>>
|
||||
stream
|
||||
Gas30^C%h3*5qB\:N<.Pcs3$Hl<(9Sj6mHT",_O,eK?ILEeIs/+25o1W?$HFlO(jerB`1_*amY9`!,>fg-:(O.:HsM<c")brI"e6WCOT4gHTe]6:XPR3Z2,/H>lia7mi26F)k6[R>)2Tc&QO]0JmRQ33#uf(:EGYU/pYb,%W<I+0;`+EW~>endstream
|
||||
endobj
|
||||
12 0 obj
|
||||
<<
|
||||
/Filter [ /ASCII85Decode /FlateDecode ] /Length 182
|
||||
>>
|
||||
stream
|
||||
Gas30YmS?%'SYMZ:N8jHd+m]ZXcA"(*:Fj!$As93eK>>CO@)QnnF80POP6tcHWu&Bi%Q$",OR8C45u,jFR@u"e5F01DQMJaO6&5D+&?+Z'=%F%qt`rY;O"3#"KbqRMK6*1l<JI#\QT.g>jW9fl6'd&lDQ+4eQPFB=)/[R?*6VZ`^9D([>Kog~>endstream
|
||||
endobj
|
||||
13 0 obj
|
||||
<<
|
||||
/Filter [ /ASCII85Decode /FlateDecode ] /Length 147
|
||||
>>
|
||||
stream
|
||||
Gas3+3spL'$q8S#<P23]FJ9Y&V4a)bG2NT>h1+`5('Z%;U^2`KE+.t@o*+c<HmDMhfg)&^AATHdpsVmX3RhL!69O]%\U_jUJK0dDLK7_Y[]$?TK6gh*/?5bY6!78.Ms>%mcr*lWqbfg@lpOeX~>endstream
|
||||
endobj
|
||||
xref
|
||||
0 14
|
||||
0000000000 65535 f
|
||||
0000000061 00000 n
|
||||
0000000092 00000 n
|
||||
0000000199 00000 n
|
||||
0000000393 00000 n
|
||||
0000000587 00000 n
|
||||
0000000781 00000 n
|
||||
0000000975 00000 n
|
||||
0000001043 00000 n
|
||||
0000001304 00000 n
|
||||
0000001381 00000 n
|
||||
0000001689 00000 n
|
||||
0000001959 00000 n
|
||||
0000002232 00000 n
|
||||
trailer
|
||||
<<
|
||||
/ID
|
||||
[<30157dc3b9cf65b8d1eaf3493559908e><30157dc3b9cf65b8d1eaf3493559908e>]
|
||||
% ReportLab generated PDF document -- digest (opensource)
|
||||
|
||||
/Info 8 0 R
|
||||
/Root 7 0 R
|
||||
/Size 14
|
||||
>>
|
||||
startxref
|
||||
2470
|
||||
%%EOF
|
||||
68
tests/fixtures/no_form.pdf
vendored
Normal file
68
tests/fixtures/no_form.pdf
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
%PDF-1.3
|
||||
%“Œ‹ž ReportLab Generated PDF document (opensource)
|
||||
1 0 obj
|
||||
<<
|
||||
/F1 2 0 R
|
||||
>>
|
||||
endobj
|
||||
2 0 obj
|
||||
<<
|
||||
/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
|
||||
>>
|
||||
endobj
|
||||
3 0 obj
|
||||
<<
|
||||
/Contents 7 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources <<
|
||||
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
||||
>> /Rotate 0 /Trans <<
|
||||
|
||||
>>
|
||||
/Type /Page
|
||||
>>
|
||||
endobj
|
||||
4 0 obj
|
||||
<<
|
||||
/PageMode /UseNone /Pages 6 0 R /Type /Catalog
|
||||
>>
|
||||
endobj
|
||||
5 0 obj
|
||||
<<
|
||||
/Author (anonymous) /CreationDate (D:19800101000000+00'00') /Creator (anonymous) /Keywords () /ModDate (D:19800101000000+00'00') /Producer (ReportLab PDF Library - \(opensource\))
|
||||
/Subject (unspecified) /Title (untitled) /Trapped /False
|
||||
>>
|
||||
endobj
|
||||
6 0 obj
|
||||
<<
|
||||
/Count 1 /Kids [ 3 0 R ] /Type /Pages
|
||||
>>
|
||||
endobj
|
||||
7 0 obj
|
||||
<<
|
||||
/Filter [ /ASCII85Decode /FlateDecode ] /Length 226
|
||||
>>
|
||||
stream
|
||||
Gas2Bb6l*?&4Q?lMRuh(2(>rm;UL(=iaR@%P12s;!_o]ip\#oA:h3rL(XCuYYkiVA702`\bERWLTF<pmA'bMe$GLl8m[Gp,mCZM>`irc(:k@<Q,.1t_;U3TSGL0f4RBV`'XKta+*A74'q:3;`A;r@nl60Fm[LVPtD`E'mGib0+5kmB/Rp3p#C+&@HQ1$r/^;:dZ/#koRn*nah\!>!7PW#)X61=m`OB9!~>endstream
|
||||
endobj
|
||||
xref
|
||||
0 8
|
||||
0000000000 65535 f
|
||||
0000000061 00000 n
|
||||
0000000092 00000 n
|
||||
0000000199 00000 n
|
||||
0000000392 00000 n
|
||||
0000000460 00000 n
|
||||
0000000721 00000 n
|
||||
0000000780 00000 n
|
||||
trailer
|
||||
<<
|
||||
/ID
|
||||
[<30157dc3b9cf65b8d1eaf3493559908e><30157dc3b9cf65b8d1eaf3493559908e>]
|
||||
% ReportLab generated PDF document -- digest (opensource)
|
||||
|
||||
/Info 5 0 R
|
||||
/Root 4 0 R
|
||||
/Size 8
|
||||
>>
|
||||
startxref
|
||||
1096
|
||||
%%EOF
|
||||
161
tests/fixtures/simple_form.pdf
vendored
Normal file
161
tests/fixtures/simple_form.pdf
vendored
Normal file
@@ -0,0 +1,161 @@
|
||||
%PDF-1.3
|
||||
%âãÏÓ
|
||||
1 0 obj
|
||||
<<
|
||||
/Producer (pypdf)
|
||||
>>
|
||||
endobj
|
||||
2 0 obj
|
||||
<<
|
||||
/Type /Pages
|
||||
/Count 1
|
||||
/Kids [ 4 0 R ]
|
||||
>>
|
||||
endobj
|
||||
3 0 obj
|
||||
<<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
/AcroForm <<
|
||||
/Fields [ <<
|
||||
/FT /Tx
|
||||
/T (name)
|
||||
/V ()
|
||||
/Rect [ 200 690 400 710 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Tx
|
||||
/T (email)
|
||||
/V ()
|
||||
/Rect [ 200 640 400 660 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Tx
|
||||
/T (phone)
|
||||
/V ()
|
||||
/Rect [ 200 590 400 610 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Ch
|
||||
/T (country)
|
||||
/V ()
|
||||
/Opt [ (USA) (Canada) (UK) (Germany) (France) ]
|
||||
/Rect [ 200 540 400 560 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Tx
|
||||
/T (birth\137date)
|
||||
/V ()
|
||||
/Rect [ 200 490 400 510 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/FT /Btn
|
||||
/T (agree\137terms)
|
||||
/V /Off
|
||||
/Rect [ 200 440 220 460 ]
|
||||
/Ff 0
|
||||
>> ]
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
4 0 obj
|
||||
<<
|
||||
/Contents 5 0 R
|
||||
/MediaBox [ 0 0 612 792 ]
|
||||
/Resources <<
|
||||
/Font 6 0 R
|
||||
/ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
||||
>>
|
||||
/Rotate 0
|
||||
/Trans <<
|
||||
>>
|
||||
/Type /Page
|
||||
/Parent 2 0 R
|
||||
/Annots [ <<
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/T (name)
|
||||
/V ()
|
||||
/Rect [ 200 690 400 710 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/T (email)
|
||||
/V ()
|
||||
/Rect [ 200 640 400 660 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/T (phone)
|
||||
/V ()
|
||||
/Rect [ 200 590 400 610 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Ch
|
||||
/T (country)
|
||||
/V ()
|
||||
/Rect [ 200 540 400 560 ]
|
||||
/Ff 0
|
||||
/Opt [ (USA) (Canada) (UK) (Germany) (France) ]
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Tx
|
||||
/T (birth\137date)
|
||||
/V ()
|
||||
/Rect [ 200 490 400 510 ]
|
||||
/Ff 0
|
||||
>> <<
|
||||
/Subtype /Widget
|
||||
/FT /Btn
|
||||
/T (agree\137terms)
|
||||
/V /Off
|
||||
/Rect [ 200 440 220 460 ]
|
||||
/Ff 0
|
||||
>> ]
|
||||
>>
|
||||
endobj
|
||||
5 0 obj
|
||||
<<
|
||||
/Filter [ /ASCII85Decode /FlateDecode ]
|
||||
/Length 214
|
||||
>>
|
||||
stream
|
||||
Gas3/_$YcZ&-h():[oO-KC+O7Fj&337*rSs`0Q/<`k!1:qntBjLh1!*5Q?*5,9cn2L]>4V7T^E=1'1`)j"LZXOAkYndii(Rd4^iHO@!??#S:KhY5-Hn'\Y63F`n8+K,.t]c\@9%516]H[@*&9CT1O*F'1H9T&WS2DLGjN]UaM[f"?B)-YBck(&"KsZ*@fJ2kq(gmZ1he)\4'9")1e>M#~>
|
||||
endstream
|
||||
endobj
|
||||
6 0 obj
|
||||
<<
|
||||
/F1 7 0 R
|
||||
>>
|
||||
endobj
|
||||
7 0 obj
|
||||
<<
|
||||
/BaseFont /Helvetica
|
||||
/Encoding /WinAnsiEncoding
|
||||
/Name /F1
|
||||
/Subtype /Type1
|
||||
/Type /Font
|
||||
>>
|
||||
endobj
|
||||
xref
|
||||
0 8
|
||||
0000000000 65535 f
|
||||
0000000015 00000 n
|
||||
0000000054 00000 n
|
||||
0000000113 00000 n
|
||||
0000000637 00000 n
|
||||
0000001387 00000 n
|
||||
0000001692 00000 n
|
||||
0000001723 00000 n
|
||||
trailer
|
||||
<<
|
||||
/Size 8
|
||||
/Root 3 0 R
|
||||
/Info 1 0 R
|
||||
>>
|
||||
startxref
|
||||
1830
|
||||
%%EOF
|
||||
464
tests/test_documents.py
Normal file
464
tests/test_documents.py
Normal file
@@ -0,0 +1,464 @@
|
||||
"""
|
||||
Comprehensive test suite for document-service.
|
||||
|
||||
Tests document upload, retrieval, field discovery, and complete workflows.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import os
|
||||
from fastapi.testclient import TestClient
|
||||
from unittest.mock import Mock, patch
|
||||
from moto import mock_aws
|
||||
import boto3
|
||||
|
||||
from app.main import app
|
||||
from app.pdf import discover_fields
|
||||
|
||||
# Test data paths
|
||||
# Fixture PDFs live in the "fixtures" directory next to this module.
_TESTS_ROOT = os.path.dirname(__file__)
FIXTURES_DIR = os.path.join(_TESTS_ROOT, "fixtures")
|
||||
|
||||
|
||||
class TestHealthEndpoint:
    """Checks for the ``/health`` route."""

    def test_health_endpoint(self, test_client):
        """GET /health answers 200 with the body ``{"status": "ok"}``."""
        health = test_client.get("/health")
        expected_body = {"status": "ok"}

        assert health.status_code == 200
        assert health.json() == expected_body
|
||||
|
||||
|
||||
class TestDocumentUpload:
    """Upload-route tests: accepted PDFs, rejected credentials, missing file."""

    @staticmethod
    def _post_pdf(client, pdf_path, filename, headers=None):
        """POST one PDF to ``/api/documents/upload`` and return the response.

        Args:
            client: The test client used to issue the request.
            pdf_path: Filesystem path of the PDF to send.
            filename: File name reported in the multipart part.
            headers: Optional request headers. When omitted, the request is
                issued without any ``headers`` argument.
        """
        request_kwargs = {"data": {"org_id": "test-org-123"}}
        if headers is not None:
            request_kwargs["headers"] = headers
        # The request is issued while the file is still open because the
        # open handle itself is handed to the client as the multipart payload.
        with open(pdf_path, "rb") as pdf:
            request_kwargs["files"] = {"file": (filename, pdf, "application/pdf")}
            return client.post("/api/documents/upload", **request_kwargs)

    def test_upload_simple_pdf_success(self, test_client, test_pdf_files, sample_auth_token):
        """The simple form PDF is accepted with 201 and described in the reply."""
        response = self._post_pdf(
            test_client,
            test_pdf_files["simple_form"],
            "simple_form.pdf",
            headers={"Authorization": sample_auth_token},
        )

        assert response.status_code == 201
        body = response.json()
        for expected_key in ("document_id", "metadata", "download_url"):
            assert expected_key in body
        metadata = body["metadata"]
        assert metadata["document_type"] == "pdf"
        assert metadata["filename"] == "simple_form.pdf"

    def test_upload_complex_pdf_success(self, test_client, test_pdf_files, sample_auth_token):
        """The complex form PDF is accepted with 201 and typed as ``pdf``."""
        response = self._post_pdf(
            test_client,
            test_pdf_files["complex_form"],
            "complex_form.pdf",
            headers={"Authorization": sample_auth_token},
        )

        assert response.status_code == 201
        body = response.json()
        assert "document_id" in body
        assert body["metadata"]["document_type"] == "pdf"

    def test_upload_no_form_pdf_success(self, test_client, test_pdf_files, sample_auth_token):
        """A PDF without form fields is still accepted with 201."""
        response = self._post_pdf(
            test_client,
            test_pdf_files["no_form"],
            "no_form.pdf",
            headers={"Authorization": sample_auth_token},
        )

        assert response.status_code == 201
        assert "document_id" in response.json()

    def test_upload_without_auth_returns_401(self, test_client, test_pdf_files):
        """An upload sent with no headers is answered with 401 and a ``detail``."""
        response = self._post_pdf(
            test_client,
            test_pdf_files["simple_form"],
            "simple_form.pdf",
        )

        assert response.status_code == 401
        assert "detail" in response.json()

    def test_upload_with_invalid_auth_returns_401(self, test_client, test_pdf_files):
        """An upload carrying the header value ``Invalid token`` gets 401."""
        response = self._post_pdf(
            test_client,
            test_pdf_files["simple_form"],
            "simple_form.pdf",
            headers={"Authorization": "Invalid token"},
        )

        assert response.status_code == 401

    def test_upload_missing_file_returns_400(self, test_client, sample_auth_token):
        """An upload with no file part is rejected.

        Despite the ``400`` in the test name, the status asserted here is 422.
        """
        response = test_client.post(
            "/api/documents/upload",
            data={"org_id": "test-org-123"},
            headers={"Authorization": sample_auth_token},
        )

        assert response.status_code == 422  # FastAPI validation error
|
||||
|
||||
|
||||
class TestDocumentMetadata:
    """Tests for ``GET /api/documents/{document_id}``."""

    def test_get_document_metadata_success(self, test_client, sample_auth_token):
        """Authenticated metadata lookup for a document id that was never uploaded.

        No document is uploaded beforehand, so this only checks that the
        endpoint is accessible with credentials: either 404 or 403 is accepted.
        """
        response = test_client.get(
            "/api/documents/test-doc-456",
            headers={"Authorization": sample_auth_token},
            params={"org_id": "test-org-123"},
        )

        # The document does not exist, so a successful body is not expected.
        assert response.status_code in (404, 403)

    def test_get_document_without_auth_returns_401(self, test_client):
        """A metadata request without credentials is answered with 401."""
        unauthenticated = test_client.get("/api/documents/test-doc-456")

        assert unauthenticated.status_code == 401
|
||||
|
||||
|
||||
class TestDownloadUrl:
    """Tests for ``GET /api/documents/{document_id}/download-url``."""

    def test_get_download_url_success(self, test_client, sample_auth_token):
        """Authenticated download-URL request for a document that was never uploaded.

        Only endpoint accessibility is checked: either 404 or 403 is accepted.
        """
        response = test_client.get(
            "/api/documents/test-doc-456/download-url",
            headers={"Authorization": sample_auth_token},
            params={"org_id": "test-org-123"},
        )

        # The document does not exist, so a successful body is not expected.
        assert response.status_code in (404, 403)

    def test_get_download_url_without_auth_returns_401(self, test_client):
        """A download-URL request without credentials is answered with 401."""
        unauthenticated = test_client.get("/api/documents/test-doc-456/download-url")

        assert unauthenticated.status_code == 401
|
||||
|
||||
|
||||
class TestPDFFieldDiscovery:
    """Test PDF field discovery through the HTTP API."""

    @staticmethod
    def _upload_and_get_fields(client, pdf_path, filename, auth_token):
        """Upload a PDF, then request its discovered fields.

        The upload must succeed with 201. The assertion is unconditional so
        that a failed upload fails the test instead of letting it pass
        without checking anything.

        Args:
            client: The test client used to issue both requests.
            pdf_path: Filesystem path of the PDF to upload.
            filename: File name reported in the multipart part.
            auth_token: Value sent in the ``Authorization`` header.

        Returns:
            The response of ``GET /api/documents/{document_id}/fields``.
        """
        headers = {"Authorization": auth_token}
        # The request is issued while the file is still open because the
        # open handle itself is handed to the client as the multipart payload.
        with open(pdf_path, "rb") as f:
            upload_response = client.post(
                "/api/documents/upload",
                files={"file": (filename, f, "application/pdf")},
                data={"org_id": "test-org-123"},
                headers=headers,
            )

        assert upload_response.status_code == 201, (
            f"upload of {filename} failed with status {upload_response.status_code}"
        )
        document_id = upload_response.json()["document_id"]

        return client.get(
            f"/api/documents/{document_id}/fields",
            params={"org_id": "test-org-123"},
            headers=headers,
        )

    def test_get_pdf_fields_simple_form(self, test_client, test_pdf_files, sample_auth_token):
        """Test getting PDF fields from simple form."""
        response = self._upload_and_get_fields(
            test_client,
            test_pdf_files["simple_form"],
            "simple_form.pdf",
            sample_auth_token,
        )

        assert response.status_code == 200
        result = response.json()
        assert "fields" in result
        assert len(result["fields"]) == 6  # name, email, phone, country, birth_date, agree_terms

        # Check field types
        field_types = {f["field"]: f["type"] for f in result["fields"]}
        assert field_types["name"] == "string"
        assert field_types["email"] == "string"
        assert field_types["phone"] == "string"
        assert field_types["country"] == "select"
        assert field_types["birth_date"] == "date"
        assert field_types["agree_terms"] == "boolean"

    def test_get_pdf_fields_complex_form(self, test_client, test_pdf_files, sample_auth_token):
        """Test getting PDF fields from complex form."""
        response = self._upload_and_get_fields(
            test_client,
            test_pdf_files["complex_form"],
            "complex_form.pdf",
            sample_auth_token,
        )

        assert response.status_code == 200
        result = response.json()
        assert "fields" in result
        assert len(result["fields"]) == 16  # All fields from complex form

    def test_get_pdf_fields_no_form_returns_empty_list(self, test_client, test_pdf_files, sample_auth_token):
        """Test getting PDF fields from PDF without form fields."""
        response = self._upload_and_get_fields(
            test_client,
            test_pdf_files["no_form"],
            "no_form.pdf",
            sample_auth_token,
        )

        assert response.status_code == 200
        result = response.json()
        assert "fields" in result
        assert len(result["fields"]) == 0

    def test_get_pdf_fields_without_auth_returns_401(self, test_client):
        """Test getting PDF fields without auth returns 401."""
        response = test_client.get("/api/documents/test-doc-456/fields")

        assert response.status_code == 401
|
||||
|
||||
|
||||
class TestDocumentDeletion:
    """Tests for ``DELETE /api/documents/{document_id}``."""

    def test_delete_document_success(self, test_client, sample_auth_token):
        """Authenticated delete of a document id that was never uploaded.

        Only endpoint accessibility is checked: either 404 or 403 is accepted.
        """
        response = test_client.delete(
            "/api/documents/test-doc-456",
            headers={"Authorization": sample_auth_token},
            params={"org_id": "test-org-123"},
        )

        # The document does not exist, so a successful delete is not expected.
        assert response.status_code in (404, 403)

    def test_delete_document_without_auth_returns_401(self, test_client):
        """A delete request without credentials is answered with 401."""
        unauthenticated = test_client.delete("/api/documents/test-doc-456")

        assert unauthenticated.status_code == 401
|
||||
|
||||
|
||||
class TestPDFFieldDiscoveryDirect:
    """Call ``discover_fields`` on the fixture PDFs directly, without the API."""

    def test_simple_form_pdf_fields(self, test_pdf_files):
        """The simple form yields six fields with the expected names and types."""
        discovered = discover_fields(test_pdf_files["simple_form"])

        assert len(discovered) == 6
        names = [entry["field"] for entry in discovered]
        for expected_name in ("name", "email", "phone", "country", "birth_date", "agree_terms"):
            assert expected_name in names

        # Check field types
        type_by_field = {entry["field"]: entry["type"] for entry in discovered}
        expected_types = {
            "name": "string",
            "email": "string",
            "phone": "string",
            "country": "select",
            "birth_date": "date",
            "agree_terms": "boolean",
        }
        for field_name, expected_type in expected_types.items():
            assert type_by_field[field_name] == expected_type

    def test_complex_form_pdf_fields(self, test_pdf_files):
        """The complex form yields sixteen fields; a sample of them is checked."""
        discovered = discover_fields(test_pdf_files["complex_form"])

        assert len(discovered) == 16
        names = [entry["field"] for entry in discovered]

        # Check for expected fields
        for expected_name in (
            "first_name",
            "last_name",
            "email",
            "country",
            "gender",
            "agree_terms",
            "signature",
        ):
            assert expected_name in names

        # Check field types
        type_by_field = {entry["field"]: entry["type"] for entry in discovered}
        expected_types = {
            "first_name": "string",
            "country": "select",
            "gender": "boolean",
            "agree_terms": "boolean",
            "signature": "string",
        }
        for field_name, expected_type in expected_types.items():
            assert type_by_field[field_name] == expected_type

    def test_no_form_pdf_fields(self, test_pdf_files):
        """A PDF without form fields yields no fields."""
        discovered = discover_fields(test_pdf_files["no_form"])

        assert len(discovered) == 0

    def test_large_form_pdf_fields(self, test_pdf_files):
        """The ``large_form`` fixture is expected to yield no fields."""
        discovered = discover_fields(test_pdf_files["large_form"])

        assert len(discovered) == 0

    def test_pdf_field_labels_generated_correctly(self, test_pdf_files):
        """Each simple-form field carries the expected human-readable label."""
        discovered = discover_fields(test_pdf_files["simple_form"])

        label_by_field = {entry["field"]: entry["label"] for entry in discovered}
        expected_labels = {
            "name": "Name",
            "email": "Email",
            "phone": "Phone",
            "country": "Country",
            "birth_date": "Birth Date",
            "agree_terms": "Agree Terms",
        }
        for field_name, expected_label in expected_labels.items():
            assert label_by_field[field_name] == expected_label

    def test_pdf_field_options_extracted_correctly(self, test_pdf_files):
        """The ``country`` select field exposes its five dropdown options."""
        discovered = discover_fields(test_pdf_files["simple_form"])

        country = next(entry for entry in discovered if entry["field"] == "country")
        assert country["type"] == "select"

        options = country["options"]
        assert options is not None
        assert len(options) == 5
        for expected_option in ("USA", "Canada", "UK", "Germany", "France"):
            assert expected_option in options
|
||||
|
||||
|
||||
class TestCompleteWorkflow:
    """Test complete document lifecycle workflows."""

    def test_complete_document_lifecycle(self, test_client, test_pdf_files, sample_auth_token):
        """Test complete document lifecycle: upload, get metadata, get fields, delete.

        The upload must succeed with 201. The assertion is unconditional and
        runs before any dependent request, so a failed upload fails the test
        instead of letting it pass without checking anything.
        """
        headers = {"Authorization": sample_auth_token}
        params = {"org_id": "test-org-123"}

        # Upload document. The request is issued while the file is still open
        # because the open handle itself is the multipart payload.
        with open(test_pdf_files["simple_form"], "rb") as f:
            upload_response = test_client.post(
                "/api/documents/upload",
                files={"file": ("simple_form.pdf", f, "application/pdf")},
                data={"org_id": "test-org-123"},
                headers=headers,
            )

        assert upload_response.status_code == 201, (
            f"upload failed with status {upload_response.status_code}"
        )
        document_id = upload_response.json()["document_id"]

        # Get metadata
        metadata_response = test_client.get(
            f"/api/documents/{document_id}",
            params=params,
            headers=headers,
        )

        # Get fields
        fields_response = test_client.get(
            f"/api/documents/{document_id}/fields",
            params=params,
            headers=headers,
        )

        # Get download URL
        download_response = test_client.get(
            f"/api/documents/{document_id}/download-url",
            params=params,
            headers=headers,
        )

        # Delete document
        delete_response = test_client.delete(
            f"/api/documents/{document_id}",
            params=params,
            headers=headers,
        )

        # Verify the follow-up operations
        assert metadata_response.status_code in [200, 404]  # May be 404 if S3 not available
        assert fields_response.status_code in [200, 404]
        assert download_response.status_code in [200, 404]
        assert delete_response.status_code in [200, 404]
|
||||
Reference in New Issue
Block a user