# document-service/tests/fixtures/generate_test_pdfs.py
"""
Generate test PDF files for document-service testing.
This script creates four test PDFs:
- Simple form PDF with basic AcroForm fields
- Complex form PDF with multiple AcroForm field types
- No-form PDF that contains no form fields
- Large PDF (multi-page, text only, no form fields) for size validation testing
"""
import os
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib import colors
from pypdf import PdfReader, PdfWriter
from pypdf.generic import (
NameObject,
create_string_object,
NumberObject,
ArrayObject,
DictionaryObject,
BooleanObject,
)
# Absolute path of the directory holding this script; every generated PDF is
# written next to it.
OUTPUT_DIR = os.path.split(os.path.abspath(__file__))[0]
def create_simple_form_pdf():
    """Create ``simple_form.pdf``: one page with six basic AcroForm fields.

    The page content (a title and one label per field) is drawn with
    reportlab. The file is then re-opened with pypdf, which adds:

    - ``name``, ``email``, ``phone``, ``birth_date``: empty text fields (/Tx)
    - ``country``: choice field (/Ch) with five options, flagged as a combo
      box so that it is a dropdown rather than a list box
    - ``agree_terms``: checkbox (/Btn) initialised to ``/Off``

    Each field is a single merged field/widget dictionary. It is registered
    once as an indirect object and that one reference is stored in both the
    page's /Annots array and the /AcroForm /Fields array, so the widget on
    the page and the field listed in the form are the same object.

    The result overwrites ``simple_form.pdf`` in ``OUTPUT_DIR`` and the path
    is printed to stdout.
    """
    output_path = os.path.join(OUTPUT_DIR, "simple_form.pdf")

    # Create base PDF with reportlab: title plus the static field labels.
    c = canvas.Canvas(output_path, pagesize=letter)
    c.setFont("Helvetica", 16)
    c.drawString(100, 750, "Simple Form Test")
    c.setFont("Helvetica", 12)
    c.drawString(100, 700, "Name:")
    c.drawString(100, 650, "Email:")
    c.drawString(100, 600, "Phone:")
    c.drawString(100, 550, "Country:")
    c.drawString(100, 500, "Birth Date:")
    c.drawString(100, 450, "Agree to Terms:")
    c.save()

    # Re-open the page with pypdf so real form fields can be attached to it.
    reader = PdfReader(output_path)
    writer = PdfWriter()
    writer.add_page(reader.pages[0])

    # Choice-field flag "Combo" (bit position 18 of /Ff). Without it a /Ch
    # field is a scrollable list box, not a dropdown.
    combo_flag = 1 << 17

    # (field type, name, initial value, rect, /Ff flags, options)
    field_specs = [
        ("/Tx", "name", create_string_object(""), (200, 690, 400, 710), 0, None),
        ("/Tx", "email", create_string_object(""), (200, 640, 400, 660), 0, None),
        ("/Tx", "phone", create_string_object(""), (200, 590, 400, 610), 0, None),
        (
            "/Ch",
            "country",
            create_string_object(""),
            (200, 540, 400, 560),
            combo_flag,
            ["USA", "Canada", "UK", "Germany", "France"],
        ),
        ("/Tx", "birth_date", create_string_object(""), (200, 490, 400, 510), 0, None),
        ("/Btn", "agree_terms", NameObject("/Off"), (200, 440, 220, 460), 0, None),
    ]

    page = writer.pages[0]
    if "/Annots" not in page:
        page[NameObject("/Annots")] = ArrayObject()
    annots = page[NameObject("/Annots")]
    field_refs = ArrayObject()

    for field_type, name, value, rect, flags, options in field_specs:
        # One dictionary acts as both the field and its widget annotation.
        field = DictionaryObject({
            NameObject("/Type"): NameObject("/Annot"),
            NameObject("/Subtype"): NameObject("/Widget"),
            NameObject("/FT"): NameObject(field_type),
            NameObject("/T"): create_string_object(name),
            NameObject("/V"): value,
            NameObject("/Rect"): ArrayObject([NumberObject(v) for v in rect]),
            NameObject("/Ff"): NumberObject(flags),
        })
        if options is not None:
            field[NameObject("/Opt")] = ArrayObject(
                [create_string_object(option) for option in options]
            )
        # Register the dictionary once and share the resulting reference, so
        # /Annots and /Fields can never drift apart.
        ref = writer._add_object(field)
        annots.append(ref)
        field_refs.append(ref)

    # Add AcroForm to the document. /NeedAppearances asks the viewer to build
    # the field appearances, since no appearance streams are written here.
    acroform = DictionaryObject({
        NameObject("/Fields"): field_refs,
        NameObject("/NeedAppearances"): BooleanObject(True),
    })
    writer._root_object[NameObject("/AcroForm")] = acroform

    # Save the PDF, replacing the reportlab-only version written above.
    with open(output_path, "wb") as f:
        writer.write(f)
    print(f"Created: {output_path}")
def create_complex_form_pdf():
    """Create ``complex_form.pdf``: one page with several AcroForm field types.

    The page content (a title and one label per row) is drawn with reportlab.
    The file is then re-opened with pypdf, which adds:

    - eight empty text fields (/Tx): ``first_name``, ``last_name``, ``email``,
      ``phone``, ``address``, ``city``, ``state``, ``zip_code``
    - ``country``: choice field (/Ch) with five options, flagged as a combo
      box so that it is a dropdown rather than a list box
    - ``gender``: one radio-button field (/Btn with the Radio flag) whose two
      kid widgets have the on-states ``/male`` and ``/female``
    - four checkboxes (/Btn) ``reading``, ``sports``, ``music``, ``travel``,
      laid out in one horizontal row on the "Interests:" line
    - ``agree_terms``: checkbox (/Btn) initialised to ``/Off``
    - ``signature``: a text field (/Tx), not a digital-signature field

    Every field/widget dictionary is registered as an indirect object, and the
    same reference is used in the page's /Annots array and in the /AcroForm
    field tree, so the widget on the page and the field in the form are the
    same object.

    The result overwrites ``complex_form.pdf`` in ``OUTPUT_DIR`` and the path
    is printed to stdout.
    """
    # Imported locally: the module-level pypdf.generic import list does not
    # include the stream type needed for the radio-button appearance states.
    from pypdf.generic import DecodedStreamObject

    output_path = os.path.join(OUTPUT_DIR, "complex_form.pdf")

    # Create base PDF with reportlab: title plus one label every 50 points.
    c = canvas.Canvas(output_path, pagesize=letter)
    c.setFont("Helvetica", 16)
    c.drawString(100, 750, "Complex Form Test")
    c.setFont("Helvetica", 12)
    labels = [
        "First Name:",
        "Last Name:",
        "Email:",
        "Phone:",
        "Address:",
        "City:",
        "State:",
        "Zip Code:",
        "Country:",
        "Gender:",
        "Interests:",
        "Agree to Terms:",
        "Signature:",
    ]
    for row, label in enumerate(labels):
        c.drawString(100, 700 - 50 * row, label)
    c.save()

    # Re-open the page with pypdf so real form fields can be attached to it.
    reader = PdfReader(output_path)
    writer = PdfWriter()
    writer.add_page(reader.pages[0])

    page = writer.pages[0]
    if "/Annots" not in page:
        page[NameObject("/Annots")] = ArrayObject()
    annots = page[NameObject("/Annots")]
    field_refs = ArrayObject()

    # /Ff bits: "Combo" is bit position 18 for choice fields, "Radio" is bit
    # position 16 for button fields.
    combo_flag = 1 << 17
    radio_flag = 1 << 15

    def rect_array(x1, y1, x2, y2):
        """Return a /Rect array for the given corner coordinates."""
        return ArrayObject([NumberObject(v) for v in (x1, y1, x2, y2)])

    def add_widget_field(field_type, name, value, rect, flags=0, options=None):
        """Add one merged field/widget dictionary to the page and the form."""
        field = DictionaryObject({
            NameObject("/Type"): NameObject("/Annot"),
            NameObject("/Subtype"): NameObject("/Widget"),
            NameObject("/FT"): NameObject(field_type),
            NameObject("/T"): create_string_object(name),
            NameObject("/V"): value,
            NameObject("/Rect"): rect_array(*rect),
            NameObject("/Ff"): NumberObject(flags),
        })
        if options is not None:
            field[NameObject("/Opt")] = ArrayObject(
                [create_string_object(option) for option in options]
            )
        # Register once and share the reference between /Annots and /Fields.
        ref = writer._add_object(field)
        annots.append(ref)
        field_refs.append(ref)

    def appearance_stream(content):
        """Register a 20x20 form XObject with *content* and return its reference."""
        stream = DecodedStreamObject()
        stream[NameObject("/Type")] = NameObject("/XObject")
        stream[NameObject("/Subtype")] = NameObject("/Form")
        stream[NameObject("/BBox")] = rect_array(0, 0, 20, 20)
        stream.set_data(content)
        return writer._add_object(stream)

    # Text fields
    text_fields = [
        ("first_name", 200, 690),
        ("last_name", 200, 640),
        ("email", 200, 590),
        ("phone", 200, 540),
        ("address", 200, 490),
        ("city", 200, 440),
        ("state", 200, 390),
        ("zip_code", 200, 340),
    ]
    for name, x, y in text_fields:
        add_widget_field("/Tx", name, create_string_object(""), (x, y, x + 200, y + 20))

    # Country dropdown
    add_widget_field(
        "/Ch",
        "country",
        create_string_object(""),
        (200, 290, 400, 310),
        flags=combo_flag,
        options=["USA", "Canada", "UK", "Germany", "France"],
    )

    # Radio buttons for gender: a single parent field named "gender" owns two
    # kid widgets. The choices are the names of the kids' "on" appearance
    # states, so each kid needs an /AP /N dictionary; /V and /AS start at /Off
    # (nothing selected).
    radio_on = appearance_stream(b"0 g 5 5 10 10 re f")  # filled black square
    radio_off = appearance_stream(b"")  # draws nothing
    gender_kids = ArrayObject()
    gender_group = DictionaryObject({
        NameObject("/FT"): NameObject("/Btn"),
        NameObject("/T"): create_string_object("gender"),
        NameObject("/V"): NameObject("/Off"),
        NameObject("/Ff"): NumberObject(radio_flag),
        NameObject("/Kids"): gender_kids,
    })
    gender_ref = writer._add_object(gender_group)
    field_refs.append(gender_ref)
    for state, x in (("male", 200), ("female", 300)):
        widget = DictionaryObject({
            NameObject("/Type"): NameObject("/Annot"),
            NameObject("/Subtype"): NameObject("/Widget"),
            NameObject("/Parent"): gender_ref,
            NameObject("/Rect"): rect_array(x, 240, x + 20, 260),
            NameObject("/AS"): NameObject("/Off"),
            NameObject("/AP"): DictionaryObject({
                NameObject("/N"): DictionaryObject({
                    NameObject("/" + state): radio_on,
                    NameObject("/Off"): radio_off,
                }),
            }),
        })
        widget_ref = writer._add_object(widget)
        gender_kids.append(widget_ref)
        annots.append(widget_ref)

    # Checkboxes for interests, placed side by side on the "Interests:" row.
    # Stacking them vertically would run into the agree_terms and signature
    # widgets below, because the rows are only 50 points apart.
    interests = ["reading", "sports", "music", "travel"]
    for i, interest in enumerate(interests):
        x = 200 + i * 50
        add_widget_field("/Btn", interest, NameObject("/Off"), (x, 190, x + 20, 210))

    # Checkbox for agree terms
    add_widget_field("/Btn", "agree_terms", NameObject("/Off"), (200, 140, 220, 160))

    # Signature field (a plain text field)
    add_widget_field("/Tx", "signature", create_string_object(""), (200, 90, 400, 110))

    # Add AcroForm to the document. /NeedAppearances asks the viewer to build
    # appearances for the fields that have no appearance streams.
    acroform = DictionaryObject({
        NameObject("/Fields"): field_refs,
        NameObject("/NeedAppearances"): BooleanObject(True),
    })
    writer._root_object[NameObject("/AcroForm")] = acroform

    # Save the PDF, replacing the reportlab-only version written above.
    with open(output_path, "wb") as f:
        writer.write(f)
    print(f"Created: {output_path}")
def create_no_form_pdf():
    """Write ``no_form.pdf``: a single text-only page with no form fields.

    The page is produced with reportlab only; no AcroForm dictionary or
    widget annotation is added. The file goes to ``OUTPUT_DIR`` and its path
    is printed to stdout.
    """
    target = os.path.join(OUTPUT_DIR, "no_form.pdf")
    pdf = canvas.Canvas(target, pagesize=letter)

    # Heading
    pdf.setFont("Helvetica", 16)
    pdf.drawString(100, 750, "No Form Test")

    # Body text, one line every 50 points going down the page
    pdf.setFont("Helvetica", 12)
    body_lines = (
        (700, "This PDF has no form fields."),
        (650, "It is used for testing field discovery"),
        (600, "on documents without AcroForm fields."),
    )
    for baseline, text in body_lines:
        pdf.drawString(100, baseline, text)

    pdf.save()
    print(f"Created: {target}")
def create_large_form_pdf():
    """Write ``large_form.pdf``: a multi-page PDF for size validation tests.

    Fifty "Field N:" labels are drawn with reportlab, 50 points apart,
    starting a new page whenever the next line would fall below y=50. Only
    static text is written; no form fields are added. The file goes to
    ``OUTPUT_DIR`` and its path is printed to stdout.
    """
    target = os.path.join(OUTPUT_DIR, "large_form.pdf")
    pdf = canvas.Canvas(target, pagesize=letter)

    # Heading on the first page
    pdf.setFont("Helvetica", 16)
    pdf.drawString(100, 750, "Large Form Test")
    pdf.setFont("Helvetica", 12)

    top, bottom, step = 700, 50, 50
    cursor = top
    for number in range(1, 51):
        pdf.drawString(100, cursor, f"Field {number}:")
        cursor -= step
        if cursor >= bottom:
            continue
        # Out of vertical space: finish this page and restart at the top.
        pdf.showPage()
        cursor = top

    pdf.save()
    print(f"Created: {target}")
def main():
    """Run every fixture generator in order, reporting progress on stdout."""
    print("Generating test PDF files...")
    print(f"Output directory: {OUTPUT_DIR}")
    print()

    generators = (
        create_simple_form_pdf,
        create_complex_form_pdf,
        create_no_form_pdf,
        create_large_form_pdf,
    )
    for generate in generators:
        generate()

    print()
    print("All test PDF files generated successfully!")


# Only generate the fixtures when executed as a script, not on import.
if __name__ == "__main__":
    main()