> 激活词: PDF / 处理PDF / 提取文本
def safe_extract(path: str) -> dict:
    """Extract text from a PDF after enforcing size and page-count limits.

    The cheap filesystem checks run first, so an unsuitable path is rejected
    without importing PyPDF2 or parsing anything.

    Args:
        path: Filesystem path of the PDF to process.

    Returns:
        ``{"error": <message>}`` when the path does not exist, is not a
        regular file, is larger than 50 MB, or has more than 200 pages.
        Otherwise whatever ``PDFProcessor().extract_text(path)`` returns
        (``PDFProcessor`` is defined outside this block; the ``dict``
        annotation is taken from the original signature -- confirm there).

    Raises:
        Errors from opening the file or from PyPDF2 while parsing it are not
        caught here and propagate to the caller.
    """
    MAX_SIZE = 50 * 1024 * 1024  # 50MB
    MAX_PAGES = 200

    if not os.path.exists(path):
        return {"error": "File not found"}
    # exists() is also true for directories and other non-regular paths,
    # which would otherwise fail later inside open().
    if not os.path.isfile(path):
        return {"error": "Not a regular file"}

    try:
        size = os.path.getsize(path)
    except OSError:
        # The file vanished (or became unreadable) after the checks above.
        return {"error": "File not found"}
    if size > MAX_SIZE:
        # Derive the figure from the constant so message and limit cannot drift.
        return {"error": f"File too large (>{MAX_SIZE // (1024 * 1024)}MB)"}

    # Imported lazily so the guard clauses above work without PyPDF2 loaded.
    import PyPDF2

    # The reader pulls data from the stream on demand, so count the pages
    # while the handle is still open.
    with open(path, "rb") as f:
        page_count = len(PyPDF2.PdfReader(f).pages)

    if page_count > MAX_PAGES:
        return {"error": f"Too many pages ({page_count} > {MAX_PAGES})"}

    # The probe handle is closed; the processor reopens the file by path.
    return PDFProcessor().extract_text(path)
共 1 个版本