Replace deprecated PyPDF2 with pypdf (#12203)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Graham Neubig
2026-01-02 16:47:52 -05:00
committed by GitHub
parent ee2ad16442
commit 903c047015
3 changed files with 3 additions and 4 deletions

View File

@@ -22,7 +22,7 @@ import base64
from typing import Any
import docx
-import PyPDF2
+import pypdf
from pptx import Presentation
from pylatexenc.latex2text import LatexNodes2Text
@@ -42,7 +42,7 @@ def parse_pdf(file_path: str) -> None:
file_path: str: The path to the file to open.
"""
print(f'[Reading PDF file from {file_path}]')
-content = PyPDF2.PdfReader(file_path)
+content = pypdf.PdfReader(file_path)
text = ''
for page_idx in range(len(content.pages)):
text += (

2
poetry.lock generated
View File

@@ -16824,4 +16824,4 @@ third-party-runtimes = ["daytona", "e2b-code-interpreter", "modal", "runloop-api
[metadata]
lock-version = "2.1"
python-versions = "^3.12,<3.14"
-content-hash = "9360db8d9ee46922f780ac13e2954c0b62166efd9c3d1b3cf61a9228889152fa"
+content-hash = "ea3a3dcacf87517954778e7b04f0a5865bf213442a7bdbc4f2dc467713dbf82f"

View File

@@ -77,7 +77,6 @@ shellingham = "^1.5.4"
# TODO: Should these go into the runtime group?
ipywidgets = "^8.1.5"
qtconsole = "^5.6.1"
-PyPDF2 = "*"
python-pptx = "*"
pylatexenc = "*"
python-docx = "*"