From af780224bb9b21467cdb2e260802a9094f11f615 Mon Sep 17 00:00:00 2001 From: Bryan Brancotte <bryan.brancotte@pasteur.fr> Date: Wed, 19 Feb 2025 16:29:27 +0100 Subject: [PATCH 1/2] cleanup --- src/strass/test_data/README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/strass/test_data/README.md b/src/strass/test_data/README.md index 5204054d..5a24bdc4 100644 --- a/src/strass/test_data/README.md +++ b/src/strass/test_data/README.md @@ -10,6 +10,4 @@ writer = PdfWriter(clone_from="cv.pdf") writer.add_js('app.alert("ICanSubmitTheContentOfThisFileAnywhere.");') with open("cv-with-js.pdf", "wb") as fp: writer.write(fp) - -writer = PdfWriter(clone_from="cv-with-js.pdf") ``` \ No newline at end of file -- GitLab From 0883b3e4a48ea580e28cd984d1d98d7784c62ad1 Mon Sep 17 00:00:00 2001 From: Bryan Brancotte <bryan.brancotte@pasteur.fr> Date: Wed, 19 Feb 2025 16:31:58 +0100 Subject: [PATCH 2/2] convert to PostScript to mitigate attack with pdf, test independently each method --- src/strass/Dockerfile | 1 + .../strass_app/tests/test_sanitize_pdf.py | 14 ++++++++ src/strass/strass_app/utils.py | 36 ++++++++++++++++++- 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/strass/Dockerfile b/src/strass/Dockerfile index 82313de8..435156d6 100644 --- a/src/strass/Dockerfile +++ b/src/strass/Dockerfile @@ -20,6 +20,7 @@ RUN addgroup --gid 1000 kiwi \ gcc \ g++ \ libmagic1 \ + ghostscript \ && rm -rf /var/lib/apt/lists/* \ && python -m pip install --upgrade pip \ && pip install csscompressor gunicorn diff --git a/src/strass/strass_app/tests/test_sanitize_pdf.py b/src/strass/strass_app/tests/test_sanitize_pdf.py index fab61988..95360005 100644 --- a/src/strass/strass_app/tests/test_sanitize_pdf.py +++ b/src/strass/strass_app/tests/test_sanitize_pdf.py @@ -21,3 +21,17 @@ class SafePDFTestCase(TooledTestCase): cv.seek(0) my_io = utils.safe_pdf(cv) self.check_no_js(my_io) + + def test_pdf_file_cleanup_with_js(self): + cv = open(os.path.join(self.test_data, 
"cv-with-js.pdf"), "rb") + self.assertIn('ICanSubmitTheContentOfThisFileAnywhere', str(cv.read())) + cv.seek(0) + my_io = utils._pdf_file_cleanup(cv) + self.check_no_js(my_io) + + def test_pdf_ps_pdf_with_js(self): + cv = open(os.path.join(self.test_data, "cv-with-js.pdf"), "rb") + self.assertIn('ICanSubmitTheContentOfThisFileAnywhere', str(cv.read())) + cv.seek(0) + my_io = utils._pdf_ps_pdf(cv) + self.check_no_js(my_io) diff --git a/src/strass/strass_app/utils.py b/src/strass/strass_app/utils.py index 7268d2c6..5c9a0079 100644 --- a/src/strass/strass_app/utils.py +++ b/src/strass/strass_app/utils.py @@ -1,5 +1,6 @@ import functools import logging +import subprocess from abc import abstractmethod from io import BytesIO from typing import IO, Any @@ -283,7 +284,10 @@ def _pdf_object_cleanup(pdf_file, obj): del obj[k] -def safe_pdf(my_stream: IO[Any]): +def _pdf_file_cleanup(my_stream: IO[Any]): + """ + Strip out JavaScript code from the PDF file + """ writer = PdfWriter(clone_from=my_stream) _pdf_object_cleanup(writer, writer.root_object.get('/Names', {})) @@ -295,6 +299,36 @@ def safe_pdf(my_stream: IO[Any]): return myio +def _pdf_ps_pdf(my_stream: IO[Any]): + """ + Convert the file to PostScript and then back to PDF to purge code and dynamic content + """ + pdf2ps = subprocess.Popen( + ('pdf2ps', '-dLanguageLevel=1', '-', '-'), + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + ) + cv_ps = pdf2ps.communicate(input=my_stream.read())[0] + pdf2ps.wait() + + ps2pdf = subprocess.Popen( + ('ps2pdf', '-', '-'), + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + ) + cv_safe = ps2pdf.communicate(input=cv_ps)[0] + ps2pdf.wait() + + myio = BytesIO(cv_safe) + return myio + + +def safe_pdf(my_stream: IO[Any]): + my_stream = _pdf_file_cleanup(my_stream) + my_stream = _pdf_ps_pdf(my_stream) + return my_stream + + def use_markdown_or_plain_text_message() -> str: if live_settings.markdown_enabled__bool: return _('You can use markdown here.') -- GitLab