diff --git a/src/strass/Dockerfile b/src/strass/Dockerfile
index 82313de89911cccae34279a88b054d3b983b600d..435156d62d5277b3b36354d5fa9f48ceb8986bb3 100644
--- a/src/strass/Dockerfile
+++ b/src/strass/Dockerfile
@@ -20,6 +20,7 @@ RUN addgroup --gid 1000 kiwi \
         gcc \
         g++ \
         libmagic1 \
+        ghostscript \
     && rm -rf /var/lib/apt/lists/* \
     && python -m pip install --upgrade pip \
     && pip install csscompressor gunicorn
diff --git a/src/strass/strass_app/tests/test_sanitize_pdf.py b/src/strass/strass_app/tests/test_sanitize_pdf.py
index fab619886722ffa0fae7d638dba9ba79f7bd1564..953600059e6850d5d8af5558e90f44812bc1f46d 100644
--- a/src/strass/strass_app/tests/test_sanitize_pdf.py
+++ b/src/strass/strass_app/tests/test_sanitize_pdf.py
@@ -21,3 +21,19 @@ class SafePDFTestCase(TooledTestCase):
         cv.seek(0)
         my_io = utils.safe_pdf(cv)
         self.check_no_js(my_io)
+
+    def test_pdf_file_cleanup_with_js(self):
+        # The object-level cleanup alone must remove the embedded JavaScript.
+        with open(os.path.join(self.test_data, "cv-with-js.pdf"), "rb") as cv:
+            self.assertIn('ICanSubmitTheContentOfThisFileAnywhere', str(cv.read()))
+            cv.seek(0)
+            my_io = utils._pdf_file_cleanup(cv)
+            self.check_no_js(my_io)
+
+    def test_pdf_ps_pdf_with_js(self):
+        # The PostScript round-trip alone must remove the embedded JavaScript.
+        with open(os.path.join(self.test_data, "cv-with-js.pdf"), "rb") as cv:
+            self.assertIn('ICanSubmitTheContentOfThisFileAnywhere', str(cv.read()))
+            cv.seek(0)
+            my_io = utils._pdf_ps_pdf(cv)
+            self.check_no_js(my_io)
diff --git a/src/strass/strass_app/utils.py b/src/strass/strass_app/utils.py
index 7268d2c641263fe0830783a2732d5e4dddfbd5c1..5c9a0079182022f2888c56567080f134c7b5290e 100644
--- a/src/strass/strass_app/utils.py
+++ b/src/strass/strass_app/utils.py
@@ -1,5 +1,6 @@
 import functools
 import logging
+import subprocess
 from abc import abstractmethod
 from io import BytesIO
 from typing import IO, Any
@@ -283,7 +284,10 @@ def _pdf_object_cleanup(pdf_file, obj):
             del obj[k]
 
 
-def safe_pdf(my_stream: IO[Any]):
+def _pdf_file_cleanup(my_stream: IO[Any]):
+    """
+    Strip out JavaScript code from the PDF file.
+    """
     writer = PdfWriter(clone_from=my_stream)
 
     _pdf_object_cleanup(writer, writer.root_object.get('/Names', {}))
@@ -295,6 +299,48 @@ def safe_pdf(my_stream: IO[Any]):
     return myio
 
 
+def _run_pdf_filter(command, data: bytes) -> bytes:
+    """
+    Pipe data through an external command and return its standard output.
+
+    Raises subprocess.CalledProcessError when the command exits with a
+    non-zero status, so a failed conversion is never returned as a result.
+    """
+    completed = subprocess.run(
+        command,
+        input=data,
+        stdout=subprocess.PIPE,
+        check=True,
+    )
+    return completed.stdout
+
+
+def _pdf_ps_pdf(my_stream: IO[Any]):
+    """
+    Convert the file to PostScript and then back to PDF to purge code and
+    dynamic content.
+
+    Returns a new BytesIO holding the regenerated PDF; raises
+    subprocess.CalledProcessError if either conversion step fails.
+    """
+    cv_ps = _run_pdf_filter(
+        ('pdf2ps', '-dLanguageLevel=1', '-', '-'),
+        my_stream.read(),
+    )
+    cv_safe = _run_pdf_filter(('ps2pdf', '-', '-'), cv_ps)
+    return BytesIO(cv_safe)
+
+
+def safe_pdf(my_stream: IO[Any]):
+    """
+    Sanitise a PDF: strip JavaScript entries, then rebuild the document by
+    converting it to PostScript and back.
+    """
+    my_stream = _pdf_file_cleanup(my_stream)
+    my_stream = _pdf_ps_pdf(my_stream)
+    return my_stream
+
+
 def use_markdown_or_plain_text_message() -> str:
     if live_settings.markdown_enabled__bool:
         return _('You can use markdown here.')
diff --git a/src/strass/test_data/README.md b/src/strass/test_data/README.md
index 5204054da0ad4564071ef40ecd426da809c2a3f2..5a24bdc487f22b53d303acf7139740a5b4985cb8 100644
--- a/src/strass/test_data/README.md
+++ b/src/strass/test_data/README.md
@@ -10,6 +10,4 @@ writer = PdfWriter(clone_from="cv.pdf")
 writer.add_js('app.alert("ICanSubmitTheContentOfThisFileAnywhere.");')
 with open("cv-with-js.pdf", "wb") as fp:
     writer.write(fp)
-
-writer = PdfWriter(clone_from="cv-with-js.pdf")
 ```
\ No newline at end of file