diff --git a/CHANGELOG.md b/CHANGELOG.md index 91b0600a5..6c0970456 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added +#### Image redactor +* Added abstract class `QRRecognizer` for QR code recognizers +* Added `OpenCVQRRecongnizer` which uses OpenCV to recognize QR codes +* Added `QRImageAnalyzerEngine` which uses `QRRecognizer` for QR code recognition and `AnalyzerEngine` to analyze its contents for PII entities + +### Changed +#### Image redactor +* Modified `ImagePiiVerifyEngine` and `ImageRedactorEngine` to allow using `QRImageAnalyzerEngine` as an alternative to `ImageAnalyzerEngine` + ## [2.2.32] - 25.01.2023 ### Changed #### General diff --git a/docs/assets/qr-image-redactor-design.png b/docs/assets/qr-image-redactor-design.png new file mode 100644 index 000000000..600d18474 Binary files /dev/null and b/docs/assets/qr-image-redactor-design.png differ diff --git a/presidio-image-redactor/Dockerfile b/presidio-image-redactor/Dockerfile index 25718f897..c86fa01c4 100644 --- a/presidio-image-redactor/Dockerfile +++ b/presidio-image-redactor/Dockerfile @@ -6,7 +6,7 @@ ENV PIP_NO_CACHE_DIR=1 WORKDIR /usr/bin/${NAME} RUN apt-get update \ - && apt-get install tesseract-ocr -y \ + && apt-get install tesseract-ocr ffmpeg libsm6 libxext6 -y \ && rm -rf /var/lib/apt/lists/* \ && tesseract -v diff --git a/presidio-image-redactor/Pipfile b/presidio-image-redactor/Pipfile index 61a1d7f88..489a3ab79 100644 --- a/presidio-image-redactor/Pipfile +++ b/presidio-image-redactor/Pipfile @@ -12,6 +12,8 @@ pydicom = ">=2.3.0" pypng = ">=0.20220715.0" matplotlib = "==3.6.2" typing-extensions = "*" +opencv-python = ">=4.5.0" +importlib-resources = "*" [dev-packages] pytest = "*" diff --git a/presidio-image-redactor/Pipfile.lock b/presidio-image-redactor/Pipfile.lock index 7927a0461..ac9568a4e 100644 --- a/presidio-image-redactor/Pipfile.lock +++ 
b/presidio-image-redactor/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "9e70eecf64a27791e5d64ca06274a2d7ddb4deafc832d7ed5f8fd9c1c54c739a" + "sha256": "c67582e5c0fea3cc2cf86888c1adb1a45cd1015f0791c5348917c95b9a70c760" }, "pipfile-spec": 6, "requires": {}, @@ -306,6 +306,14 @@ "markers": "python_version >= '3.5'", "version": "==3.4" }, + "importlib-resources": { + "hashes": [ + "sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6", + "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a" + ], + "index": "pypi", + "version": "==5.12.0" + }, "itsdangerous": { "hashes": [ "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44", @@ -575,6 +583,19 @@ "markers": "python_version >= '3.8'", "version": "==1.24.2" }, + "opencv-python": { + "hashes": [ + "sha256:3424794a711f33284581f3c1e4b071cfc827d02b99d6fd9a35391f517c453306", + "sha256:7a297e7651e22eb17c265ddbbc80e2ba2a8ff4f4a1696a67c45e5f5798245842", + "sha256:812af57553ec1c6709060c63f6b7e9ad07ddc0f592f3ccc6d00c71e0fe0e6376", + "sha256:cd08343654c6b88c5a8c25bf425f8025aed2e3189b4d7306b5861d32affaf737", + "sha256:d4f8880440c433a0025d78804dda6901d1e8e541a561dda66892d90290aef881", + "sha256:ebfc0a3a2f57716e709028b992e4de7fd8752105d7a768531c4f434043c6f9ff", + "sha256:eda115797b114fc16ca6f182b91c5d984f0015c19bec3145e55d33d708e9bae1" + ], + "index": "pypi", + "version": "==4.7.0.72" + }, "packaging": { "hashes": [ "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2", @@ -964,6 +985,7 @@ "sha256:9d3de8591bd6f6522594406fa46a6418eabd0562dacb267f8556675762801514", "sha256:ed4e75fafe103c79b692f217158ba87edf38d31004b9dbc1913debb48793c828" ], + "index": "pypi", "markers": "python_version >= '3.7'", "version": "==67.3.3" }, @@ -1064,6 +1086,7 @@ "sha256:f74c64934423bcc2d3508cf3a079c7034e5cde988255dc57c7a09794c78f0610", "sha256:facab907801fbcb0e54b3532e04bc6a0709184d68004ef3a129e8c7e3ca63d82" ], + "index": "pypi", "markers": "python_version >= 
'3.6'", "version": "==2.4.5" }, diff --git a/presidio-image-redactor/README.MD b/presidio-image-redactor/README.MD index 39f05c92a..0c34895d5 100644 --- a/presidio-image-redactor/README.MD +++ b/presidio-image-redactor/README.MD @@ -16,6 +16,10 @@ Process for standard images: ![Image Redactor Design](../docs/assets/image-redactor-design.png) +Process for images with QR codes: + +![QRImage Redactor Design](../docs/assets/qr-image-redactor-design.png) + Process for DICOM files: ![DICOM image Redactor Design](../docs/assets/dicom-image-redactor-design.png) @@ -117,6 +121,30 @@ curl -XPOST "http://localhost:3000/redact" -H "content-type: multipart/form-data Python script example can be found under: /presidio/e2e-tests/tests/test_image_redactor.py +## Getting started (images with QR codes) + +`QRImageAnalyzerEngine` is used by `ImageRedactorEngine` to redact QR codes. + +```python +from PIL import Image +from presidio_image_redactor import ImageRedactorEngine +from presidio_image_redactor import QRImageAnalyzerEngine + +# Get the image to redact using PIL lib (pillow) +image = Image.open("presidio-image-redactor/tests/integration/resources/qr.png") + +# Initialize the engine +engine = ImageRedactorEngine(image_analyzer_engine=QRImageAnalyzerEngine()) + +# Redact the image with pink color +redacted_image = engine.redact(image, (255, 192, 203)) + +# save the redacted image +redacted_image.save("new_image.png") +# uncomment to open the image for viewing +# redacted_image.show() +``` + ## Getting started (DICOM images) This module only redacts pixel data and does not scrub text PHI which may exist in the DICOM metadata. 
diff --git a/presidio-image-redactor/presidio_image_redactor/__init__.py b/presidio-image-redactor/presidio_image_redactor/__init__.py index b51383a61..1cd5b2c28 100644 --- a/presidio-image-redactor/presidio_image_redactor/__init__.py +++ b/presidio-image-redactor/presidio_image_redactor/__init__.py @@ -5,11 +5,13 @@ from .tesseract_ocr import TesseractOCR from .bbox import BboxProcessor from .image_analyzer_engine import ImageAnalyzerEngine +from .qr_image_analyzer_engine import QRImageAnalyzerEngine from .image_redactor_engine import ImageRedactorEngine from .image_pii_verify_engine import ImagePiiVerifyEngine from .dicom_image_redactor_engine import DicomImageRedactorEngine from .dicom_image_pii_verify_engine import DicomImagePiiVerifyEngine + # Set up default logging (with NullHandler) logging.getLogger("presidio-image-redactor").addHandler(logging.NullHandler()) @@ -18,6 +20,7 @@ "TesseractOCR", "BboxProcessor", "ImageAnalyzerEngine", + "QRImageAnalyzerEngine", "ImageRedactorEngine", "ImagePiiVerifyEngine", "DicomImageRedactorEngine", diff --git a/presidio-image-redactor/presidio_image_redactor/image_pii_verify_engine.py b/presidio-image-redactor/presidio_image_redactor/image_pii_verify_engine.py index 8e5f0d0fa..123eaa150 100644 --- a/presidio-image-redactor/presidio_image_redactor/image_pii_verify_engine.py +++ b/presidio-image-redactor/presidio_image_redactor/image_pii_verify_engine.py @@ -1,9 +1,10 @@ from PIL import Image, ImageChops from presidio_image_redactor.image_analyzer_engine import ImageAnalyzerEngine +from presidio_image_redactor import QRImageAnalyzerEngine import matplotlib import io from matplotlib import pyplot as plt -from typing import Optional +from typing import Optional, Union def fig2img(fig): @@ -19,7 +20,10 @@ def fig2img(fig): class ImagePiiVerifyEngine: """ImagePiiVerifyEngine class only supporting Pii verification currently.""" - def __init__(self, image_analyzer_engine: Optional[ImageAnalyzerEngine] = None): + def __init__( + 
self, + image_analyzer_engine: Union[ImageAnalyzerEngine, QRImageAnalyzerEngine] = None, + ): if not image_analyzer_engine: image_analyzer_engine = ImageAnalyzerEngine() self.image_analyzer_engine = image_analyzer_engine @@ -42,9 +46,12 @@ def verify( image = ImageChops.duplicate(image) image_x, image_y = image.size - bboxes = self.image_analyzer_engine.analyze( - image, ocr_kwargs, **text_analyzer_kwargs - ) + if isinstance(self.image_analyzer_engine, QRImageAnalyzerEngine): + bboxes = self.image_analyzer_engine.analyze(image, **text_analyzer_kwargs) + else: + bboxes = self.image_analyzer_engine.analyze( + image, ocr_kwargs, **text_analyzer_kwargs + ) fig, ax = plt.subplots() image_r = 70 fig.set_size_inches(image_x / image_r, image_y / image_r) diff --git a/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py b/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py index 411aebcab..fd7adafe3 100644 --- a/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py +++ b/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py @@ -2,7 +2,11 @@ from PIL import Image, ImageDraw, ImageChops -from presidio_image_redactor import ImageAnalyzerEngine, BboxProcessor +from presidio_image_redactor import ( + ImageAnalyzerEngine, + QRImageAnalyzerEngine, + BboxProcessor, +) class ImageRedactorEngine: @@ -11,7 +15,10 @@ class ImageRedactorEngine: :param image_analyzer_engine: Engine which performs OCR + PII detection. 
""" - def __init__(self, image_analyzer_engine: ImageAnalyzerEngine = None): + def __init__( + self, + image_analyzer_engine: Union[ImageAnalyzerEngine, QRImageAnalyzerEngine] = None, + ): if not image_analyzer_engine: self.image_analyzer_engine = ImageAnalyzerEngine() else: @@ -42,9 +49,12 @@ def redact( image = ImageChops.duplicate(image) - bboxes = self.image_analyzer_engine.analyze( - image, ocr_kwargs, **text_analyzer_kwargs - ) + if isinstance(self.image_analyzer_engine, QRImageAnalyzerEngine): + bboxes = self.image_analyzer_engine.analyze(image, **text_analyzer_kwargs) + else: + bboxes = self.image_analyzer_engine.analyze( + image, ocr_kwargs, **text_analyzer_kwargs + ) draw = ImageDraw.Draw(image) for box in bboxes: diff --git a/presidio-image-redactor/presidio_image_redactor/qr_image_analyzer_engine.py b/presidio-image-redactor/presidio_image_redactor/qr_image_analyzer_engine.py new file mode 100644 index 000000000..04515fbfc --- /dev/null +++ b/presidio-image-redactor/presidio_image_redactor/qr_image_analyzer_engine.py @@ -0,0 +1,62 @@ +from typing import List, Optional + +from presidio_analyzer import AnalyzerEngine + +from presidio_image_redactor.entities import ImageRecognizerResult +from presidio_image_redactor.qr_recognizer import QRRecognizer +from presidio_image_redactor.qr_recognizer import OpenCVQRRecongnizer + + +class QRImageAnalyzerEngine: + """QRImageAnalyzerEngine class. 
+ + :param analyzer_engine: The Presidio AnalyzerEngine instance + to be used to detect PII in text + :param qr: the QRRecognizer object to detect and decode text in QR codes + """ + + def __init__( + self, + analyzer_engine: Optional[AnalyzerEngine] = None, + qr: Optional[QRRecognizer] = None, + ): + if not analyzer_engine: + analyzer_engine = AnalyzerEngine() + self.analyzer_engine = analyzer_engine + + if not qr: + qr = OpenCVQRRecongnizer() + self.qr = qr + + def analyze( + self, image: object, **text_analyzer_kwargs + ) -> List[ImageRecognizerResult]: + """Analyse method to analyse the given image. + + :param image: PIL Image/numpy array to be processed. + :param text_analyzer_kwargs: Additional values for the analyze method + in AnalyzerEngine. + + :return: List of the extract entities with image bounding boxes. + """ + bboxes = [] + + qr_result = self.qr.recognize(image) + for qr_code in qr_result: + analyzer_result = self.analyzer_engine.analyze( + text=qr_code.text, language="en", **text_analyzer_kwargs + ) + for res in analyzer_result: + bboxes.append( + ImageRecognizerResult( + res.entity_type, + res.start, + res.end, + res.score, + qr_code.bbox[0], + qr_code.bbox[1], + qr_code.bbox[2], + qr_code.bbox[3], + ) + ) + return bboxes diff --git a/presidio-image-redactor/presidio_image_redactor/qr_recognizer.py b/presidio-image-redactor/presidio_image_redactor/qr_recognizer.py new file mode 100644 index 000000000..73dc18520 --- /dev/null +++ b/presidio-image-redactor/presidio_image_redactor/qr_recognizer.py @@ -0,0 +1,155 @@ +from abc import ABC, abstractmethod +from typing import Tuple, List, Optional +import cv2 +import numpy as np + + +class QRRecognizerResult: + """ + Represent the results of analysing the image by QRRecognizer. 
+ + :param text: Decoded text + :param bbox: Bounding box in the following format - [left, top, width, height] + :param polygon: Polygon around QR code + """ + + def __init__( + self, + text: str, + bbox: Tuple[int, int, int, int], + polygon: Optional[List[int]] = None, + ): + self.text = text + self.bbox = bbox + self.polygon = polygon + + def __eq__(self, other): + """ + Compare two QRRecognizerResult objects. + + :param other: another QRRecognizerResult object + :return: bool + """ + equal_text = self.text == other.text + equal_bbox = self.bbox == other.bbox + equal_polygon = self.polygon == other.polygon + + return equal_text and equal_bbox and equal_polygon + + def __repr__(self) -> str: + """Return a string representation of the instance.""" + return ( + f"{type(self).__name__}(" + f"text={self.text}, " + f"bbox={self.bbox}, " + f"polygon={self.polygon})" + ) + + +class QRRecognizer(ABC): + """ + A class representing an abstract QR code recognizer. + + QRRecognizer is an abstract class to be inherited by + recognizers which hold the logic for recognizing QR codes on the images. + """ + + @abstractmethod + def recognize(self, image: object) -> List[QRRecognizerResult]: + """Detect and decode QR codes on the image. + + :param image: PIL Image/numpy array to be processed + + :return: List of the recognized QR codes + """ + + +class OpenCVQRRecongnizer(QRRecognizer): + """ + QR code recognition using OpenCV. + + Example of the usage: + from presidio_image_redactor.qr_recognizer import OpenCVQRRecongnizer + + image = cv2.imread("qrcode.jpg") + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + recognized = OpenCVQRRecongnizer().recognize(image) + """ + + def __init__(self) -> None: + self.detector = cv2.QRCodeDetector() + + def recognize(self, image: object) -> List[QRRecognizerResult]: + """Detect and decode QR codes on the image. 
+ + :param image: PIL Image/numpy array to be processed + + :return: List of the recognized QR codes + """ + + if not isinstance(image, np.ndarray): + image = np.array(image, dtype=np.uint8) + + ret, points = self._detect(image) + + if ret: + decoded = self._decode(image, points) + + recognized = [ + QRRecognizerResult( + text=text, + bbox=cv2.boundingRect(point), + polygon=self._get_ploygon(point), + ) + for text, point in zip(decoded, points) + ] + + else: + recognized = [] + + return recognized + + def _detect(self, image: object) -> Tuple[float, Optional[np.ndarray]]: + """Detect QR codes on the image. + + :param image: Numpy array to be processed + + :return: Detection status and list of the points around QR codes + """ + + ret, points = self.detector.detectMulti(image) + + if not ret: + ret, points = self.detector.detect(image) + if points is not None: + points = points.astype(int) + + return ret, points + + def _decode(self, image: object, points: np.ndarray) -> Tuple[str]: + """Decode QR codes on the image. + + :param image: Numpy array to be processed + :param points: Detected points + + :return: Tuple with decoded QR codes + """ + + if len(points) == 1: + decoded, _ = self.detector.decode(image, points) + decoded = (decoded,) + else: + _, decoded, _ = self.detector.decodeMulti(image, points) + + return decoded + + def _get_ploygon(self, points: np.ndarray) -> List[int]: + """Convert a list of points to a polygon. 
+ + :param points: Points around the QR code + + :return: Polygon + """ + + return [*points.flatten(), *points[0]] diff --git a/presidio-image-redactor/setup.py b/presidio-image-redactor/setup.py index 0f704cc51..57bc14d79 100644 --- a/presidio-image-redactor/setup.py +++ b/presidio-image-redactor/setup.py @@ -12,6 +12,8 @@ "pydantic==1.7.4", "pydicom>=2.3.0", "pypng>=0.20220715.0", + "opencv-python>=4.5.0", + "importlib-resources", ] test_requirements = ["pytest>=3", "pytest-mock>=3.10.0", "flake8==3.7.9"] diff --git a/presidio-image-redactor/tests/conftest.py b/presidio-image-redactor/tests/conftest.py index b4ad6d4ee..b82670082 100644 --- a/presidio-image-redactor/tests/conftest.py +++ b/presidio-image-redactor/tests/conftest.py @@ -4,6 +4,7 @@ from presidio_analyzer.recognizer_result import RecognizerResult from presidio_image_redactor import ImageAnalyzerEngine +from presidio_image_redactor import QRImageAnalyzerEngine from presidio_image_redactor.entities import ImageRecognizerResult import pytest @@ -52,6 +53,11 @@ def image_analyzer_engine(): return ImageAnalyzerEngine() +@pytest.fixture(scope="module") +def qr_image_analyzer_engine(): + return QRImageAnalyzerEngine() + + @pytest.fixture(scope="module") def get_mock_dicom_instance(): """DICOM instance to use in testing""" diff --git a/presidio-image-redactor/tests/integration/resources/qr.png b/presidio-image-redactor/tests/integration/resources/qr.png new file mode 100644 index 000000000..2e65443d6 Binary files /dev/null and b/presidio-image-redactor/tests/integration/resources/qr.png differ diff --git a/presidio-image-redactor/tests/integration/resources/qr_multi.png b/presidio-image-redactor/tests/integration/resources/qr_multi.png new file mode 100644 index 000000000..a493c1fb4 Binary files /dev/null and b/presidio-image-redactor/tests/integration/resources/qr_multi.png differ diff --git a/presidio-image-redactor/tests/integration/resources/qr_multi_redacted.png 
b/presidio-image-redactor/tests/integration/resources/qr_multi_redacted.png new file mode 100644 index 000000000..91c5c5b88 Binary files /dev/null and b/presidio-image-redactor/tests/integration/resources/qr_multi_redacted.png differ diff --git a/presidio-image-redactor/tests/integration/resources/qr_no_pii.png b/presidio-image-redactor/tests/integration/resources/qr_no_pii.png new file mode 100644 index 000000000..d41b1e2bb Binary files /dev/null and b/presidio-image-redactor/tests/integration/resources/qr_no_pii.png differ diff --git a/presidio-image-redactor/tests/integration/resources/qr_redacted.png b/presidio-image-redactor/tests/integration/resources/qr_redacted.png new file mode 100644 index 000000000..0244a9113 Binary files /dev/null and b/presidio-image-redactor/tests/integration/resources/qr_redacted.png differ diff --git a/presidio-image-redactor/tests/integration/resources/qr_verify.png b/presidio-image-redactor/tests/integration/resources/qr_verify.png new file mode 100644 index 000000000..6c961c6cd Binary files /dev/null and b/presidio-image-redactor/tests/integration/resources/qr_verify.png differ diff --git a/presidio-image-redactor/tests/integration/test_image_pii_verify_engine.py b/presidio-image-redactor/tests/integration/test_image_pii_verify_engine.py index 244c56a76..7f6325779 100644 --- a/presidio-image-redactor/tests/integration/test_image_pii_verify_engine.py +++ b/presidio-image-redactor/tests/integration/test_image_pii_verify_engine.py @@ -7,3 +7,18 @@ def test_given_image_without_text_and_pii_verify_then_image_does_not_change(): image = get_resource_image("no_ocr.png") pii_verifying_image = ImagePiiVerifyEngine().verify(image) assert compare_images(pii_verifying_image, image) + + +def test_given_qr_image_with_pii_then_image_is_changed(qr_image_analyzer_engine): + # QR image with PII entities + image = get_resource_image("qr.png") + result_image = get_resource_image("qr_verify.png") + pii_verifying_image = 
ImagePiiVerifyEngine(image_analyzer_engine=qr_image_analyzer_engine).verify(image) + assert compare_images(pii_verifying_image, result_image) + + +def test_given_qr_image_without_pii_then_image_does_not_change(qr_image_analyzer_engine): + # QR image without PII entities + image = get_resource_image("qr_no_pii.png") + pii_verifying_image = ImagePiiVerifyEngine(image_analyzer_engine=qr_image_analyzer_engine).verify(image) + assert compare_images(pii_verifying_image, image) diff --git a/presidio-image-redactor/tests/integration/test_image_redactor_engine.py b/presidio-image-redactor/tests/integration/test_image_redactor_engine.py index de04ccb84..8cb4bbe35 100644 --- a/presidio-image-redactor/tests/integration/test_image_redactor_engine.py +++ b/presidio-image-redactor/tests/integration/test_image_redactor_engine.py @@ -59,3 +59,18 @@ def test_given_analzyer_kwargs_then_different_entities_are_redacted(): assert not compare_images(redacted_image_no_args, redacted_image_entities_args) assert not compare_images(redacted_image_no_args, redacted_image_score_args) assert not compare_images(redacted_image_entities_args, redacted_image_score_args) + + +def test_given_qr_image_with_pii_then_qr_bbox_is_redacted(qr_image_analyzer_engine): + # QR image with PII entities + image = get_resource_image("qr.png") + result_image = get_resource_image("qr_redacted.png") + redacted_image = ImageRedactorEngine(image_analyzer_engine=qr_image_analyzer_engine).redact(image) + assert compare_images(redacted_image, result_image) + + +def test_given_qr_image_without_pii_then_image_does_not_change(qr_image_analyzer_engine): + # QR image without PII entities + image = get_resource_image("qr_no_pii.png") + redacted_image = ImageRedactorEngine(image_analyzer_engine=qr_image_analyzer_engine).redact(image) + assert compare_images(redacted_image, image) \ No newline at end of file diff --git a/presidio-image-redactor/tests/integration/test_qr_image_analyzer_engine_integration.py 
b/presidio-image-redactor/tests/integration/test_qr_image_analyzer_engine_integration.py new file mode 100644 index 000000000..ace5103c4 --- /dev/null +++ b/presidio-image-redactor/tests/integration/test_qr_image_analyzer_engine_integration.py @@ -0,0 +1,39 @@ +from presidio_image_redactor.entities import ImageRecognizerResult +from tests.integration.methods import get_resource_image + + +def test_given_qr_image_then_text_entities_are_recognized_correctly( + qr_image_analyzer_engine, +): + # QR image with PII entities + image = get_resource_image("qr.png") + analyzer_results = qr_image_analyzer_engine.analyze(image) + assert len(analyzer_results) == 1 + assert analyzer_results[0] == ImageRecognizerResult( + entity_type="URL", + start=0, + end=37, + score=0.6, + left=71, + top=71, + width=1013, + height=1013, + ) + + +def test_given_qr_image_without_pii_then_no_entities_are_recognized( + qr_image_analyzer_engine, +): + # QR image without PII entities + image = get_resource_image("qr_no_pii.png") + analyzer_results = qr_image_analyzer_engine.analyze(image) + assert len(analyzer_results) == 0 + + +def test_given_mage_without_qr_then_no_entities_are_recognized( + qr_image_analyzer_engine, +): + # Image without QR codes + image = get_resource_image("ocr_test.png") + analyzer_results = qr_image_analyzer_engine.analyze(image) + assert len(analyzer_results) == 0 diff --git a/presidio-image-redactor/tests/test_qr_recognizer.py b/presidio-image-redactor/tests/test_qr_recognizer.py new file mode 100644 index 000000000..6c8af2f16 --- /dev/null +++ b/presidio-image-redactor/tests/test_qr_recognizer.py @@ -0,0 +1,39 @@ +import cv2 +import numpy as np + +import pytest + +from tests.integration.methods import get_resource_image + +from presidio_image_redactor.qr_recognizer import ( + OpenCVQRRecongnizer, + QRRecognizerResult, +) + + +@pytest.fixture(scope="module") +def opencv_qr_recognizer(): + return OpenCVQRRecongnizer() + + +def 
test_given_image_with_qr_then_opencvqrrecognizer_returns_expected_results( + opencv_qr_recognizer, +): + image = get_resource_image("qr.png") + recognized = opencv_qr_recognizer.recognize(image) + + assert len(recognized) == 1 + assert recognized[0] == QRRecognizerResult( + text="https://github.com/microsoft/presidio", + bbox=(71, 71, 1013, 1013), + polygon=[71, 71, 1083, 71, 1083, 1083, 71, 1083, 71, 71], + ) + + +def test_given_image_without_qr_then_opencvqrrecognizer_returns_empty_list( + opencv_qr_recognizer, +): + image = get_resource_image("no_ocr.jpg") + recognized = opencv_qr_recognizer.recognize(image) + + assert len(recognized) == 0