microsoft · vpvpvpvp · Feb 8, 2023 · Feb 8, 2023 · Feb 8, 2023 · Feb 8, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.
 
 ## [Unreleased]
 
+### Added
+#### Image redactor
+* Added abstract class `QRRecognizer` for QR code recognizers
+* Added `OpenCVQRRecongnizer` which uses OpenCV to recognize QR codes
+* Added `QRImageAnalyzerEngine` which uses `QRRecognizer` for QR code recognition and `AnalyzerEngine` to analyze its contents for PII entities
+
+### Changed
+#### Image redactor
+* Modified `ImagePiiVerifyEngine` and `ImageRedactorEngine` to allow using `QRImageAnalyzerEngine` as an alternative to `ImageAnalyzerEngine`
+
 ## [2.2.32] - 25.01.2023
 ### Changed
 #### General

diff --git a/docs/assets/qr-image-redactor-design.png b/docs/assets/qr-image-redactor-design.png
diff --git a/presidio-image-redactor/Dockerfile b/presidio-image-redactor/Dockerfile
@@ -6,7 +6,7 @@ ENV PIP_NO_CACHE_DIR=1
 WORKDIR /usr/bin/${NAME}
 
 RUN apt-get update \
-  && apt-get install tesseract-ocr -y \
+  && apt-get install tesseract-ocr ffmpeg libsm6 libxext6 -y \
   && rm -rf /var/lib/apt/lists/* \
   && tesseract -v
 

diff --git a/presidio-image-redactor/Pipfile b/presidio-image-redactor/Pipfile
@@ -12,6 +12,8 @@ pydicom = ">=2.3.0"
 pypng = ">=0.20220715.0"
 matplotlib = "==3.6.2"
 typing-extensions = "*"
+opencv-python = ">=4.5.0"
+importlib-resources = "*"
 
 [dev-packages]
 pytest = "*"

diff --git a/presidio-image-redactor/Pipfile.lock b/presidio-image-redactor/Pipfile.lock
diff --git a/presidio-image-redactor/README.MD b/presidio-image-redactor/README.MD
@@ -16,6 +16,10 @@ Process for standard images:
 
 ![Image Redactor Design](../docs/assets/image-redactor-design.png)
 
+Process for images with QR codes:
+
+![QRImage Redactor Design](../docs/assets/qr-image-redactor-design.png)
+
 Process for DICOM files:
 
 ![DICOM image Redactor Design](../docs/assets/dicom-image-redactor-design.png)
@@ -117,6 +121,30 @@ curl -XPOST "http://localhost:3000/redact" -H "content-type: multipart/form-data
 Python script example can be found under:
 /presidio/e2e-tests/tests/test_image_redactor.py
 
+## Getting started (images with QR codes)
+
+`QRImageAnalyzerEngine` is used by `ImageRedactorEngine` to redact QR codes.
+
+```python
+from PIL import Image
+from presidio_image_redactor import ImageRedactorEngine
+from presidio_image_redactor import QRImageAnalyzerEngine
+
+# Get the image to redact using PIL lib (pillow)
+image = Image.open("presidio-image-redactor/tests/integration/resources/qr.png")
+
+# Initialize the engine
+engine = ImageRedactorEngine(image_analyzer_engine=QRImageAnalyzerEngine())
+
+# Redact the image with pink color
+redacted_image = engine.redact(image, (255, 192, 203))
+
+# save the redacted image 
+redacted_image.save("new_image.png")
+# uncomment to open the image for viewing
+# redacted_image.show()
+```
+
 ## Getting started (DICOM images)
 
 This module only redacts pixel data and does not scrub text PHI which may exist in the DICOM metadata.

diff --git a/presidio-image-redactor/presidio_image_redactor/__init__.py b/presidio-image-redactor/presidio_image_redactor/__init__.py
@@ -5,11 +5,13 @@
 from .tesseract_ocr import TesseractOCR
 from .bbox import BboxProcessor
 from .image_analyzer_engine import ImageAnalyzerEngine
+from .qr_image_analyzer_engine import QRImageAnalyzerEngine
 from .image_redactor_engine import ImageRedactorEngine
 from .image_pii_verify_engine import ImagePiiVerifyEngine
 from .dicom_image_redactor_engine import DicomImageRedactorEngine
 from .dicom_image_pii_verify_engine import DicomImagePiiVerifyEngine
 
+
 # Set up default logging (with NullHandler)
 logging.getLogger("presidio-image-redactor").addHandler(logging.NullHandler())
 
@@ -18,6 +20,7 @@
     "TesseractOCR",
     "BboxProcessor",
     "ImageAnalyzerEngine",
+    "QRImageAnalyzerEngine",
     "ImageRedactorEngine",
     "ImagePiiVerifyEngine",
     "DicomImageRedactorEngine",

diff --git a/presidio-image-redactor/presidio_image_redactor/image_pii_verify_engine.py b/presidio-image-redactor/presidio_image_redactor/image_pii_verify_engine.py
@@ -1,9 +1,10 @@
 from PIL import Image, ImageChops
 from presidio_image_redactor.image_analyzer_engine import ImageAnalyzerEngine
+from presidio_image_redactor import QRImageAnalyzerEngine
 import matplotlib
 import io
 from matplotlib import pyplot as plt
-from typing import Optional
+from typing import Optional, Union
 
 
 def fig2img(fig):
@@ -19,7 +20,10 @@ def fig2img(fig):
 class ImagePiiVerifyEngine:
     """ImagePiiVerifyEngine class only supporting Pii verification currently."""
 
-    def __init__(self, image_analyzer_engine: Optional[ImageAnalyzerEngine] = None):
+    def __init__(
+        self,
+        image_analyzer_engine: Union[ImageAnalyzerEngine, QRImageAnalyzerEngine] = None,
+    ):
         if not image_analyzer_engine:
             image_analyzer_engine = ImageAnalyzerEngine()
         self.image_analyzer_engine = image_analyzer_engine
@@ -42,9 +46,12 @@ def verify(
 
         image = ImageChops.duplicate(image)
         image_x, image_y = image.size
-        bboxes = self.image_analyzer_engine.analyze(
-            image, ocr_kwargs, **text_analyzer_kwargs
-        )
+        if isinstance(self.image_analyzer_engine, QRImageAnalyzerEngine):
+            bboxes = self.image_analyzer_engine.analyze(image, **text_analyzer_kwargs)
+        else:
+            bboxes = self.image_analyzer_engine.analyze(
+                image, ocr_kwargs, **text_analyzer_kwargs
+            )
         fig, ax = plt.subplots()
         image_r = 70
         fig.set_size_inches(image_x / image_r, image_y / image_r)

diff --git a/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py b/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py
@@ -2,7 +2,11 @@
 
 from PIL import Image, ImageDraw, ImageChops
 
-from presidio_image_redactor import ImageAnalyzerEngine, BboxProcessor
+from presidio_image_redactor import (
+    ImageAnalyzerEngine,
+    QRImageAnalyzerEngine,
+    BboxProcessor,
+)
 
 
 class ImageRedactorEngine:
@@ -11,7 +15,10 @@ class ImageRedactorEngine:
     :param image_analyzer_engine: Engine which performs OCR + PII detection.
     """
 
-    def __init__(self, image_analyzer_engine: ImageAnalyzerEngine = None):
+    def __init__(
+        self,
+        image_analyzer_engine: Union[ImageAnalyzerEngine, QRImageAnalyzerEngine] = None,
+    ):
         if not image_analyzer_engine:
             self.image_analyzer_engine = ImageAnalyzerEngine()
         else:
@@ -42,9 +49,12 @@ def redact(
 
         image = ImageChops.duplicate(image)
 
-        bboxes = self.image_analyzer_engine.analyze(
-            image, ocr_kwargs, **text_analyzer_kwargs
-        )
+        if isinstance(self.image_analyzer_engine, QRImageAnalyzerEngine):
+            bboxes = self.image_analyzer_engine.analyze(image, **text_analyzer_kwargs)
+        else:
+            bboxes = self.image_analyzer_engine.analyze(
+                image, ocr_kwargs, **text_analyzer_kwargs
+            )
         draw = ImageDraw.Draw(image)
 
         for box in bboxes:

diff --git a/presidio-image-redactor/presidio_image_redactor/qr_image_analyzer_engine.py b/presidio-image-redactor/presidio_image_redactor/qr_image_analyzer_engine.py
@@ -0,0 +1,62 @@
+from typing import List, Optional
+
+from presidio_analyzer import AnalyzerEngine
+
+from presidio_image_redactor.entities import ImageRecognizerResult
+from presidio_image_redactor.qr_recognizer import QRRecognizer
+from presidio_image_redactor.qr_recognizer import OpenCVQRRecongnizer
+
+
+class QRImageAnalyzerEngine:
+    """Find PII inside the QR codes of an image.
+
+    QR codes are located and decoded by a ``QRRecognizer``; the decoded
+    text of each code is then passed to a Presidio ``AnalyzerEngine``.
+
+    :param analyzer_engine: The Presidio AnalyzerEngine instance
+        to be used to detect PII in text. A default ``AnalyzerEngine()``
+        is created when omitted.
+    :param qr: The QRRecognizer object used to detect and decode text in
+        QR codes. An ``OpenCVQRRecongnizer()`` is created when omitted.
+    """
+
+    def __init__(
+        self,
+        analyzer_engine: Optional[AnalyzerEngine] = None,
+        qr: Optional[QRRecognizer] = None,
+    ):
+        # Any falsy argument (not only None) is replaced by the default.
+        if not analyzer_engine:
+            analyzer_engine = AnalyzerEngine()
+        self.analyzer_engine = analyzer_engine
+
+        if not qr:
+            qr = OpenCVQRRecongnizer()
+        self.qr = qr
+
+    def analyze(
+        self, image: object, **text_analyzer_kwargs
+    ) -> List[ImageRecognizerResult]:
+        """Detect QR codes in the image and analyze their decoded text for PII.
+
+        :param image: PIL Image/numpy array to be processed.
+        :param text_analyzer_kwargs: Additional values for the analyze method
+        in AnalyzerEngine. ``language`` must not be passed here: it is
+        already supplied as ``"en"`` below, so a second value would raise
+        ``TypeError`` (duplicate keyword argument).
+
+        :return: List of the extracted entities with image bounding boxes.
+        An empty list is returned when the recognizer yields no QR codes
+        or the analyzer finds nothing in their text.
+        """
+        bboxes = []
+
+        qr_result = self.qr.recognize(image)
+        for qr_code in qr_result:
+            # The analysis language is fixed to English.
+            analyzer_result = self.analyzer_engine.analyze(
+                text=qr_code.text, language="en", **text_analyzer_kwargs
+            )
+            for res in analyzer_result:
+                # Every entity found in a QR code is given the bounding box
+                # of the whole code (bbox[0..3]); res.start / res.end are
+                # passed through unchanged from the text analyzer result.
+                bboxes.append(
+                    ImageRecognizerResult(
+                        res.entity_type,
+                        res.start,
+                        res.end,
+                        res.score,
+                        qr_code.bbox[0],
+                        qr_code.bbox[1],
+                        qr_code.bbox[2],
+                        qr_code.bbox[3],
+                    )
+                )
+        return bboxes
diff --git a/presidio-image-redactor/presidio_image_redactor/qr_recognizer.py b/presidio-image-redactor/presidio_image_redactor/qr_recognizer.py
@@ -0,0 +1,144 @@
+from abc import ABC, abstractmethod
+from typing import Tuple, List, Optional
+import cv2
+import numpy as np
+
+
+class QRRecognizerResult:
+    """
+    Represent the results of analysing the image by QRRecognizer.
+
+    Defining ``__eq__`` without ``__hash__`` makes instances unhashable
+    (standard Python behaviour), so they cannot be used in sets or as
+    dictionary keys.
+
+    :param text: Decoded text
+    :param bbox: Bounding box in the following format - [left, top, width, height]
+    :param polygon: Polygon around QR code
+    """
+
+    def __init__(
+        self,
+        text: str,
+        bbox: Tuple[int, int, int, int],
+        polygon: Optional[List[int]] = None,
+    ):
+        # Values are stored as given; no validation or copying is done.
+        self.text = text
+        self.bbox = bbox
+        self.polygon = polygon
+
+    def __eq__(self, other):
+        """
+        Compare two QRRecognizerResult objects.
+
+        Two results are equal when their text, bbox and polygon are all
+        equal. The type of ``other`` is not checked, so comparing with an
+        object that lacks these attributes raises ``AttributeError``.
+
+        :param other: another QRRecognizerResult object
+        :return: bool
+        """
+        equal_text = self.text == other.text
+        equal_bbox = self.bbox == other.bbox
+        equal_polygon = self.polygon == other.polygon
+
+        return equal_text and equal_bbox and equal_polygon
+
+    def __repr__(self) -> str:
+        """Return a string representation of the instance."""
+        # type(self).__name__ keeps the output correct for subclasses.
+        return (
+            f"{type(self).__name__}("
+            f"text={self.text}, "
+            f"bbox={self.bbox}, "
+            f"polygon={self.polygon})"
+        )
+
+
+class QRRecognizer(ABC):
+    """
+    A class representing an abstract QR code recognizer.
+
+    QRRecognizer is an abstract class to be inherited by
+    recognizers which hold the logic for recognizing QR codes on the images.
+    Subclasses must implement ``recognize``.
+    """
+
+    @abstractmethod
+    def recognize(self, image: object) -> List[QRRecognizerResult]:
+        """Detect and decode QR codes on the image.
+
+        :param image: PIL Image/numpy array to be processed
+
+        :return: List of the recognized QR codes, one
+            ``QRRecognizerResult`` per code
+        """
+
+
+class OpenCVQRRecongnizer(QRRecognizer):
+    """
+    QR code recognition using OpenCV.
+
+    Example of the usage:
+        import cv2
+        from presidio_image_redactor.qr_recognizer import OpenCVQRRecongnizer
+
+        image = cv2.imread("qrcode.jpg")
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+        recognized = OpenCVQRRecongnizer().recognize(image)
+    """
+
+    def __init__(self) -> None:
+        """Create the OpenCV QR code detector used by all methods."""
+        self.detector = cv2.QRCodeDetector()
+
+    def recognize(self, image: object) -> List[QRRecognizerResult]:
+        """Detect and decode QR codes on the image.
+
+        Input that is not already a numpy array is converted to a
+        ``uint8`` array first. For each detected code the result holds the
+        decoded text, the bounding rectangle of its points and a polygon
+        made of the flattened points followed by the first point again.
+
+        :param image: PIL Image/numpy array to be processed
+
+        :return: List of the recognized QR codes; empty when the
+            detection status is falsy
+        """
+
+        if not isinstance(image, np.ndarray):
+            image = np.array(image, dtype=np.uint8)
+
+        recognized = []
+
+        ret, points = self._detect(image)
+
+        if ret:
+            decoded = self._decode(image, points)
+
+            for text, p in zip(decoded, points):
+                (x, y, w, h) = cv2.boundingRect(p)
+
+                recognized.append(
+                    QRRecognizerResult(
+                        text=text, bbox=[x, y, w, h], polygon=[*p.flatten(), *p[0]]
+                    )
+                )
-            for text, p in zip(decoded, points):
-                (x, y, w, h) = cv2.boundingRect(p)
-
-                recognized.append(
-                    QRRecognizerResult(
-                        text=text, bbox=[x, y, w, h], polygon=[*p.flatten(), *p[0]]
-                    )
-                )
+recognized = [QRRecognizerResult(text=text, bbox=cv2.boundingRect(point), polygon=[*point.flatten(), *point[0]]) for text, point in zip(decoded, points)]
+
-            for text, p in zip(decoded, points):
-                (x, y, w, h) = cv2.boundingRect(p)
-
-                recognized.append(
-                    QRRecognizerResult(
-                        text=text, bbox=[x, y, w, h], polygon=[*p.flatten(), *p[0]]
-                    )
-                )
+recognized = [QRRecognizerResult(text=text, bbox=cv2.boundingRect(point), polygon=[*point.flatten(), *point[0]]) for text, point in zip(decoded, points)]
+
+
+        return recognized
+
+    def _detect(self, image: object) -> Tuple[float, Optional[np.ndarray]]:
+        """Detect QR codes on the image.
+
+        Multi-code detection is tried first; when it reports nothing,
+        single-code detection is used as a fallback.
+
+        :param image: Numpy array to be processed
+
+        :return: Detection status and list of the points around QR codes
+            (cast to ``int``), or ``None`` in place of the points when the
+            detector returned none
+        """
+
+        ret, points = self.detector.detectMulti(image)
+
+        if not ret:
+            ret, points = self.detector.detect(image)
+        if points is not None:
+            # Presumably integer pixel coordinates are needed downstream
+            # (e.g. by cv2.boundingRect) -- TODO confirm.
+            points = points.astype(int)
+
+        return ret, points
+
+    def _decode(self, image: object, points: np.ndarray) -> Tuple[str]:
+        """Decode QR codes on the image.
+
+        :param image: Numpy array to be processed
+        :param points: Detected points
+
+        :return: Tuple with decoded QR codes
+        """
+
+        if len(points) == 1:
+            # Single code: wrap the one decoded string in a 1-tuple so the
+            # caller can iterate over the result in both cases.
+            decoded, _ = self.detector.decode(image, points)
+            decoded = (decoded,)
+        else:
+            _, decoded, _ = self.detector.decodeMulti(image, points)
+
+        return decoded