Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API Key recognizer #1339

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from typing import List, Optional
from presidio_analyzer import Pattern, PatternRecognizer
import unittest

class APIKeyRecognizer(PatternRecognizer):
    """
    Recognize API keys using regex.

    The default pattern matches either a run of 20-40 ASCII alphanumeric
    characters, or three groups of six alphanumeric characters separated
    by dashes. The pattern is deliberately broad, so it carries a low base
    score.

    :param patterns: List of patterns to be used by this recognizer
    :param context: List of context words to increase confidence in detection
    :param supported_language: Language this recognizer supports
    :param supported_entity: The entity this recognizer can detect
    """

    PATTERNS = [
        Pattern(
            "API Key",
            # No inline ``(?i)`` here: Python's ``re`` rejects a global flag
            # that is not at the very start of the expression, and the
            # character classes already cover both letter cases.
            (
                r"\b([A-Za-z0-9]{20,40}"
                r"|[A-Za-z0-9]{6}-[A-Za-z0-9]{6}-[A-Za-z0-9]{6})\b"
            ),
            0.2,  # low confidence
        ),
    ]
    CONTEXT = ["api", "api key", "token", "secret", "access key", "access_token"]

    def __init__(
        self,
        patterns: Optional[List[Pattern]] = None,
        context: Optional[List[str]] = None,
        supported_language: str = "en",
        supported_entity: str = "API_KEY",
    ):
        # Fall back to the class-level defaults when the caller passes
        # nothing (or an empty list).
        patterns = patterns if patterns else self.PATTERNS
        context = context if context else self.CONTEXT
        super().__init__(
            supported_entity=supported_entity,
            patterns=patterns,
            context=context,
            supported_language=supported_language,
        )

    def analyze(
        self,
        text: str,
        entities: Optional[List[str]] = None,
        nlp_artifacts=None,
        **kwargs,
    ) -> List:
        """
        Analyze text for API keys.

        The extra parameters keep this override call-compatible with the
        parent recognizer, so it can be invoked with ``entities`` and
        ``nlp_artifacts`` as well as with ``text`` alone.

        :param text: Text to analyze
        :param entities: Entities to look for; defaults to the entities
            this recognizer supports
        :param nlp_artifacts: Optional NLP artifacts, forwarded unchanged
        :param kwargs: Any further keyword arguments, forwarded unchanged
        :return: List of recognition results
        """
        if entities is None:
            entities = self.get_supported_entities()
        # NOTE(review): assumes the parent signature is
        # analyze(text, entities, nlp_artifacts=None, ...) - confirm against
        # the installed presidio-analyzer version.
        return super().analyze(text, entities, nlp_artifacts, **kwargs)


class TestAPIKeyRecognizer(unittest.TestCase):
    """Unit tests for ``APIKeyRecognizer``."""

    # NOTE(review): a reviewer asked for these tests to be moved to a new
    # file under /tests, following the examples of the other recognizers.

    def setUp(self):
        # A fresh recognizer with default patterns and context for each test.
        self.recognizer = APIKeyRecognizer()

    def test_recognizer_exists(self):
        self.assertIsNotNone(self.recognizer)

    def test_pattern_exists(self):
        # assertGreater reports the actual length when the check fails.
        self.assertGreater(len(self.recognizer.PATTERNS), 0)

    def test_pattern_attributes(self):
        for pattern in self.recognizer.PATTERNS:
            self.assertIsInstance(pattern, Pattern)
            self.assertIsNotNone(pattern.name)
            self.assertIsNotNone(pattern.regex)
            self.assertIsNotNone(pattern.score)

    def test_context(self):
        self.assertGreater(len(self.recognizer.CONTEXT), 0)

    def test_recognize_api_key(self):
        api_key = "w9aKPvHhu1zeD4Tb65G2rQfXNlYU0WJc"  # Fake Token
        text_with_api_key = "Here is my API key: " + api_key
        results = self.recognizer.analyze(text_with_api_key)
        self.assertGreater(len(results), 0)
        for result in results:
            self.assertEqual(result.entity_type, "API_KEY")
        # The key itself must be one of the detected spans, not merely
        # "something was detected".
        matched_spans = [
            text_with_api_key[result.start:result.end] for result in results
        ]
        self.assertIn(api_key, matched_spans)


# Run the test suite only when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()