Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

<regex>: Make negated character classes not match characters not included in the negated character class #5214

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -1500,7 +1500,7 @@ public:
void _Add_class();
void _Add_char_to_class(_Elem _Ch);
void _Add_range(_Elem _Ex0, _Elem _Ex1);
void _Add_named_class(_Regex_traits_base::char_class_type, bool = false);
void _Add_named_class(typename _RxTraits::char_class_type, bool);
void _Add_equiv(_FwdIt, _FwdIt, _Difft);
void _Add_coll(_FwdIt, _FwdIt, _Difft);
_Node_base* _Begin_group();
Expand All @@ -1524,7 +1524,7 @@ private:
bool _Beg_expr(_Node_base*) const;
void _Add_char_to_bitmap(_Elem _Ch);
void _Add_char_to_array(_Elem _Ch);
void _Add_elts(_Node_class<_Elem, _RxTraits>*, _Regex_traits_base::char_class_type, bool);
void _Add_elts(_Node_class<_Elem, _RxTraits>*, typename _RxTraits::char_class_type, bool);
void _Char_to_elts(_FwdIt, _FwdIt, _Difft, _Sequence<_Elem>**);

_Root_node* _Root;
Expand Down Expand Up @@ -2920,7 +2920,7 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_range(_Elem _Arg0, _Elem _Arg1) {

template <class _FwdIt, class _Elem, class _RxTraits>
void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_elts(
_Node_class<_Elem, _RxTraits>* _Node, _Regex_traits_base::char_class_type _Cl, bool _Negate) {
_Node_class<_Elem, _RxTraits>* _Node, typename _RxTraits::char_class_type _Cl, bool _Negate) {
// add characters in named class to set
for (unsigned int _Ch = 0; _Ch < _Bmp_max; ++_Ch) { // add elements or their inverse
bool _Matches = _Traits.isctype(static_cast<_Elem>(_Ch), _Cl);
Expand All @@ -2935,12 +2935,12 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_elts(
}

template <class _FwdIt, class _Elem, class _RxTraits>
void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_named_class(typename _Regex_traits_base::char_class_type _Cl,
void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_named_class(typename _RxTraits::char_class_type _Cl,
bool _Negate) { // add contents of named class to bracket expression
_Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Current);
_Add_elts(_Node, _Cl, _Negate);
if (_Bmp_max < static_cast<unsigned int>(_STD _Max_limit<_Elem>())) {
_Node->_Classes = static_cast<_Regex_traits_base::char_class_type>(_Node->_Classes | _Cl);
if (_Bmp_max <= _STD _Max_limit<typename _RxTraits::_Uelem>() && !_Negate) {
_Node->_Classes = static_cast<typename _RxTraits::char_class_type>(_Node->_Classes | _Cl);
}
}

Expand Down Expand Up @@ -3991,7 +3991,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Do_ex_class(
_Error(regex_constants::error_ctype);
}

_Nfa._Add_named_class(_Cls);
_Nfa._Add_named_class(_Cls, false);
} else if (_End_arg == _Meta_equal) { // process =
if (_Beg == _Pat) {
_Error(regex_constants::error_collate);
Expand Down
19 changes: 19 additions & 0 deletions tests/std/tests/VSO_0000000_regex_use/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,24 @@ void test_gh_5160() {
neg_regex.should_search_fail(L"xxxYxx\x2009xxxZxxx"); // U+2009 THIN SPACE
}

void test_gh_5214() {
// GH-5214 makes negated character class escapes not match characters not included in the negated character class
{
const test_wregex neg_word_regex(&g_regexTester, LR"([\W])");
neg_word_regex.should_search_fail(L"\u0100"); // U+0100 LATIN CAPITAL LETTER A WITH MACRON
}

{
const test_wregex neg_space_regex(&g_regexTester, LR"([\S])");
neg_space_regex.should_search_fail(L"\u2028"); // U+2028 LINE SEPARATOR
}

{
const test_wregex neg_digit_regex(&g_regexTester, LR"([\D])");
neg_digit_regex.should_search_fail(L"\u0662"); // U+0662 ARABIC-INDIC DIGIT TWO
}
}

int main() {
test_dev10_449367_case_insensitivity_should_work();
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
Expand Down Expand Up @@ -699,6 +717,7 @@ int main() {
test_gh_4995();
test_gh_5058();
test_gh_5160();
test_gh_5214();

return g_regexTester.result();
}