Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

<regex>: Limit backreference parsing to single digit for basic regular expressions #5167

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -1187,7 +1187,8 @@ _NODISCARD bool operator!=(const match_results<_BidIt, _Alloc>& _Left, const mat
}
#endif // !_HAS_CXX20

_INLINE_VAR constexpr unsigned int _BRE_MAX_GRP = 9U;
_INLINE_VAR constexpr unsigned int _BRE_MAX_GRP = 9U;
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
_INLINE_VAR constexpr int _BRE_MAX_BACKREF_DIGITS = 1;

_INLINE_VAR constexpr unsigned int _Bmp_max = 256U; // must fit in an unsigned int
_INLINE_VAR constexpr unsigned int _Bmp_shift = 3U;
Expand Down Expand Up @@ -1705,7 +1706,7 @@ private:

// parsing
int _Do_digits(int _Base, int _Count, regex_constants::error_type _Error_type);
bool _DecimalDigits(regex_constants::error_type _Error_type);
bool _DecimalDigits2(regex_constants::error_type _Error_type, int _Count = INT_MAX);
void _HexDigits(int);
bool _OctalDigits();
void _Do_ex_class(_Meta_type);
Expand Down Expand Up @@ -3950,9 +3951,9 @@ int _Parser<_FwdIt, _Elem, _RxTraits>::_Do_digits(
}

template <class _FwdIt, class _Elem, class _RxTraits>
bool _Parser<_FwdIt, _Elem, _RxTraits>::_DecimalDigits(
regex_constants::error_type _Error_type) { // check for decimal value
return _Do_digits(10, INT_MAX, _Error_type) != INT_MAX;
bool _Parser<_FwdIt, _Elem, _RxTraits>::_DecimalDigits2(
const regex_constants::error_type _Error_type, const int _Count /*= INT_MAX */) { // check for decimal value
return _Do_digits(10, _Count, _Error_type) != _Count;
}

template <class _FwdIt, class _Elem, class _RxTraits>
Expand Down Expand Up @@ -4041,7 +4042,7 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassEscape2() { // check for class
return _Prs_chr;
} else if ((_L_flags & _L_esc_wsd) && _CharacterClassEscape(false)) {
return _Prs_set;
} else if (_DecimalDigits(regex_constants::error_escape)) { // check for invalid value
} else if (_DecimalDigits2(regex_constants::error_escape)) { // check for invalid value
if (_Val != 0) {
_Error(regex_constants::error_escape);
}
Expand Down Expand Up @@ -4333,7 +4334,9 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterEscape() { // check for valid

template <class _FwdIt, class _Elem, class _RxTraits>
void _Parser<_FwdIt, _Elem, _RxTraits>::_AtomEscape() { // check for valid atom escape
if ((_L_flags & _L_bckr) && _DecimalDigits(regex_constants::error_backref)) { // check for valid back reference
if ((_L_flags & _L_bckr)
&& _DecimalDigits2(regex_constants::error_backref,
(_L_flags & _L_lim_bckr) ? _BRE_MAX_BACKREF_DIGITS : INT_MAX)) { // check for valid back reference
if (_Val == 0) { // handle \0
if (!(_L_flags & _L_bzr_chr)) {
_Error(regex_constants::error_escape);
Expand Down Expand Up @@ -4365,7 +4368,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier
_Max = 1;
} else if (_Mchar == _Meta_lbr) { // check for valid bracketed value
_Next();
if (!_DecimalDigits(regex_constants::error_badbrace)) {
if (!_DecimalDigits2(regex_constants::error_badbrace)) {
_Error(regex_constants::error_badbrace);
}

Expand All @@ -4375,7 +4378,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier
} else { // check for decimal constant following comma
_Next();
if (_Mchar != _Meta_rbr) {
if (!_DecimalDigits(regex_constants::error_badbrace)) {
if (!_DecimalDigits2(regex_constants::error_badbrace)) {
_Error(regex_constants::error_badbrace);
}

Expand Down
15 changes: 15 additions & 0 deletions tests/std/tests/VSO_0000000_regex_use/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,20 @@ void test_gh_5160() {
neg_regex.should_search_fail(L"xxxYxx\x2009xxxZxxx"); // U+2009 THIN SPACE
}

void test_gh_5167() {
// GH-5167: Limit backreference parsing to single digit for basic regular expressions
g_regexTester.should_match("abab0", R"x(\(ab*\)\10)x", basic);
g_regexTester.should_match("abab0", R"x(\(ab*\)\10)x", grep);
g_regexTester.should_match("abbcdccdc5abb8", R"x(\(ab*\)\([cd]*\)\25\18)x", basic);
g_regexTester.should_match("abbcdccdc5abb8", R"x(\(ab*\)\([cd]*\)\25\18)x", grep);
g_regexTester.should_not_match("abbcdccdc5abb8", R"x(\(ab*\)\([cd]*\)\15\28)x", basic);
g_regexTester.should_not_match("abbcdccdc5abb8", R"x(\(ab*\)\([cd]*\)\15\28)x", grep);
g_regexTester.should_throw(R"x(abc\1d)x", error_backref, basic);
g_regexTester.should_throw(R"x(abc\1d)x", error_backref, grep);
g_regexTester.should_throw(R"x(abc\10)x", error_backref, basic);
g_regexTester.should_throw(R"x(abc\10)x", error_backref, grep);
}

int main() {
test_dev10_449367_case_insensitivity_should_work();
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
Expand Down Expand Up @@ -699,6 +713,7 @@ int main() {
test_gh_4995();
test_gh_5058();
test_gh_5160();
test_gh_5167();

return g_regexTester.result();
}