// Patch summary (leading text ahead of the first "diff --git" header; not part of any hunk).
//
// Purpose, per the comment in the added test: GH-5167, limit backreference parsing to a
// single digit for basic regular expressions.
//
// stl/inc/regex:
//   * Replaces the constant _BRE_MAX_GRP (9U) with _BRE_MAX_BACKREF_DIGITS (1).
//   * Renames _Parser::_DecimalDigits to _DecimalDigits2 and adds an int _Count parameter
//     that defaults to INT_MAX. The body forwards _Count to _Do_digits(10, _Count, ...) and
//     returns whether the result differs from _Count.
//   * _AtomEscape passes _BRE_MAX_BACKREF_DIGITS as _Count when _L_lim_bckr is set in
//     _L_flags, and INT_MAX otherwise. The former "_BRE_MAX_GRP < _Val" clause is removed
//     from the error_backref condition.
//   * The calls in _ClassEscape2 and _Quantifier are only renamed; they rely on the default
//     _Count.
//
// tests/std/tests/VSO_0000000_regex_use/test.cpp:
//   * Adds test_gh_5167, which runs the same patterns under both `basic` and `grep`:
//     "\10" and "\25\18" style patterns must match inputs where the trailing digit is a
//     literal character; "\15\28" must not match; "abc\1d" and "abc\10" must throw
//     error_backref.
//   * Calls test_gh_5167() from main() after test_gh_5160().
//
// NOTE(review): this copy of the patch is collapsed onto three physical lines, so the
// original line breaks and indentation inside the hunks are not visible here.
// NOTE(review): `template` appears with no parameter list and `static_cast(_Val)` with no
// target type, which suggests angle-bracket contents were lost in extraction. Confirm
// against the upstream change before trying to apply this text.
diff --git a/stl/inc/regex b/stl/inc/regex index 77efc9d32f..384c7016b7 100644 --- a/stl/inc/regex +++ b/stl/inc/regex @@ -1187,7 +1187,7 @@ _NODISCARD bool operator!=(const match_results<_BidIt, _Alloc>& _Left, const mat } #endif // !_HAS_CXX20 -_INLINE_VAR constexpr unsigned int _BRE_MAX_GRP = 9U; +_INLINE_VAR constexpr int _BRE_MAX_BACKREF_DIGITS = 1; _INLINE_VAR constexpr unsigned int _Bmp_max = 256U; // must fit in an unsigned int _INLINE_VAR constexpr unsigned int _Bmp_shift = 3U; @@ -1705,7 +1705,7 @@ private: // parsing int _Do_digits(int _Base, int _Count, regex_constants::error_type _Error_type); - bool _DecimalDigits(regex_constants::error_type _Error_type); + bool _DecimalDigits2(regex_constants::error_type _Error_type, int _Count = INT_MAX); void _HexDigits(int); bool _OctalDigits(); void _Do_ex_class(_Meta_type); @@ -3950,9 +3950,9 @@ int _Parser<_FwdIt, _Elem, _RxTraits>::_Do_digits( } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_DecimalDigits( - regex_constants::error_type _Error_type) { // check for decimal value - return _Do_digits(10, INT_MAX, _Error_type) != INT_MAX; +bool _Parser<_FwdIt, _Elem, _RxTraits>::_DecimalDigits2( + const regex_constants::error_type _Error_type, const int _Count /*= INT_MAX */) { // check for decimal value + return _Do_digits(10, _Count, _Error_type) != _Count; } template @@ -4041,7 +4041,7 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassEscape2() { // check for class return _Prs_chr; } else if ((_L_flags & _L_esc_wsd) && _CharacterClassEscape(false)) { return _Prs_set; - } else if (_DecimalDigits(regex_constants::error_escape)) { // check for invalid value + } else if (_DecimalDigits2(regex_constants::error_escape)) { // check for invalid value if (_Val != 0) { _Error(regex_constants::error_escape); } @@ -4333,15 +4333,16 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterEscape() { // check for valid template void _Parser<_FwdIt, _Elem, _RxTraits>::_AtomEscape() { // check for valid atom escape - if 
((_L_flags & _L_bckr) && _DecimalDigits(regex_constants::error_backref)) { // check for valid back reference + if ((_L_flags & _L_bckr) + && _DecimalDigits2(regex_constants::error_backref, + (_L_flags & _L_lim_bckr) ? _BRE_MAX_BACKREF_DIGITS : INT_MAX)) { // check for valid back reference if (_Val == 0) { // handle \0 if (!(_L_flags & _L_bzr_chr)) { _Error(regex_constants::error_escape); } else { _Nfa._Add_char(static_cast<_Elem>(_Val)); } - } else if (((_L_flags & _L_lim_bckr) && _BRE_MAX_GRP < static_cast(_Val)) - || _Grp_idx < static_cast(_Val) || _Finished_grps.size() <= static_cast(_Val) + } else if (_Grp_idx < static_cast(_Val) || _Finished_grps.size() <= static_cast(_Val) || !_Finished_grps[static_cast(_Val)]) { _Error(regex_constants::error_backref); } else { @@ -4365,7 +4366,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier _Max = 1; } else if (_Mchar == _Meta_lbr) { // check for valid bracketed value _Next(); - if (!_DecimalDigits(regex_constants::error_badbrace)) { + if (!_DecimalDigits2(regex_constants::error_badbrace)) { _Error(regex_constants::error_badbrace); } @@ -4375,7 +4376,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier } else { // check for decimal constant following comma _Next(); if (_Mchar != _Meta_rbr) { - if (!_DecimalDigits(regex_constants::error_badbrace)) { + if (!_DecimalDigits2(regex_constants::error_badbrace)) { _Error(regex_constants::error_badbrace); } diff --git a/tests/std/tests/VSO_0000000_regex_use/test.cpp b/tests/std/tests/VSO_0000000_regex_use/test.cpp index ad2efc0d4a..6b36d98e20 100644 --- a/tests/std/tests/VSO_0000000_regex_use/test.cpp +++ b/tests/std/tests/VSO_0000000_regex_use/test.cpp @@ -669,6 +669,20 @@ void test_gh_5160() { neg_regex.should_search_fail(L"xxxYxx\x2009xxxZxxx"); // U+2009 THIN SPACE } +void test_gh_5167() { + // GH-5167: Limit backreference parsing to single digit for basic regular expressions + 
g_regexTester.should_match("abab0", R"x(\(ab*\)\10)x", basic); + g_regexTester.should_match("abab0", R"x(\(ab*\)\10)x", grep); + g_regexTester.should_match("abbcdccdc5abb8", R"x(\(ab*\)\([cd]*\)\25\18)x", basic); + g_regexTester.should_match("abbcdccdc5abb8", R"x(\(ab*\)\([cd]*\)\25\18)x", grep); + g_regexTester.should_not_match("abbcdccdc5abb8", R"x(\(ab*\)\([cd]*\)\15\28)x", basic); + g_regexTester.should_not_match("abbcdccdc5abb8", R"x(\(ab*\)\([cd]*\)\15\28)x", grep); + g_regexTester.should_throw(R"x(abc\1d)x", error_backref, basic); + g_regexTester.should_throw(R"x(abc\1d)x", error_backref, grep); + g_regexTester.should_throw(R"x(abc\10)x", error_backref, basic); + g_regexTester.should_throw(R"x(abc\10)x", error_backref, grep); +} + int main() { test_dev10_449367_case_insensitivity_should_work(); test_dev11_462743_regex_collate_should_not_disable_regex_icase(); @@ -699,6 +713,7 @@ int main() { test_gh_4995(); test_gh_5058(); test_gh_5160(); + test_gh_5167(); return g_regexTester.result(); }