Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix better_match check used by regex search routine #1547

Closed
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 32 additions & 8 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <algorithm>
#include <cctype>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cwchar>
Expand Down Expand Up @@ -3398,19 +3399,42 @@ bool _Matcher<_BidIt, _Elem, _RxTraits, _It>::_Do_class(_Node_base* _Nx) { // ap
}

template <class _BidIt, class _Elem, class _RxTraits, class _It>
bool _Matcher<_BidIt, _Elem, _RxTraits, _It>::_Better_match() { // check for better match under UNIX rules
bool _Matcher<_BidIt, _Elem, _RxTraits, _It>::_Better_match() { // check for better match under leftmost-longest rules
for (unsigned int _Ix = 0; _Ix < _Get_ncap(); ++_Ix) { // check each capture group
if (_Res._Grp_valid[_Ix] && _Tgt_state._Grp_valid[_Ix]) {
if (_Res._Grps[_Ix]._Begin != _Tgt_state._Grps[_Ix]._Begin) {
return _STD distance(_Begin, _Res._Grps[_Ix]._Begin)
< _STD distance(_Begin, _Tgt_state._Grps[_Ix]._Begin);
const auto& _Res_grp = _Res._Grps[_Ix];
const auto& _Tgt_grp = _Tgt_state._Grps[_Ix];
if (_Res_grp._Begin == _Res_grp._End) {
if (_Tgt_grp._Begin == _Tgt_grp._End) {
// Both groups are empty or invalid - matches are equivalent.
continue;
}
// Target group is valid and non-empty, and current group is either empty or invalid.
return true;
}
if (_Tgt_grp._Begin == _Tgt_grp._End) {
Comment on lines +3413 to +3414
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We conventionally add newlines between "non-chained" if-statements.

// Target group is invalid or empty, and current group is valid and non-empty,
// so our current group is the better match.
return false;
}

if (_Res._Grps[_Ix]._End != _Tgt_state._Grps[_Ix]._End) {
return _STD distance(_Begin, _Res._Grps[_Ix]._End) < _STD distance(_Begin, _Tgt_state._Grps[_Ix]._End);
}
if (_Res_grp._Begin != _Tgt_grp._Begin) {
// If one of the matches starts before the other, it is considered better.
// This is also known as "leftmost", in languages written from left to right.
ptrdiff_t _Res_dist_from_begin = _STD distance(_Begin, _Res_grp._Begin);
ptrdiff_t _Tgt_dist_from_begin = _STD distance(_Begin, _Tgt_grp._Begin);
return _Tgt_dist_from_begin < _Res_dist_from_begin;
}

if (_Res_grp._End != _Tgt_grp._End) {
// If one of the matches is longer than the other, and they start at the same point,
// it is considered better.
ptrdiff_t _Res_size = _STD distance(_Res_grp._Begin, _Res_grp._End);
ptrdiff_t _Tgt_size = _STD distance(_Tgt_grp._Begin, _Tgt_grp._End);
return _Tgt_size > _Res_size;
}
}

// Use the current match if target isn't better.
return false;
}

Expand Down
51 changes: 51 additions & 0 deletions tests/std/include/test_regex_support.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#pragma once
#include <cstdio>
#include <numeric>
#include <regex>
#include <string>

Expand Down Expand Up @@ -116,6 +117,56 @@ class regex_fixture {
}
}

void should_capture_groups(
const std::string& subject, const std::string& pattern, const std::vector<std::string>& groups) {
try {
const std::regex r(pattern);
std::smatch m;

if (!std::regex_match(subject, m, r)) {
printf(R"(Expected regex("%s") to match "%s".)"
"\n",
pattern.c_str(), subject.c_str());
fail_regex();
return;
}

if (m[0] != subject) {
printf(R"(should_capture("%s", "%s"): m[0] == "%s")"
"\n",
subject.c_str(), pattern.c_str(), m[0].str().c_str());
fail_regex();
}

if (m.size() != groups.size() + 1) {
printf(R"(should_capture("%s", "%s"): captures: %zd != %zd)"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

size_t is unsigned, so this should say %zu (as should_capture() does). Occurs below.

"\n",
subject.c_str(), pattern.c_str(), m.size() - 1, groups.size());
fail_regex();
}

for (size_t i = 0; i < groups.size(); ++i) {
if (m[i + 1] != groups[i]) {
printf(R"(should_capture("%s", "%s"): index %zd)"
"\nexpected: \"%s\""
"\nfound: \"%s\"\n",
subject.c_str(), pattern.c_str(), i, m[i + 1].str().c_str(), groups[i].c_str());
fail_regex();
}
}
} catch (const std::regex_error& e) {
std::string groups_string = std::accumulate(groups.cbegin(), groups.cend(), std::string(),
[](const std::string& result, const std::string& value) -> std::string {
return !result.empty() ? result + ";" + value : value;
});

printf(R"(should_capture("%s", "%s", "%s"): regex_error: "%s")"
"\n",
subject.c_str(), pattern.c_str(), groups_string.c_str(), e.what());
fail_regex();
}
}

void should_replace_to(const std::string& subject, const std::string& pattern, const std::string& fmt,
const std::regex_constants::match_flag_type match_flags, const std::string& expected) {
try {
Expand Down
9 changes: 9 additions & 0 deletions tests/std/tests/VSO_0000000_regex_use/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,14 @@ void test_VSO_226914_word_boundaries() {
aWordAny.should_search_fail("aa", match_not_bow | match_not_eow);
}

void test_gh_731() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be followed by a comment mentioning the bug's title (like test_gh_993() immediately below).

std::vector<std::string> groups{"AAA", "BBB", ""};
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is already using namespace std;. Occurs below.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should probably include <vector> even if <test_regex_support.hpp> already does so.

g_regexTester.should_capture_groups("AAA BBB", R"((A+)\s*(B+)?\s*B*(B*)?)", groups);

groups.assign({"AAA", "BBB"});
g_regexTester.should_capture_groups("AAA BBB", R"((A+)\s*(B+)?\s*B*)", groups);
}

void test_gh_993() {
// GH-993 regex::icase is not handled correctly for some input.
{
Expand Down Expand Up @@ -607,6 +615,7 @@ int main() {
test_VSO_225160_match_bol_flag();
test_VSO_225160_match_eol_flag();
test_VSO_226914_word_boundaries();
test_gh_731();
test_gh_993();

return g_regexTester.result();
Expand Down