Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

<regex>: Fix integer overflow in _Buf and implement geometric buffer expansion #5175

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 35 additions & 12 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -1257,9 +1257,9 @@ struct _Buf { // character buffer
return _Chrs;
}

void _Insert(_Elem _Ch) { // append _Ch
void _Insert2(_Elem _Ch) { // append _Ch
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think MSVC STL prefers a _v2 suffix over a plain 2. A plain 2 seems to carry some semantic implication, although it should not in this case.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't mind either way, but the _v2 suffix is not the style that has been used in <regex> for new versions up till now. See Casey's #131 and all the other occurrences of functions ending in 1 or 2 in this file (although I have to admit I'm not following this precedent strictly either, because I find it weird to label the second version of a function by appending 1). On the other hand, there are no functions ending in _vxxx.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We've got examples of both, e.g. _Stodx_v3 and _Orphan_all_unlocked_v3. I'm fine with either.

if (_Sz <= _Nchrs) {
_Expand(_Nchrs + _Buf_incr);
_Expand2(1U);
}

_Chrs[_Nchrs++] = _Ch;
Expand All @@ -1270,17 +1270,40 @@ struct _Buf { // character buffer
}

template <class _FwdIt>
void _Insert(_FwdIt _First, _FwdIt _Last) { // append multiple characters
void _Insert2(_FwdIt _First, _FwdIt _Last) { // append multiple characters
while (_First != _Last) {
_Insert(*_First++);
_Insert2(*_First++);
}
}

private:
void _Expand(unsigned int _Len) { // expand buffer to hold _Len characters
_Elem* _Tmp = static_cast<_Elem*>(_CSTD realloc(_Chrs, _Get_size_of_n<sizeof(_Elem)>(_Len)));
// Computes the new total element count to use when the buffer must grow by at least _Increase elements.
// Returns _Sz plus the (possibly enlarged) increase, or _Max when that sum would reach or exceed _Max.
// When the result is saturated at _Max it may provide less growth than requested, so the caller must
// compare the returned length against _Sz before relying on it.
// NOTE(review): _Sz and _Buf_incr are declared outside this hunk; _Sz is presumably the current capacity -- confirm.
unsigned int _Calculate_expansion(unsigned int _Increase) const {
// largest element count whose size in bytes is still representable in size_t
constexpr size_t _Max_size_t = static_cast<size_t>(-1) / sizeof(_Elem);
// largest value representable in unsigned int (the type used for lengths here)
constexpr unsigned int _Max_uint = static_cast<unsigned int>(-1);
// effective cap: the smaller of the two limits above
constexpr unsigned int _Max = _Max_size_t < _Max_uint ? static_cast<unsigned int>(_Max_size_t) : _Max_uint;

// never grow by less than _Buf_incr
if (_Increase < _Buf_incr) {
_Increase = _Buf_incr;
}
// never grow by less than half of _Sz, which makes repeated expansion geometric (about 1.5x)
if (_Increase < (_Sz >> 1)) {
_Increase = _Sz >> 1;
}

// Saturate at _Max. The first comparison guarantees _Max - _Increase cannot wrap around; the test is
// phrased with subtraction so that _Sz + _Increase is only evaluated when it is known to stay below _Max.
if (_Max <= _Increase || _Max - _Increase <= _Sz) {
return _Max;
}

return _Sz + _Increase;
}

void _Expand2(const unsigned int _Min_increase) { // expand buffer by at least _Min_increase
const unsigned int _Len = _Calculate_expansion(_Min_increase);
if (_Len - _Sz < _Min_increase) {
_Xregex_error(regex_constants::error_space);
}
_Elem* _Tmp = static_cast<_Elem*>(_CSTD realloc(_Chrs, sizeof(_Elem) * static_cast<size_t>(_Len)));
if (!_Tmp) {
_Xbad_alloc();
_Xregex_error(regex_constants::error_space);
}

_Chrs = _Tmp;
Expand Down Expand Up @@ -2835,7 +2858,7 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_char(_Elem _Ch) { // append charac
}

_Node_str<_Elem>* _Node = static_cast<_Node_str<_Elem>*>(_Current);
_Node->_Data._Insert(_Ch);
_Node->_Data._Insert2(_Ch);
}

template <class _FwdIt, class _Elem, class _RxTraits>
Expand Down Expand Up @@ -2869,7 +2892,7 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_char_to_array(_Elem _Ch) { // appe
_Node->_Large = new _Buf<_Elem>;
}

_Node->_Large->_Insert(_Ch);
_Node->_Large->_Insert2(_Ch);
}

template <class _FwdIt, class _Elem, class _RxTraits>
Expand Down Expand Up @@ -2912,8 +2935,8 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_range(_Elem _Arg0, _Elem _Arg1) {
_Node->_Ranges = new _Buf<_Elem>;
}

_Node->_Ranges->_Insert(static_cast<_Elem>(_Ex0));
_Node->_Ranges->_Insert(static_cast<_Elem>(_Ex1));
_Node->_Ranges->_Insert2(static_cast<_Elem>(_Ex0));
_Node->_Ranges->_Insert2(static_cast<_Elem>(_Ex1));
}
}
}
Expand Down Expand Up @@ -2957,7 +2980,7 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Char_to_elts(_FwdIt _First, _FwdIt _La
*_Cur = new _Sequence<_Elem>(static_cast<unsigned int>(_Diff));
(*_Cur)->_Next = _Node;
}
(*_Cur)->_Data._Insert(_First, _Last);
(*_Cur)->_Data._Insert2(_First, _Last);
}

template <class _FwdIt, class _Elem, class _RxTraits>
Expand Down