Skip to content

Commit 142f48a

Browse files
committed
Fix: Self-assignment issues
1 parent 0a71676 commit 142f48a

File tree

3 files changed

+59
-4
lines changed

3 files changed

+59
-4
lines changed

include/stringzilla/stringzilla.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3256,7 +3256,7 @@ SZ_PUBLIC sz_ptr_t sz_string_shrink_to_fit(sz_string_t *string, sz_memory_alloca
32563256

32573257
// We may already be space-optimal, and in that case we don't need to do anything.
32583258
sz_size_t new_space = string_length + 1;
3259-
if (string_space == new_space || string_is_external) return string->external.start;
3259+
if (string_space == new_space || !string_is_external) return string->external.start;
32603260

32613261
sz_ptr_t new_start = (sz_ptr_t)allocator->allocate(new_space, allocator->handle);
32623262
if (!new_start) return SZ_NULL_CHAR;

include/stringzilla/stringzilla.hpp

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1972,6 +1972,12 @@ class basic_string {
19721972
*/
19731973
static constexpr size_type npos = SZ_SSIZE_MAX;
19741974

1975+
/**
1976+
* @brief The number of characters that can be stored in the internal buffer.
1977+
* Depends on the size of the internal buffer for the "Small String Optimization".
1978+
*/
1979+
static constexpr size_type min_capacity = SZ_STRING_INTERNAL_SPACE - 1;
1980+
19751981
#pragma region Constructors and STL Utilities
19761982

19771983
sz_constexpr_if_cpp20 basic_string() noexcept {
@@ -3362,10 +3368,19 @@ bool basic_string<char_type_, allocator_>::try_assign(string_view other) noexcep
33623368
sz_size_t string_length;
33633369
sz_string_range(&string_, &string_start, &string_length);
33643370

3365-
if (string_length >= other.length()) {
3371+
// One nasty special case is when the other string is a substring of this string.
3372+
// We need to handle that separately, as the `sz_string_expand` may invalidate the `other` pointer.
3373+
if (other.data() >= string_start && other.data() < string_start + string_length) {
3374+
auto offset_in_this = other.data() - string_start;
3375+
sz_string_erase(&string_, 0, offset_in_this);
3376+
sz_string_erase(&string_, other.length(), SZ_SIZE_MAX);
3377+
}
3378+
// In some of the other cases, when the assigned string is short, we don't need to re-allocate.
3379+
else if (string_length >= other.length()) {
33663380
other.copy(string_start, other.length());
33673381
sz_string_erase(&string_, other.length(), SZ_SIZE_MAX);
33683382
}
3383+
// In the common case, however, we need to allocate.
33693384
else {
33703385
if (!_with_alloc([&](sz_alloc_type &alloc) {
33713386
string_start = sz_string_expand(&string_, SZ_SIZE_MAX, other.length(), &alloc);
@@ -3392,10 +3407,21 @@ bool basic_string<char_type_, allocator_>::try_push_back(char_type c) noexcept {
33923407
template <typename char_type_, typename allocator_>
33933408
bool basic_string<char_type_, allocator_>::try_append(const_pointer str, size_type length) noexcept {
33943409
return _with_alloc([&](sz_alloc_type &alloc) {
3395-
auto old_size = size();
3410+
// Sometimes we are inserting part of this string into itself.
3411+
// By the time `sz_string_expand` finished, the old `str` pointer may be invalidated,
3412+
// so we need to handle that special case separately.
3413+
auto this_span = span();
3414+
if (str >= this_span.begin() && str < this_span.end()) {
3415+
auto str_offset_in_this = str - data();
33963416
sz_ptr_t start = sz_string_expand(&string_, SZ_SIZE_MAX, length, &alloc);
33973417
if (!start) return false;
3398-
sz_copy(start + old_size, str, length);
3418+
sz_copy(start + this_span.size(), start + str_offset_in_this, length);
3419+
}
3420+
else {
3421+
sz_ptr_t start = sz_string_expand(&string_, SZ_SIZE_MAX, length, &alloc);
3422+
if (!start) return false;
3423+
sz_copy(start + this_span.size(), str, length);
3424+
}
33993425
return true;
34003426
});
34013427
}

scripts/test.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,13 +431,20 @@ static void test_api_mutable() {
431431
assert_scoped(str s = "obsolete", s.assign(s, 4), s == "lete"); // Partial self-assignment
432432
assert_scoped(str s = "obsolete", s.assign(s, 4, 3), s == "let"); // Partial self-assignment
433433

434+
// Self-assignment is a special case of assignment.
435+
assert_scoped(str s = "obsolete", s = s, s == "obsolete");
436+
assert_scoped(str s = "obsolete", s.assign(s), s == "obsolete");
437+
assert_scoped(str s = "obsolete", s.assign(s.data(), 2), s == "ob");
438+
assert_scoped(str s = "obsolete", s.assign(s.data(), s.size()), s == "obsolete");
439+
434440
// Allocations, capacity and memory management.
435441
assert_scoped(str s, s.reserve(10), s.capacity() >= 10);
436442
assert_scoped(str s, s.resize(10), s.size() == 10);
437443
assert_scoped(str s, s.resize(10, 'a'), s.size() == 10 && s == "aaaaaaaaaa");
438444
assert(str().max_size() > 0);
439445
assert(str().get_allocator() == std::allocator<char>());
440446
assert(std::strcmp(str("c_str").c_str(), "c_str") == 0);
447+
assert_scoped(str s = "hello", s.shrink_to_fit(), s.capacity() <= sz::string::min_capacity);
441448

442449
// Concatenation.
443450
// Following are missing in strings, but are present in vectors.
@@ -627,6 +634,28 @@ static void test_api_readonly_extensions() {
627634
void test_api_mutable_extensions() {
628635
using str = sz::string;
629636

637+
// Try methods.
638+
assert(str("obsolete").try_assign("hello"));
639+
assert(str().try_reserve(10));
640+
assert(str().try_resize(10));
641+
assert(str("__").try_insert(1, "test"));
642+
assert(str("test").try_erase(1, 2));
643+
assert(str("test").try_clear());
644+
assert(str("test").try_replace(1, 2, "aaaa"));
645+
assert(str("test").try_push_back('a'));
646+
assert(str("test").try_shrink_to_fit());
647+
648+
// Self-referencing methods.
649+
assert_scoped(str s = "test", s.try_assign(s.view()), s == "test");
650+
assert_scoped(str s = "test", s.try_assign(s.view().sub(1, 2)), s == "e");
651+
assert_scoped(str s = "test", s.try_append(s.view().sub(1, 2)), s == "teste");
652+
653+
// Try methods going beyond and beneath capacity threshold.
654+
assert_scoped(str s = "0123456789012345678901234567890123456789012345678901234567890123", // 64 symbols at start
655+
s.try_append(s) && s.try_append(s) && s.try_append(s) && s.try_append(s) && s.try_clear() &&
656+
s.try_shrink_to_fit(),
657+
s.capacity() < sz::string::min_capacity);
658+
630659
// Same length replacements.
631660
assert_scoped(str s = "hello", s.replace_all("xx", "xx"), s == "hello");
632661
assert_scoped(str s = "hello", s.replace_all("l", "1"), s == "he11o");

0 commit comments

Comments
 (0)