blob: 4bb22e54c76fb3916160d3950d8a2cc2cf6585a4 [file] [log] [blame]
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
initial.commitd7cae122008-07-26 21:49:384
avi@chromium.org8ad97ad2013-06-08 06:05:475#include "base/strings/string_util.h"
6
mmentovai@google.com703f427e2008-08-13 01:17:187#include <math.h>
mmentovai@google.com604a7be2008-08-12 19:23:148#include <stdarg.h>
avi84f37e12015-12-25 09:31:429#include <stddef.h>
10#include <stdint.h>
mmentovai@google.com60632022008-08-12 18:48:5811
thestig@chromium.org7b527b92013-11-22 02:29:0812#include <algorithm>
Jan Wilken Dörriead587c32021-03-11 14:09:2713#include <string>
jdoerriea1771352018-12-19 18:48:1714#include <type_traits>
mmentovai@google.com703f427e2008-08-13 01:17:1815
OlivierLi02b94c52020-05-29 21:30:5616#include "base/bits.h"
Jan Wilken Dörrie5db50ac2021-02-15 11:43:1617#include "base/strings/string_piece.h"
avi@chromium.orga4ea1f12013-06-07 18:37:0718#include "base/strings/utf_string_conversions.h"
jdoerriea1771352018-12-19 18:48:1719#include "build/build_config.h"
willchan@chromium.org270c6412010-03-29 22:02:4720#include "testing/gmock/include/gmock/gmock.h"
initial.commitd7cae122008-07-26 21:49:3821#include "testing/gtest/include/gtest/gtest.h"
22
willchan@chromium.org270c6412010-03-29 22:02:4723using ::testing::ElementsAre;
24
brettw@chromium.orgd6e58c6e2009-10-10 20:40:5025namespace base {
26
Peter Kasting3c5f1db2023-03-09 21:50:5927namespace {
28
29const struct trim_case {
initial.commitd7cae122008-07-26 21:49:3830 const wchar_t* input;
31 const TrimPositions positions;
32 const wchar_t* output;
33 const TrimPositions return_value;
34} trim_cases[] = {
Peter Kasting3c5f1db2023-03-09 21:50:5935 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
36 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
37 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
38 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
39 {L"", TRIM_ALL, L"", TRIM_NONE},
40 {L" ", TRIM_LEADING, L"", TRIM_LEADING},
41 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
42 {L" ", TRIM_ALL, L"", TRIM_ALL},
43 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
44 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
initial.commitd7cae122008-07-26 21:49:3845};
46
Peter Kasting3c5f1db2023-03-09 21:50:5947const struct trim_case_ascii {
initial.commitd7cae122008-07-26 21:49:3848 const char* input;
49 const TrimPositions positions;
50 const char* output;
51 const TrimPositions return_value;
52} trim_cases_ascii[] = {
Peter Kasting3c5f1db2023-03-09 21:50:5953 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
54 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
55 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
56 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
57 {"", TRIM_ALL, "", TRIM_NONE},
58 {" ", TRIM_LEADING, "", TRIM_LEADING},
59 {" ", TRIM_TRAILING, "", TRIM_TRAILING},
60 {" ", TRIM_ALL, "", TRIM_ALL},
61 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
initial.commitd7cae122008-07-26 21:49:3862};
63
nick@chromium.orgd06e3e02010-05-28 20:18:2064// Helper used to test TruncateUTF8ToByteSize.
pkasting@chromium.orged8e57da2014-07-03 07:03:3965bool Truncated(const std::string& input,
66 const size_t byte_size,
nick@chromium.orgd06e3e02010-05-28 20:18:2067 std::string* output) {
68 size_t prev = input.length();
69 TruncateUTF8ToByteSize(input, byte_size, output);
jerrica@google.coma89dc1ed2010-06-17 16:20:3370 return prev != output->length();
nick@chromium.orgd06e3e02010-05-28 20:18:2071}
72
Mathias Bynenscdfff6b2020-01-22 15:51:4673using TestFunction = bool (*)(StringPiece str);
74
Peter Kasting3c5f1db2023-03-09 21:50:5975// Helper used to test IsStringUTF8[AllowingNoncharacters].
Mathias Bynenscdfff6b2020-01-22 15:51:4676void TestStructurallyValidUtf8(TestFunction fn) {
77 EXPECT_TRUE(fn("abc"));
78 EXPECT_TRUE(fn("\xC2\x81"));
79 EXPECT_TRUE(fn("\xE1\x80\xBF"));
80 EXPECT_TRUE(fn("\xF1\x80\xA0\xBF"));
81 EXPECT_TRUE(fn("\xF1\x80\xA0\xBF"));
82 EXPECT_TRUE(fn("a\xC2\x81\xE1\x80\xBF\xF1\x80\xA0\xBF"));
83
84 // U+FEFF used as UTF-8 BOM.
85 // clang-format off
86 EXPECT_TRUE(fn("\xEF\xBB\xBF" "abc"));
87 // clang-format on
88
89 // Embedded nulls in canonical UTF-8 representation.
90 using std::string_literals::operator""s;
91 const std::string kEmbeddedNull = "embedded\0null"s;
92 EXPECT_TRUE(fn(kEmbeddedNull));
93}
94
Peter Kasting3c5f1db2023-03-09 21:50:5995// Helper used to test IsStringUTF8[AllowingNoncharacters].
Mathias Bynenscdfff6b2020-01-22 15:51:4696void TestStructurallyInvalidUtf8(TestFunction fn) {
97 // Invalid encoding of U+1FFFE (0x8F instead of 0x9F)
98 EXPECT_FALSE(fn("\xF0\x8F\xBF\xBE"));
99
100 // Surrogate code points
101 EXPECT_FALSE(fn("\xED\xA0\x80\xED\xBF\xBF"));
102 EXPECT_FALSE(fn("\xED\xA0\x8F"));
103 EXPECT_FALSE(fn("\xED\xBF\xBF"));
104
105 // Overlong sequences
106 EXPECT_FALSE(fn("\xC0\x80")); // U+0000
107 EXPECT_FALSE(fn("\xC1\x80\xC1\x81")); // "AB"
108 EXPECT_FALSE(fn("\xE0\x80\x80")); // U+0000
109 EXPECT_FALSE(fn("\xE0\x82\x80")); // U+0080
110 EXPECT_FALSE(fn("\xE0\x9F\xBF")); // U+07FF
111 EXPECT_FALSE(fn("\xF0\x80\x80\x8D")); // U+000D
112 EXPECT_FALSE(fn("\xF0\x80\x82\x91")); // U+0091
113 EXPECT_FALSE(fn("\xF0\x80\xA0\x80")); // U+0800
114 EXPECT_FALSE(fn("\xF0\x8F\xBB\xBF")); // U+FEFF (BOM)
115 EXPECT_FALSE(fn("\xF8\x80\x80\x80\xBF")); // U+003F
116 EXPECT_FALSE(fn("\xFC\x80\x80\x80\xA0\xA5")); // U+00A5
117
118 // Beyond U+10FFFF (the upper limit of Unicode codespace)
119 EXPECT_FALSE(fn("\xF4\x90\x80\x80")); // U+110000
120 EXPECT_FALSE(fn("\xF8\xA0\xBF\x80\xBF")); // 5 bytes
121 EXPECT_FALSE(fn("\xFC\x9C\xBF\x80\xBF\x80")); // 6 bytes
122
123 // BOM in UTF-16(BE|LE)
124 EXPECT_FALSE(fn("\xFE\xFF"));
125 EXPECT_FALSE(fn("\xFF\xFE"));
126
127 // Strings in legacy encodings. We can certainly make up strings
128 // in a legacy encoding that are valid in UTF-8, but in real data,
129 // most of them are invalid as UTF-8.
130
131 // cafe with U+00E9 in ISO-8859-1
132 EXPECT_FALSE(fn("caf\xE9"));
133 // U+AC00, U+AC001 in EUC-KR
134 EXPECT_FALSE(fn("\xB0\xA1\xB0\xA2"));
135 // U+4F60 U+597D in Big5
136 EXPECT_FALSE(fn("\xA7\x41\xA6\x6E"));
137 // "abc" with U+201[CD] in windows-125[0-8]
138 // clang-format off
139 EXPECT_FALSE(fn("\x93" "abc\x94"));
140 // clang-format on
141 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
142 EXPECT_FALSE(fn("\xD9\xEE\xE4\xEE"));
143 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
144 EXPECT_FALSE(fn("\xE3\xE5\xE9\xDC"));
145
146 // BOM in UTF-32(BE|LE)
147 using std::string_literals::operator""s;
148 const std::string kUtf32BeBom = "\x00\x00\xFE\xFF"s;
149 EXPECT_FALSE(fn(kUtf32BeBom));
150 const std::string kUtf32LeBom = "\xFF\xFE\x00\x00"s;
151 EXPECT_FALSE(fn(kUtf32LeBom));
152}
153
Peter Kasting3c5f1db2023-03-09 21:50:59154// Helper used to test IsStringUTF8[AllowingNoncharacters].
Mathias Bynenscdfff6b2020-01-22 15:51:46155void TestNoncharacters(TestFunction fn, bool expected_result) {
156 EXPECT_EQ(fn("\xEF\xB7\x90"), expected_result); // U+FDD0
157 EXPECT_EQ(fn("\xEF\xB7\x9F"), expected_result); // U+FDDF
158 EXPECT_EQ(fn("\xEF\xB7\xAF"), expected_result); // U+FDEF
159 EXPECT_EQ(fn("\xEF\xBF\xBE"), expected_result); // U+FFFE
160 EXPECT_EQ(fn("\xEF\xBF\xBF"), expected_result); // U+FFFF
161 EXPECT_EQ(fn("\xF0\x9F\xBF\xBE"), expected_result); // U+01FFFE
162 EXPECT_EQ(fn("\xF0\x9F\xBF\xBF"), expected_result); // U+01FFFF
163 EXPECT_EQ(fn("\xF0\xAF\xBF\xBE"), expected_result); // U+02FFFE
164 EXPECT_EQ(fn("\xF0\xAF\xBF\xBF"), expected_result); // U+02FFFF
165 EXPECT_EQ(fn("\xF0\xBF\xBF\xBE"), expected_result); // U+03FFFE
166 EXPECT_EQ(fn("\xF0\xBF\xBF\xBF"), expected_result); // U+03FFFF
167 EXPECT_EQ(fn("\xF1\x8F\xBF\xBE"), expected_result); // U+04FFFE
168 EXPECT_EQ(fn("\xF1\x8F\xBF\xBF"), expected_result); // U+04FFFF
169 EXPECT_EQ(fn("\xF1\x9F\xBF\xBE"), expected_result); // U+05FFFE
170 EXPECT_EQ(fn("\xF1\x9F\xBF\xBF"), expected_result); // U+05FFFF
171 EXPECT_EQ(fn("\xF1\xAF\xBF\xBE"), expected_result); // U+06FFFE
172 EXPECT_EQ(fn("\xF1\xAF\xBF\xBF"), expected_result); // U+06FFFF
173 EXPECT_EQ(fn("\xF1\xBF\xBF\xBE"), expected_result); // U+07FFFE
174 EXPECT_EQ(fn("\xF1\xBF\xBF\xBF"), expected_result); // U+07FFFF
175 EXPECT_EQ(fn("\xF2\x8F\xBF\xBE"), expected_result); // U+08FFFE
176 EXPECT_EQ(fn("\xF2\x8F\xBF\xBF"), expected_result); // U+08FFFF
177 EXPECT_EQ(fn("\xF2\x9F\xBF\xBE"), expected_result); // U+09FFFE
178 EXPECT_EQ(fn("\xF2\x9F\xBF\xBF"), expected_result); // U+09FFFF
179 EXPECT_EQ(fn("\xF2\xAF\xBF\xBE"), expected_result); // U+0AFFFE
180 EXPECT_EQ(fn("\xF2\xAF\xBF\xBF"), expected_result); // U+0AFFFF
181 EXPECT_EQ(fn("\xF2\xBF\xBF\xBE"), expected_result); // U+0BFFFE
182 EXPECT_EQ(fn("\xF2\xBF\xBF\xBF"), expected_result); // U+0BFFFF
183 EXPECT_EQ(fn("\xF3\x8F\xBF\xBE"), expected_result); // U+0CFFFE
184 EXPECT_EQ(fn("\xF3\x8F\xBF\xBF"), expected_result); // U+0CFFFF
185 EXPECT_EQ(fn("\xF3\x9F\xBF\xBE"), expected_result); // U+0DFFFE
186 EXPECT_EQ(fn("\xF3\x9F\xBF\xBF"), expected_result); // U+0DFFFF
187 EXPECT_EQ(fn("\xF3\xAF\xBF\xBE"), expected_result); // U+0EFFFE
188 EXPECT_EQ(fn("\xF3\xAF\xBF\xBF"), expected_result); // U+0EFFFF
189 EXPECT_EQ(fn("\xF3\xBF\xBF\xBE"), expected_result); // U+0FFFFE
190 EXPECT_EQ(fn("\xF3\xBF\xBF\xBF"), expected_result); // U+0FFFFF
191 EXPECT_EQ(fn("\xF4\x8F\xBF\xBE"), expected_result); // U+10FFFE
192 EXPECT_EQ(fn("\xF4\x8F\xBF\xBF"), expected_result); // U+10FFFF
193}
194
nick@chromium.orgd06e3e02010-05-28 20:18:20195TEST(StringUtilTest, TruncateUTF8ToByteSize) {
196 std::string output;
197
198 // Empty strings and invalid byte_size arguments
dcheng@chromium.org007b3f82013-04-09 08:46:45199 EXPECT_FALSE(Truncated(std::string(), 0, &output));
nick@chromium.orgd06e3e02010-05-28 20:18:20200 EXPECT_EQ(output, "");
201 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
202 EXPECT_EQ(output, "");
pkasting@chromium.orged8e57da2014-07-03 07:03:39203 EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
nick@chromium.orgd06e3e02010-05-28 20:18:20204 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
205
206 // Testing the truncation of valid UTF8 correctly
207 EXPECT_TRUE(Truncated("abc", 2, &output));
208 EXPECT_EQ(output, "ab");
209 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
210 EXPECT_EQ(output.compare("\xc2\x81"), 0);
211 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
212 EXPECT_EQ(output.compare("\xc2\x81"), 0);
213 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
214 EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
215
216 {
217 const char array[] = "\x00\x00\xc2\x81\xc2\x81";
Daniel Chengf45f47602022-02-28 22:38:32218 const std::string array_string(array, std::size(array));
nick@chromium.orgd06e3e02010-05-28 20:18:20219 EXPECT_TRUE(Truncated(array_string, 4, &output));
220 EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
221 }
222
223 {
224 const char array[] = "\x00\xc2\x81\xc2\x81";
Daniel Chengf45f47602022-02-28 22:38:32225 const std::string array_string(array, std::size(array));
nick@chromium.orgd06e3e02010-05-28 20:18:20226 EXPECT_TRUE(Truncated(array_string, 4, &output));
227 EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
228 }
229
230 // Testing invalid UTF8
231 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
232 EXPECT_EQ(output.compare(""), 0);
233 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
234 EXPECT_EQ(output.compare(""), 0);
235 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
236 EXPECT_EQ(output.compare(""), 0);
237
238 // Testing invalid UTF8 mixed with valid UTF8
239 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
240 EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
241 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
242 EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
243 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
244 10, &output));
245 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
246 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
247 10, &output));
248 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
249 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
250 EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
251
252 // Overlong sequences
253 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
254 EXPECT_EQ(output.compare(""), 0);
255 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
256 EXPECT_EQ(output.compare(""), 0);
257 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
258 EXPECT_EQ(output.compare(""), 0);
259 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
260 EXPECT_EQ(output.compare(""), 0);
261 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
262 EXPECT_EQ(output.compare(""), 0);
263 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
264 EXPECT_EQ(output.compare(""), 0);
265 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
266 EXPECT_EQ(output.compare(""), 0);
267 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
268 EXPECT_EQ(output.compare(""), 0);
269 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
270 EXPECT_EQ(output.compare(""), 0);
271 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
272 EXPECT_EQ(output.compare(""), 0);
273 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
274 EXPECT_EQ(output.compare(""), 0);
275
276 // Beyond U+10FFFF (the upper limit of Unicode codespace)
277 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
278 EXPECT_EQ(output.compare(""), 0);
279 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
280 EXPECT_EQ(output.compare(""), 0);
281 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
282 EXPECT_EQ(output.compare(""), 0);
283
284 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
285 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
286 EXPECT_EQ(output.compare(""), 0);
287 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
288 EXPECT_EQ(output.compare(""), 0);
289
290 {
291 const char array[] = "\x00\x00\xfe\xff";
Daniel Chengf45f47602022-02-28 22:38:32292 const std::string array_string(array, std::size(array));
nick@chromium.orgd06e3e02010-05-28 20:18:20293 EXPECT_TRUE(Truncated(array_string, 4, &output));
294 EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
295 }
296
297 // Variants on the previous test
298 {
299 const char array[] = "\xff\xfe\x00\x00";
300 const std::string array_string(array, 4);
301 EXPECT_FALSE(Truncated(array_string, 4, &output));
302 EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
303 }
304 {
305 const char array[] = "\xff\x00\x00\xfe";
Daniel Chengf45f47602022-02-28 22:38:32306 const std::string array_string(array, std::size(array));
nick@chromium.orgd06e3e02010-05-28 20:18:20307 EXPECT_TRUE(Truncated(array_string, 4, &output));
308 EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
309 }
310
311 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
312 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
313 EXPECT_EQ(output.compare(""), 0);
314 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
315 EXPECT_EQ(output.compare(""), 0);
316 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
317 EXPECT_EQ(output.compare(""), 0);
318 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
319 EXPECT_EQ(output.compare(""), 0);
320 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
321 EXPECT_EQ(output.compare(""), 0);
322
323 // Strings in legacy encodings that are valid in UTF-8, but
324 // are invalid as UTF-8 in real data.
325 EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
326 EXPECT_EQ(output.compare("caf"), 0);
327 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
328 EXPECT_EQ(output.compare(""), 0);
329 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
330 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
331 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
332 &output));
333 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
334
335 // Testing using the same string as input and output.
336 EXPECT_FALSE(Truncated(output, 4, &output));
337 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
338 EXPECT_TRUE(Truncated(output, 3, &output));
339 EXPECT_EQ(output.compare("\xa7\x41"), 0);
340
341 // "abc" with U+201[CD] in windows-125[0-8]
342 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
343 EXPECT_EQ(output.compare("\x93" "abc"), 0);
344
345 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
346 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
347 EXPECT_EQ(output.compare(""), 0);
348
349 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
350 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
351 EXPECT_EQ(output.compare(""), 0);
352}
353
jdoerriea1771352018-12-19 18:48:17354#if defined(WCHAR_T_IS_UTF16)
jdoerriebacc1962019-02-07 13:39:22355TEST(StringUtilTest, as_wcstr) {
Jan Wilken Dörrie677e0c872021-03-10 10:04:38356 char16_t rw_buffer[10] = {};
jdoerriebacc1962019-02-07 13:39:22357 static_assert(
358 std::is_same<wchar_t*, decltype(as_writable_wcstr(rw_buffer))>::value,
359 "");
360 EXPECT_EQ(static_cast<void*>(rw_buffer), as_writable_wcstr(rw_buffer));
jdoerriea1771352018-12-19 18:48:17361
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57362 std::u16string rw_str(10, '\0');
jdoerriebacc1962019-02-07 13:39:22363 static_assert(
364 std::is_same<wchar_t*, decltype(as_writable_wcstr(rw_str))>::value, "");
365 EXPECT_EQ(static_cast<const void*>(rw_str.data()), as_writable_wcstr(rw_str));
jdoerriea1771352018-12-19 18:48:17366
Jan Wilken Dörrie677e0c872021-03-10 10:04:38367 const char16_t ro_buffer[10] = {};
jdoerriebacc1962019-02-07 13:39:22368 static_assert(
369 std::is_same<const wchar_t*, decltype(as_wcstr(ro_buffer))>::value, "");
370 EXPECT_EQ(static_cast<const void*>(ro_buffer), as_wcstr(ro_buffer));
jdoerriea1771352018-12-19 18:48:17371
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57372 const std::u16string ro_str(10, '\0');
jdoerriebacc1962019-02-07 13:39:22373 static_assert(std::is_same<const wchar_t*, decltype(as_wcstr(ro_str))>::value,
jdoerriea1771352018-12-19 18:48:17374 "");
jdoerriebacc1962019-02-07 13:39:22375 EXPECT_EQ(static_cast<const void*>(ro_str.data()), as_wcstr(ro_str));
jdoerriea1771352018-12-19 18:48:17376
377 StringPiece16 piece = ro_buffer;
jdoerriebacc1962019-02-07 13:39:22378 static_assert(std::is_same<const wchar_t*, decltype(as_wcstr(piece))>::value,
jdoerriea1771352018-12-19 18:48:17379 "");
jdoerriebacc1962019-02-07 13:39:22380 EXPECT_EQ(static_cast<const void*>(piece.data()), as_wcstr(piece));
381}
382
383TEST(StringUtilTest, as_u16cstr) {
384 wchar_t rw_buffer[10] = {};
385 static_assert(
Jan Wilken Dörrie677e0c872021-03-10 10:04:38386 std::is_same<char16_t*, decltype(as_writable_u16cstr(rw_buffer))>::value,
jdoerriebacc1962019-02-07 13:39:22387 "");
388 EXPECT_EQ(static_cast<void*>(rw_buffer), as_writable_u16cstr(rw_buffer));
389
390 std::wstring rw_str(10, '\0');
391 static_assert(
Jan Wilken Dörrie677e0c872021-03-10 10:04:38392 std::is_same<char16_t*, decltype(as_writable_u16cstr(rw_str))>::value,
393 "");
jdoerriebacc1962019-02-07 13:39:22394 EXPECT_EQ(static_cast<const void*>(rw_str.data()),
395 as_writable_u16cstr(rw_str));
396
397 const wchar_t ro_buffer[10] = {};
398 static_assert(
Jan Wilken Dörrie677e0c872021-03-10 10:04:38399 std::is_same<const char16_t*, decltype(as_u16cstr(ro_buffer))>::value,
400 "");
jdoerriebacc1962019-02-07 13:39:22401 EXPECT_EQ(static_cast<const void*>(ro_buffer), as_u16cstr(ro_buffer));
402
403 const std::wstring ro_str(10, '\0');
404 static_assert(
Jan Wilken Dörrie677e0c872021-03-10 10:04:38405 std::is_same<const char16_t*, decltype(as_u16cstr(ro_str))>::value, "");
jdoerriebacc1962019-02-07 13:39:22406 EXPECT_EQ(static_cast<const void*>(ro_str.data()), as_u16cstr(ro_str));
407
408 WStringPiece piece = ro_buffer;
Jan Wilken Dörrie677e0c872021-03-10 10:04:38409 static_assert(
410 std::is_same<const char16_t*, decltype(as_u16cstr(piece))>::value, "");
jdoerriebacc1962019-02-07 13:39:22411 EXPECT_EQ(static_cast<const void*>(piece.data()), as_u16cstr(piece));
jdoerriea1771352018-12-19 18:48:17412}
413#endif // defined(WCHAR_T_IS_UTF16)
414
initial.commitd7cae122008-07-26 21:49:38415TEST(StringUtilTest, TrimWhitespace) {
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57416 std::u16string output; // Allow contents to carry over to next testcase
jdoerrie6c6229352018-10-22 15:55:43417 for (const auto& value : trim_cases) {
initial.commitd7cae122008-07-26 21:49:38418 EXPECT_EQ(value.return_value,
evan@chromium.orgcf81d2d2011-05-10 17:42:13419 TrimWhitespace(WideToUTF16(value.input), value.positions,
420 &output));
421 EXPECT_EQ(WideToUTF16(value.output), output);
initial.commitd7cae122008-07-26 21:49:38422 }
423
424 // Test that TrimWhitespace() can take the same string for input and output
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48425 output = u" This is a test \r\n";
initial.commitd7cae122008-07-26 21:49:38426 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48427 EXPECT_EQ(u"This is a test", output);
initial.commitd7cae122008-07-26 21:49:38428
429 // Once more, but with a string of whitespace
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48430 output = u" \r\n";
initial.commitd7cae122008-07-26 21:49:38431 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57432 EXPECT_EQ(std::u16string(), output);
initial.commitd7cae122008-07-26 21:49:38433
434 std::string output_ascii;
jdoerrie6c6229352018-10-22 15:55:43435 for (const auto& value : trim_cases_ascii) {
initial.commitd7cae122008-07-26 21:49:38436 EXPECT_EQ(value.return_value,
tfarina023b1dcc2015-12-06 13:25:41437 TrimWhitespaceASCII(value.input, value.positions, &output_ascii));
initial.commitd7cae122008-07-26 21:49:38438 EXPECT_EQ(value.output, output_ascii);
439 }
440}
441
// One wide-character collapsing expectation: collapsing |input| with the
// given |trim| flag should produce |output|.
struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
};

// Cases consumed by the CollapseWhitespace test. The first eleven rows pass
// trim = false; the remaining rows pass trim = true.
static const collapse_case collapse_cases[] = {
    {L" Google Video ", false, L"Google Video"},
    {L"Google Video", false, L"Google Video"},
    {L"", false, L""},
    {L" ", false, L""},
    {L"\t\rTest String\n", false, L"Test String"},
    {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
    {L" Test \n \t String ", false, L"Test String"},
    {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
    {L" Test String", false, L"Test String"},
    {L"Test String ", false, L"Test String"},
    {L"Test String", false, L"Test String"},
    {L"", true, L""},
    {L"\n", true, L""},
    {L" \r ", true, L""},
    {L"\nFoo", true, L"Foo"},
    {L"\r Foo ", true, L"Foo"},
    {L" Foo bar ", true, L"Foo bar"},
    {L" \tFoo bar \n", true, L"Foo bar"},
    {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
467
468TEST(StringUtilTest, CollapseWhitespace) {
jdoerrie6c6229352018-10-22 15:55:43469 for (const auto& value : collapse_cases) {
thestig@chromium.org7b527b92013-11-22 02:29:08470 EXPECT_EQ(WideToUTF16(value.output),
471 CollapseWhitespace(WideToUTF16(value.input), value.trim));
initial.commitd7cae122008-07-26 21:49:38472 }
473}
474
// One narrow-character collapsing expectation: collapsing |input| with the
// given |trim| flag should produce |output|.
struct collapse_case_ascii {
  const char* input;
  const bool trim;
  const char* output;
};

// Cases consumed by the CollapseWhitespaceASCII test. The first nine rows pass
// trim = false; the remaining rows pass trim = true.
static const collapse_case_ascii collapse_cases_ascii[] = {
    {" Google Video ", false, "Google Video"},
    {"Google Video", false, "Google Video"},
    {"", false, ""},
    {" ", false, ""},
    {"\t\rTest String\n", false, "Test String"},
    {" Test \n \t String ", false, "Test String"},
    {" Test String", false, "Test String"},
    {"Test String ", false, "Test String"},
    {"Test String", false, "Test String"},
    {"", true, ""},
    {"\n", true, ""},
    {" \r ", true, ""},
    {"\nFoo", true, "Foo"},
    {"\r Foo ", true, "Foo"},
    {" Foo bar ", true, "Foo bar"},
    // \u00A0 is whitespace, but not _ASCII_ whitespace, so it should not be
    // collapsed by CollapseWhitespaceASCII().
    {"Foo\u00A0bar", true, "Foo\u00A0bar"},
    {" \tFoo bar \n", true, "Foo bar"},
    {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
501
502TEST(StringUtilTest, CollapseWhitespaceASCII) {
jdoerrie6c6229352018-10-22 15:55:43503 for (const auto& value : collapse_cases_ascii) {
rafaelw@chromium.org46878fb92009-06-04 23:10:39504 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
505 }
506}
jungshik@google.comc9ec4542008-09-25 21:42:00507
508TEST(StringUtilTest, IsStringUTF8) {
Mathias Bynenscdfff6b2020-01-22 15:51:46509 {
510 SCOPED_TRACE("IsStringUTF8");
511 TestStructurallyValidUtf8(&IsStringUTF8);
512 TestStructurallyInvalidUtf8(&IsStringUTF8);
513 TestNoncharacters(&IsStringUTF8, false);
514 }
jungshik@google.comc9ec4542008-09-25 21:42:00515
Mathias Bynenscdfff6b2020-01-22 15:51:46516 {
517 SCOPED_TRACE("IsStringUTF8AllowingNoncharacters");
518 TestStructurallyValidUtf8(&IsStringUTF8AllowingNoncharacters);
519 TestStructurallyInvalidUtf8(&IsStringUTF8AllowingNoncharacters);
520 TestNoncharacters(&IsStringUTF8AllowingNoncharacters, true);
521 }
jungshik@google.comc9ec4542008-09-25 21:42:00522}
523
Mikhail Naganov6f500092014-09-29 12:01:19524TEST(StringUtilTest, IsStringASCII) {
525 static char char_ascii[] =
526 "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
Jan Wilken Dörrie677e0c872021-03-10 10:04:38527 static char16_t char16_ascii[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8',
528 '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', '0',
529 '1', '2', '3', '4', '5', '6', '7', '8', '9',
530 '0', 'A', 'B', 'C', 'D', 'E', 'F', 0};
Mikhail Naganov2e898de2014-10-06 09:18:33531 static std::wstring wchar_ascii(
532 L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");
Mikhail Naganov6f500092014-09-29 12:01:19533
534 // Test a variety of the fragment start positions and lengths in order to make
535 // sure that bit masking in IsStringASCII works correctly.
536 // Also, test that a non-ASCII character will be detected regardless of its
537 // position inside the string.
538 {
Daniel Chengf45f47602022-02-28 22:38:32539 const size_t string_length = std::size(char_ascii) - 1;
Mikhail Naganov6f500092014-09-29 12:01:19540 for (size_t offset = 0; offset < 8; ++offset) {
541 for (size_t len = 0, max_len = string_length - offset; len < max_len;
542 ++len) {
543 EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
544 for (size_t char_pos = offset; char_pos < len; ++char_pos) {
545 char_ascii[char_pos] |= '\x80';
546 EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
547 char_ascii[char_pos] &= ~'\x80';
548 }
549 }
550 }
551 }
552
553 {
Daniel Chengf45f47602022-02-28 22:38:32554 const size_t string_length = std::size(char16_ascii) - 1;
Mikhail Naganov6f500092014-09-29 12:01:19555 for (size_t offset = 0; offset < 4; ++offset) {
556 for (size_t len = 0, max_len = string_length - offset; len < max_len;
557 ++len) {
558 EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
559 for (size_t char_pos = offset; char_pos < len; ++char_pos) {
560 char16_ascii[char_pos] |= 0x80;
561 EXPECT_FALSE(
562 IsStringASCII(StringPiece16(char16_ascii + offset, len)));
563 char16_ascii[char_pos] &= ~0x80;
564 // Also test when the upper half is non-zero.
565 char16_ascii[char_pos] |= 0x100;
566 EXPECT_FALSE(
567 IsStringASCII(StringPiece16(char16_ascii + offset, len)));
568 char16_ascii[char_pos] &= ~0x100;
569 }
570 }
571 }
572 }
Mikhail Naganov2e898de2014-10-06 09:18:33573
jdoerrie5c4dc4e2019-02-01 18:02:33574#if defined(WCHAR_T_IS_UTF32)
Mikhail Naganov2e898de2014-10-06 09:18:33575 {
576 const size_t string_length = wchar_ascii.length();
577 for (size_t len = 0; len < string_length; ++len) {
578 EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len)));
579 for (size_t char_pos = 0; char_pos < len; ++char_pos) {
580 wchar_ascii[char_pos] |= 0x80;
jdoerrie5c4dc4e2019-02-01 18:02:33581 EXPECT_FALSE(IsStringASCII(wchar_ascii.substr(0, len)));
Mikhail Naganov2e898de2014-10-06 09:18:33582 wchar_ascii[char_pos] &= ~0x80;
583 wchar_ascii[char_pos] |= 0x100;
jdoerrie5c4dc4e2019-02-01 18:02:33584 EXPECT_FALSE(IsStringASCII(wchar_ascii.substr(0, len)));
Mikhail Naganov2e898de2014-10-06 09:18:33585 wchar_ascii[char_pos] &= ~0x100;
Mikhail Naganov2e898de2014-10-06 09:18:33586 wchar_ascii[char_pos] |= 0x10000;
jdoerrie5c4dc4e2019-02-01 18:02:33587 EXPECT_FALSE(IsStringASCII(wchar_ascii.substr(0, len)));
Mikhail Naganov2e898de2014-10-06 09:18:33588 wchar_ascii[char_pos] &= ~0x10000;
Mikhail Naganov2e898de2014-10-06 09:18:33589 }
590 }
591 }
jdoerrie5c4dc4e2019-02-01 18:02:33592#endif // WCHAR_T_IS_UTF32
Mikhail Naganov6f500092014-09-29 12:01:19593}
594
initial.commitd7cae122008-07-26 21:49:38595TEST(StringUtilTest, ConvertASCII) {
thestig073d514d2014-10-21 03:11:21596 static const char* const char_cases[] = {
initial.commitd7cae122008-07-26 21:49:38597 "Google Video",
598 "Hello, world\n",
599 "0123ABCDwxyz \a\b\t\r\n!+,.~"
600 };
601
602 static const wchar_t* const wchar_cases[] = {
603 L"Google Video",
604 L"Hello, world\n",
605 L"0123ABCDwxyz \a\b\t\r\n!+,.~"
606 };
607
Daniel Chengf45f47602022-02-28 22:38:32608 for (size_t i = 0; i < std::size(char_cases); ++i) {
initial.commitd7cae122008-07-26 21:49:38609 EXPECT_TRUE(IsStringASCII(char_cases[i]));
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57610 std::u16string utf16 = ASCIIToUTF16(char_cases[i]);
brettw@chromium.org74f778e2014-03-14 21:11:46611 EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16);
initial.commitd7cae122008-07-26 21:49:38612
brettw@chromium.org74f778e2014-03-14 21:11:46613 std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i]));
initial.commitd7cae122008-07-26 21:49:38614 EXPECT_EQ(char_cases[i], ascii);
615 }
616
617 EXPECT_FALSE(IsStringASCII("Google \x80Video"));
initial.commitd7cae122008-07-26 21:49:38618
619 // Convert empty strings.
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57620 std::u16string empty16;
initial.commitd7cae122008-07-26 21:49:38621 std::string empty;
brettw@chromium.org74f778e2014-03-14 21:11:46622 EXPECT_EQ(empty, UTF16ToASCII(empty16));
623 EXPECT_EQ(empty16, ASCIIToUTF16(empty));
mmentovai@google.coma218f152008-08-05 22:46:15624
625 // Convert strings with an embedded NUL character.
626 const char chars_with_nul[] = "test\0string";
Daniel Chengf45f47602022-02-28 22:38:32627 const int length_with_nul = std::size(chars_with_nul) - 1;
mmentovai@google.coma218f152008-08-05 22:46:15628 std::string string_with_nul(chars_with_nul, length_with_nul);
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57629 std::u16string string16_with_nul = ASCIIToUTF16(string_with_nul);
630 EXPECT_EQ(static_cast<std::u16string::size_type>(length_with_nul),
thestig93570e22014-12-08 21:00:22631 string16_with_nul.length());
632 std::string narrow_with_nul = UTF16ToASCII(string16_with_nul);
mmentovai@google.com703f427e2008-08-13 01:17:18633 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
634 narrow_with_nul.length());
mmentovai@google.coma218f152008-08-05 22:46:15635 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
initial.commitd7cae122008-07-26 21:49:38636}
637
brettwc15100c2015-08-06 22:54:16638TEST(StringUtilTest, ToLowerASCII) {
639 EXPECT_EQ('c', ToLowerASCII('C'));
640 EXPECT_EQ('c', ToLowerASCII('c'));
641 EXPECT_EQ('2', ToLowerASCII('2'));
642
Jan Wilken Dörriecf672362021-03-15 14:16:01643 EXPECT_EQ(u'c', ToLowerASCII(u'C'));
644 EXPECT_EQ(u'c', ToLowerASCII(u'c'));
645 EXPECT_EQ(u'2', ToLowerASCII(u'2'));
brettwc15100c2015-08-06 22:54:16646
647 EXPECT_EQ("cc2", ToLowerASCII("Cc2"));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48648 EXPECT_EQ(u"cc2", ToLowerASCII(u"Cc2"));
David Benjamin18fb9ea2022-11-22 18:22:33649
650 // Non-ASCII characters are unmodified. U+00C4 is LATIN CAPITAL LETTER A WITH
651 // DIAERESIS.
652 EXPECT_EQ('\xc4', ToLowerASCII('\xc4'));
653 EXPECT_EQ(u'\x00c4', ToLowerASCII(u'\x00c4'));
brettwc15100c2015-08-06 22:54:16654}
655
wtc@chromium.orgdcbffba2009-02-27 03:00:48656TEST(StringUtilTest, ToUpperASCII) {
657 EXPECT_EQ('C', ToUpperASCII('C'));
658 EXPECT_EQ('C', ToUpperASCII('c'));
659 EXPECT_EQ('2', ToUpperASCII('2'));
660
Jan Wilken Dörriecf672362021-03-15 14:16:01661 EXPECT_EQ(u'C', ToUpperASCII(u'C'));
662 EXPECT_EQ(u'C', ToUpperASCII(u'c'));
663 EXPECT_EQ(u'2', ToUpperASCII(u'2'));
wtc@chromium.orgdcbffba2009-02-27 03:00:48664
brettwc15100c2015-08-06 22:54:16665 EXPECT_EQ("CC2", ToUpperASCII("Cc2"));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48666 EXPECT_EQ(u"CC2", ToUpperASCII(u"Cc2"));
David Benjamin18fb9ea2022-11-22 18:22:33667
668 // Non-ASCII characters are unmodified. U+00E4 is LATIN SMALL LETTER A WITH
669 // DIAERESIS.
670 EXPECT_EQ('\xe4', ToUpperASCII('\xe4'));
671 EXPECT_EQ(u'\x00e4', ToUpperASCII(u'\x00e4'));
wtc@chromium.orgdcbffba2009-02-27 03:00:48672}
673
avi@chromium.org7a3b2632011-06-22 20:40:22674TEST(StringUtilTest, FormatBytesUnlocalized) {
initial.commitd7cae122008-07-26 21:49:38675 static const struct {
avi84f37e12015-12-25 09:31:42676 int64_t bytes;
evan@chromium.org7b83a102010-08-19 23:11:28677 const char* expected;
initial.commitd7cae122008-07-26 21:49:38678 } cases[] = {
OlivierLi02b94c52020-05-29 21:30:56679 // Expected behavior: we show one post-decimal digit when we have
680 // under two pre-decimal digits, except in cases where it makes no
681 // sense (zero or bytes).
682 // Since we switch units once we cross the 1000 mark, this keeps
683 // the display of file sizes or bytes consistently around three
684 // digits.
685 {0, "0 B"},
686 {512, "512 B"},
687 {1024 * 1024, "1.0 MB"},
688 {1024 * 1024 * 1024, "1.0 GB"},
689 {10LL * 1024 * 1024 * 1024, "10.0 GB"},
690 {99LL * 1024 * 1024 * 1024, "99.0 GB"},
691 {105LL * 1024 * 1024 * 1024, "105 GB"},
692 {105LL * 1024 * 1024 * 1024 + 500LL * 1024 * 1024, "105 GB"},
693 {~(bits::LeftmostBit<int64_t>()), "8192 PB"},
evan@chromium.org26cfaf52010-04-15 18:22:57694
OlivierLi02b94c52020-05-29 21:30:56695 {99 * 1024 + 103, "99.1 kB"},
696 {1024 * 1024 + 103, "1.0 MB"},
697 {1024 * 1024 + 205 * 1024, "1.2 MB"},
698 {1024 * 1024 * 1024 + (927 * 1024 * 1024), "1.9 GB"},
699 {10LL * 1024 * 1024 * 1024, "10.0 GB"},
700 {100LL * 1024 * 1024 * 1024, "100 GB"},
initial.commitd7cae122008-07-26 21:49:38701 };
702
jdoerrie6c6229352018-10-22 15:55:43703 for (const auto& i : cases) {
704 EXPECT_EQ(ASCIIToUTF16(i.expected), FormatBytesUnlocalized(i.bytes));
initial.commitd7cae122008-07-26 21:49:38705 }
706}
initial.commitd7cae122008-07-26 21:49:38707TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
708 static const struct {
nick09d9682b2017-08-02 00:31:16709 StringPiece str;
710 size_t start_offset;
711 StringPiece find_this;
712 StringPiece replace_with;
713 StringPiece expected;
initial.commitd7cae122008-07-26 21:49:38714 } cases[] = {
Nick Carterb1e364d2017-09-26 18:28:45715 {"aaa", 0, "", "b", "aaa"},
716 {"aaa", 1, "", "b", "aaa"},
nick09d9682b2017-08-02 00:31:16717 {"aaa", 0, "a", "b", "bbb"},
718 {"aaa", 0, "aa", "b", "ba"},
719 {"aaa", 0, "aa", "bbb", "bbba"},
720 {"aaaaa", 0, "aa", "b", "bba"},
721 {"ababaaababa", 0, "aba", "", "baaba"},
722 {"ababaaababa", 0, "aba", "_", "_baa_ba"},
723 {"ababaaababa", 0, "aba", "__", "__baa__ba"},
724 {"ababaaababa", 0, "aba", "___", "___baa___ba"},
725 {"ababaaababa", 0, "aba", "____", "____baa____ba"},
726 {"ababaaababa", 0, "aba", "_____", "_____baa_____ba"},
727 {"abb", 0, "ab", "a", "ab"},
728 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
729 {"Not found", 0, "x", "0", "Not found"},
730 {"Not found again", 5, "x", "0", "Not found again"},
731 {" Making it much longer ", 0, " ", "Four score and seven years ago",
732 "Four score and seven years agoMakingFour score and seven years agoit"
733 "Four score and seven years agomuchFour score and seven years agolonger"
734 "Four score and seven years ago"},
735 {" Making it much much much much shorter ", 0,
736 "Making it much much much much shorter", "", " "},
737 {"so much much much much much very much much much shorter", 0, "much ",
738 "", "so very shorter"},
739 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
740 {"Replace me only me once", 9, "me ", "", "Replace me only once"},
741 {"abababab", 2, "ab", "c", "abccc"},
Nick Carterb1e364d2017-09-26 18:28:45742 {"abababab", 1, "ab", "c", "abccc"},
743 {"abababab", 1, "aba", "c", "abcbab"},
initial.commitd7cae122008-07-26 21:49:38744 };
745
Jan Wilken Dörrie85285b02021-03-11 23:38:47746 // std::u16string variant
nick09d9682b2017-08-02 00:31:16747 for (const auto& scenario : cases) {
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57748 std::u16string str = ASCIIToUTF16(scenario.str);
nick09d9682b2017-08-02 00:31:16749 ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
750 ASCIIToUTF16(scenario.find_this),
751 ASCIIToUTF16(scenario.replace_with));
752 EXPECT_EQ(ASCIIToUTF16(scenario.expected), str);
753 }
754
755 // std::string with insufficient capacity: expansion must realloc the buffer.
756 for (const auto& scenario : cases) {
Jan Wilken Dörrie5db50ac2021-02-15 11:43:16757 std::string str(scenario.str);
nick09d9682b2017-08-02 00:31:16758 str.shrink_to_fit(); // This is nonbinding, but it's the best we've got.
759 ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
760 scenario.find_this, scenario.replace_with);
761 EXPECT_EQ(scenario.expected, str);
762 }
763
764 // std::string with ample capacity: should be possible to grow in-place.
765 for (const auto& scenario : cases) {
Jan Wilken Dörrie5db50ac2021-02-15 11:43:16766 std::string str(scenario.str);
nick09d9682b2017-08-02 00:31:16767 str.reserve(std::max(scenario.str.length(), scenario.expected.length()) *
768 2);
769
770 ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
771 scenario.find_this, scenario.replace_with);
772 EXPECT_EQ(scenario.expected, str);
initial.commitd7cae122008-07-26 21:49:38773 }
774}
775
aa@chromium.orga1a61172009-01-21 23:47:54776TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
777 static const struct {
dsh@google.comdbf476d2009-03-03 01:21:09778 const char* str;
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57779 std::u16string::size_type start_offset;
dsh@google.comdbf476d2009-03-03 01:21:09780 const char* find_this;
781 const char* replace_with;
782 const char* expected;
aa@chromium.orga1a61172009-01-21 23:47:54783 } cases[] = {
dsh@google.comdbf476d2009-03-03 01:21:09784 {"aaa", 0, "a", "b", "baa"},
785 {"abb", 0, "ab", "a", "ab"},
786 {"Removing some substrings inging", 0, "ing", "",
787 "Remov some substrings inging"},
788 {"Not found", 0, "x", "0", "Not found"},
789 {"Not found again", 5, "x", "0", "Not found again"},
790 {" Making it much longer ", 0, " ", "Four score and seven years ago",
791 "Four score and seven years agoMaking it much longer "},
792 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
793 {"Replace me only me once", 4, "me ", "", "Replace only me once"},
794 {"abababab", 2, "ab", "c", "abcabab"},
aa@chromium.orga1a61172009-01-21 23:47:54795 };
796
jdoerrie6c6229352018-10-22 15:55:43797 for (const auto& i : cases) {
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57798 std::u16string str = ASCIIToUTF16(i.str);
jdoerrie6c6229352018-10-22 15:55:43799 ReplaceFirstSubstringAfterOffset(&str, i.start_offset,
800 ASCIIToUTF16(i.find_this),
801 ASCIIToUTF16(i.replace_with));
802 EXPECT_EQ(ASCIIToUTF16(i.expected), str);
aa@chromium.orga1a61172009-01-21 23:47:54803 }
804}
805
tfarina@chromium.org61197df2010-07-31 21:03:16806TEST(StringUtilTest, HexDigitToInt) {
807 EXPECT_EQ(0, HexDigitToInt('0'));
808 EXPECT_EQ(1, HexDigitToInt('1'));
809 EXPECT_EQ(2, HexDigitToInt('2'));
810 EXPECT_EQ(3, HexDigitToInt('3'));
811 EXPECT_EQ(4, HexDigitToInt('4'));
812 EXPECT_EQ(5, HexDigitToInt('5'));
813 EXPECT_EQ(6, HexDigitToInt('6'));
814 EXPECT_EQ(7, HexDigitToInt('7'));
815 EXPECT_EQ(8, HexDigitToInt('8'));
816 EXPECT_EQ(9, HexDigitToInt('9'));
817 EXPECT_EQ(10, HexDigitToInt('A'));
818 EXPECT_EQ(11, HexDigitToInt('B'));
819 EXPECT_EQ(12, HexDigitToInt('C'));
820 EXPECT_EQ(13, HexDigitToInt('D'));
821 EXPECT_EQ(14, HexDigitToInt('E'));
822 EXPECT_EQ(15, HexDigitToInt('F'));
823
824 // Verify the lower case as well.
825 EXPECT_EQ(10, HexDigitToInt('a'));
826 EXPECT_EQ(11, HexDigitToInt('b'));
827 EXPECT_EQ(12, HexDigitToInt('c'));
828 EXPECT_EQ(13, HexDigitToInt('d'));
829 EXPECT_EQ(14, HexDigitToInt('e'));
830 EXPECT_EQ(15, HexDigitToInt('f'));
831}
832
aa@chromium.org0b7c0922009-02-02 04:09:58833TEST(StringUtilTest, JoinString) {
smckay@chromium.orga58f56cd2012-08-27 22:58:36834 std::string separator(", ");
835 std::vector<std::string> parts;
836 EXPECT_EQ(std::string(), JoinString(parts, separator));
837
mgiuca601ab6b2017-03-06 03:05:23838 parts.push_back(std::string());
839 EXPECT_EQ(std::string(), JoinString(parts, separator));
840 parts.clear();
841
smckay@chromium.orga58f56cd2012-08-27 22:58:36842 parts.push_back("a");
843 EXPECT_EQ("a", JoinString(parts, separator));
844
845 parts.push_back("b");
846 parts.push_back("c");
847 EXPECT_EQ("a, b, c", JoinString(parts, separator));
848
dcheng@chromium.org007b3f82013-04-09 08:46:45849 parts.push_back(std::string());
smckay@chromium.orga58f56cd2012-08-27 22:58:36850 EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
851 parts.push_back(" ");
852 EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
853}
854
brettwd94a22142015-07-15 05:19:26855TEST(StringUtilTest, JoinString16) {
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48856 std::u16string separator = u", ";
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57857 std::vector<std::u16string> parts;
858 EXPECT_EQ(std::u16string(), JoinString(parts, separator));
smckay@chromium.orga58f56cd2012-08-27 22:58:36859
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57860 parts.push_back(std::u16string());
861 EXPECT_EQ(std::u16string(), JoinString(parts, separator));
mgiuca601ab6b2017-03-06 03:05:23862 parts.clear();
863
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48864 parts.push_back(u"a");
865 EXPECT_EQ(u"a", JoinString(parts, separator));
smckay@chromium.orga58f56cd2012-08-27 22:58:36866
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48867 parts.push_back(u"b");
868 parts.push_back(u"c");
869 EXPECT_EQ(u"a, b, c", JoinString(parts, separator));
smckay@chromium.orga58f56cd2012-08-27 22:58:36870
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48871 parts.push_back(u"");
872 EXPECT_EQ(u"a, b, c, ", JoinString(parts, separator));
873 parts.push_back(u" ");
874 EXPECT_EQ(u"a|b|c|| ", JoinString(parts, u"|"));
smckay@chromium.orga58f56cd2012-08-27 22:58:36875}
876
mgiucab0643122017-02-23 08:41:40877TEST(StringUtilTest, JoinStringPiece) {
878 std::string separator(", ");
mgiuca601ab6b2017-03-06 03:05:23879 std::vector<StringPiece> parts;
880 EXPECT_EQ(std::string(), JoinString(parts, separator));
881
882 // Test empty first part (https://crbug.com/698073).
883 parts.push_back(StringPiece());
884 EXPECT_EQ(std::string(), JoinString(parts, separator));
885 parts.clear();
mgiucab0643122017-02-23 08:41:40886
887 parts.push_back("a");
888 EXPECT_EQ("a", JoinString(parts, separator));
889
890 parts.push_back("b");
891 parts.push_back("c");
892 EXPECT_EQ("a, b, c", JoinString(parts, separator));
893
mgiuca601ab6b2017-03-06 03:05:23894 parts.push_back(StringPiece());
mgiucab0643122017-02-23 08:41:40895 EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
896 parts.push_back(" ");
897 EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
898}
899
900TEST(StringUtilTest, JoinStringPiece16) {
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48901 std::u16string separator = u", ";
mgiuca601ab6b2017-03-06 03:05:23902 std::vector<StringPiece16> parts;
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57903 EXPECT_EQ(std::u16string(), JoinString(parts, separator));
mgiuca601ab6b2017-03-06 03:05:23904
905 // Test empty first part (https://crbug.com/698073).
906 parts.push_back(StringPiece16());
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57907 EXPECT_EQ(std::u16string(), JoinString(parts, separator));
mgiuca601ab6b2017-03-06 03:05:23908 parts.clear();
mgiucab0643122017-02-23 08:41:40909
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48910 const std::u16string kA = u"a";
mgiucab0643122017-02-23 08:41:40911 parts.push_back(kA);
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48912 EXPECT_EQ(u"a", JoinString(parts, separator));
mgiucab0643122017-02-23 08:41:40913
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48914 const std::u16string kB = u"b";
mgiucab0643122017-02-23 08:41:40915 parts.push_back(kB);
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48916 const std::u16string kC = u"c";
mgiucab0643122017-02-23 08:41:40917 parts.push_back(kC);
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48918 EXPECT_EQ(u"a, b, c", JoinString(parts, separator));
mgiucab0643122017-02-23 08:41:40919
mgiuca601ab6b2017-03-06 03:05:23920 parts.push_back(StringPiece16());
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48921 EXPECT_EQ(u"a, b, c, ", JoinString(parts, separator));
922 const std::u16string kSpace = u" ";
mgiucab0643122017-02-23 08:41:40923 parts.push_back(kSpace);
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48924 EXPECT_EQ(u"a|b|c|| ", JoinString(parts, u"|"));
mgiucab0643122017-02-23 08:41:40925}
926
927TEST(StringUtilTest, JoinStringInitializerList) {
928 std::string separator(", ");
mgiuca601ab6b2017-03-06 03:05:23929 EXPECT_EQ(std::string(), JoinString({}, separator));
930
931 // Test empty first part (https://crbug.com/698073).
932 EXPECT_EQ(std::string(), JoinString({StringPiece()}, separator));
mgiucab0643122017-02-23 08:41:40933
934 // With const char*s.
935 EXPECT_EQ("a", JoinString({"a"}, separator));
936 EXPECT_EQ("a, b, c", JoinString({"a", "b", "c"}, separator));
mgiuca601ab6b2017-03-06 03:05:23937 EXPECT_EQ("a, b, c, ", JoinString({"a", "b", "c", StringPiece()}, separator));
938 EXPECT_EQ("a|b|c|| ", JoinString({"a", "b", "c", StringPiece(), " "}, "|"));
mgiucab0643122017-02-23 08:41:40939
940 // With std::strings.
941 const std::string kA = "a";
942 const std::string kB = "b";
943 EXPECT_EQ("a, b", JoinString({kA, kB}, separator));
944
945 // With StringPieces.
946 const StringPiece kPieceA = kA;
947 const StringPiece kPieceB = kB;
948 EXPECT_EQ("a, b", JoinString({kPieceA, kPieceB}, separator));
949}
950
951TEST(StringUtilTest, JoinStringInitializerList16) {
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48952 std::u16string separator = u", ";
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57953 EXPECT_EQ(std::u16string(), JoinString({}, separator));
mgiuca601ab6b2017-03-06 03:05:23954
955 // Test empty first part (https://crbug.com/698073).
Jan Wilken Dörrie085b2aa2021-03-12 16:26:57956 EXPECT_EQ(std::u16string(), JoinString({StringPiece16()}, separator));
mgiucab0643122017-02-23 08:41:40957
958 // With string16s.
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48959 const std::u16string kA = u"a";
960 EXPECT_EQ(u"a", JoinString({kA}, separator));
mgiucab0643122017-02-23 08:41:40961
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48962 const std::u16string kB = u"b";
963 const std::u16string kC = u"c";
964 EXPECT_EQ(u"a, b, c", JoinString({kA, kB, kC}, separator));
mgiucab0643122017-02-23 08:41:40965
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48966 EXPECT_EQ(u"a, b, c, ", JoinString({kA, kB, kC, StringPiece16()}, separator));
967 const std::u16string kSpace = u" ";
968 EXPECT_EQ(u"a|b|c|| ",
969 JoinString({kA, kB, kC, StringPiece16(), kSpace}, u"|"));
mgiucab0643122017-02-23 08:41:40970
971 // With StringPiece16s.
972 const StringPiece16 kPieceA = kA;
973 const StringPiece16 kPieceB = kB;
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48974 EXPECT_EQ(u"a, b", JoinString({kPieceA, kPieceB}, separator));
mgiucab0643122017-02-23 08:41:40975}
976
initial.commitd7cae122008-07-26 21:49:38977TEST(StringUtilTest, StartsWith) {
brettwa7ff1b292015-07-16 17:49:29978 EXPECT_TRUE(StartsWith("javascript:url", "javascript",
979 base::CompareCase::SENSITIVE));
980 EXPECT_FALSE(StartsWith("JavaScript:url", "javascript",
981 base::CompareCase::SENSITIVE));
982 EXPECT_TRUE(StartsWith("javascript:url", "javascript",
983 base::CompareCase::INSENSITIVE_ASCII));
984 EXPECT_TRUE(StartsWith("JavaScript:url", "javascript",
985 base::CompareCase::INSENSITIVE_ASCII));
986 EXPECT_FALSE(StartsWith("java", "javascript", base::CompareCase::SENSITIVE));
987 EXPECT_FALSE(StartsWith("java", "javascript",
988 base::CompareCase::INSENSITIVE_ASCII));
989 EXPECT_FALSE(StartsWith(std::string(), "javascript",
990 base::CompareCase::INSENSITIVE_ASCII));
991 EXPECT_FALSE(StartsWith(std::string(), "javascript",
992 base::CompareCase::SENSITIVE));
993 EXPECT_TRUE(StartsWith("java", std::string(),
994 base::CompareCase::INSENSITIVE_ASCII));
995 EXPECT_TRUE(StartsWith("java", std::string(), base::CompareCase::SENSITIVE));
jcampan@chromium.org89ac46c92008-11-05 19:28:08996
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48997 EXPECT_TRUE(StartsWith(u"javascript:url", u"javascript",
brettwa7ff1b292015-07-16 17:49:29998 base::CompareCase::SENSITIVE));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:48999 EXPECT_FALSE(StartsWith(u"JavaScript:url", u"javascript",
brettwa7ff1b292015-07-16 17:49:291000 base::CompareCase::SENSITIVE));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481001 EXPECT_TRUE(StartsWith(u"javascript:url", u"javascript",
brettwa7ff1b292015-07-16 17:49:291002 base::CompareCase::INSENSITIVE_ASCII));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481003 EXPECT_TRUE(StartsWith(u"JavaScript:url", u"javascript",
brettwa7ff1b292015-07-16 17:49:291004 base::CompareCase::INSENSITIVE_ASCII));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481005 EXPECT_FALSE(
1006 StartsWith(u"java", u"javascript", base::CompareCase::SENSITIVE));
1007 EXPECT_FALSE(
1008 StartsWith(u"java", u"javascript", base::CompareCase::INSENSITIVE_ASCII));
1009 EXPECT_FALSE(StartsWith(std::u16string(), u"javascript",
brettwa7ff1b292015-07-16 17:49:291010 base::CompareCase::INSENSITIVE_ASCII));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481011 EXPECT_FALSE(StartsWith(std::u16string(), u"javascript",
brettwa7ff1b292015-07-16 17:49:291012 base::CompareCase::SENSITIVE));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481013 EXPECT_TRUE(StartsWith(u"java", std::u16string(),
brettwa7ff1b292015-07-16 17:49:291014 base::CompareCase::INSENSITIVE_ASCII));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481015 EXPECT_TRUE(
1016 StartsWith(u"java", std::u16string(), base::CompareCase::SENSITIVE));
initial.commitd7cae122008-07-26 21:49:381017}
1018
stuartmorgan@google.com996fd702009-09-04 19:12:371019TEST(StringUtilTest, EndsWith) {
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481020 EXPECT_TRUE(
1021 EndsWith(u"Foo.plugin", u".plugin", base::CompareCase::SENSITIVE));
1022 EXPECT_FALSE(
1023 EndsWith(u"Foo.Plugin", u".plugin", base::CompareCase::SENSITIVE));
1024 EXPECT_TRUE(EndsWith(u"Foo.plugin", u".plugin",
brettwa7ff1b292015-07-16 17:49:291025 base::CompareCase::INSENSITIVE_ASCII));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481026 EXPECT_TRUE(EndsWith(u"Foo.Plugin", u".plugin",
brettwa7ff1b292015-07-16 17:49:291027 base::CompareCase::INSENSITIVE_ASCII));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481028 EXPECT_FALSE(EndsWith(u".plug", u".plugin", base::CompareCase::SENSITIVE));
1029 EXPECT_FALSE(
1030 EndsWith(u".plug", u".plugin", base::CompareCase::INSENSITIVE_ASCII));
1031 EXPECT_FALSE(
1032 EndsWith(u"Foo.plugin Bar", u".plugin", base::CompareCase::SENSITIVE));
1033 EXPECT_FALSE(EndsWith(u"Foo.plugin Bar", u".plugin",
brettwa7ff1b292015-07-16 17:49:291034 base::CompareCase::INSENSITIVE_ASCII));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481035 EXPECT_FALSE(EndsWith(std::u16string(), u".plugin",
brettwa7ff1b292015-07-16 17:49:291036 base::CompareCase::INSENSITIVE_ASCII));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481037 EXPECT_FALSE(
1038 EndsWith(std::u16string(), u".plugin", base::CompareCase::SENSITIVE));
1039 EXPECT_TRUE(EndsWith(u"Foo.plugin", std::u16string(),
brettwa7ff1b292015-07-16 17:49:291040 base::CompareCase::INSENSITIVE_ASCII));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481041 EXPECT_TRUE(
1042 EndsWith(u"Foo.plugin", std::u16string(), base::CompareCase::SENSITIVE));
1043 EXPECT_TRUE(
1044 EndsWith(u".plugin", u".plugin", base::CompareCase::INSENSITIVE_ASCII));
1045 EXPECT_TRUE(EndsWith(u".plugin", u".plugin", base::CompareCase::SENSITIVE));
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571046 EXPECT_TRUE(EndsWith(std::u16string(), std::u16string(),
1047 base::CompareCase::INSENSITIVE_ASCII));
1048 EXPECT_TRUE(EndsWith(std::u16string(), std::u16string(),
1049 base::CompareCase::SENSITIVE));
stuartmorgan@google.com996fd702009-09-04 19:12:371050}
1051
initial.commitd7cae122008-07-26 21:49:381052TEST(StringUtilTest, GetStringFWithOffsets) {
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571053 std::vector<std::u16string> subst;
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481054 subst.push_back(u"1");
1055 subst.push_back(u"2");
initial.commitd7cae122008-07-26 21:49:381056 std::vector<size_t> offsets;
1057
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481058 ReplaceStringPlaceholders(u"Hello, $1. Your number is $2.", subst, &offsets);
darin@google.comcb2f3632008-08-14 20:27:291059 EXPECT_EQ(2U, offsets.size());
1060 EXPECT_EQ(7U, offsets[0]);
1061 EXPECT_EQ(25U, offsets[1]);
initial.commitd7cae122008-07-26 21:49:381062 offsets.clear();
1063
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481064 ReplaceStringPlaceholders(u"Hello, $2. Your number is $1.", subst, &offsets);
darin@google.comcb2f3632008-08-14 20:27:291065 EXPECT_EQ(2U, offsets.size());
1066 EXPECT_EQ(25U, offsets[0]);
1067 EXPECT_EQ(7U, offsets[1]);
initial.commitd7cae122008-07-26 21:49:381068 offsets.clear();
1069}
1070
tony@chromium.org55b77ceb2011-06-07 20:22:221071TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
1072 // Test whether replacestringplaceholders works as expected when there
1073 // are fewer inputs than outputs.
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571074 std::vector<std::u16string> subst;
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481075 subst.push_back(u"9a");
1076 subst.push_back(u"8b");
1077 subst.push_back(u"7c");
tony@chromium.org55b77ceb2011-06-07 20:22:221078
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571079 std::u16string formatted = ReplaceStringPlaceholders(
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481080 u"$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i", subst, nullptr);
tony@chromium.org55b77ceb2011-06-07 20:22:221081
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481082 EXPECT_EQ(u"9aa,8bb,7cc,d,e,f,9ag,8bh,7ci", formatted);
tony@chromium.org55b77ceb2011-06-07 20:22:221083}
1084
glen@chromium.orgdff6d132009-05-15 22:40:571085TEST(StringUtilTest, ReplaceStringPlaceholders) {
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571086 std::vector<std::u16string> subst;
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481087 subst.push_back(u"9a");
1088 subst.push_back(u"8b");
1089 subst.push_back(u"7c");
1090 subst.push_back(u"6d");
1091 subst.push_back(u"5e");
1092 subst.push_back(u"4f");
1093 subst.push_back(u"3g");
1094 subst.push_back(u"2h");
1095 subst.push_back(u"1i");
glen@chromium.orgdff6d132009-05-15 22:40:571096
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571097 std::u16string formatted = ReplaceStringPlaceholders(
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481098 u"$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, nullptr);
glen@chromium.orgdff6d132009-05-15 22:40:571099
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481100 EXPECT_EQ(u"9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", formatted);
glen@chromium.orgdff6d132009-05-15 22:40:571101}
1102
Nick Cartere2c7d65d2017-09-28 00:41:371103TEST(StringUtilTest, ReplaceStringPlaceholdersNetExpansionWithContraction) {
1104 // In this test, some of the substitutions are shorter than the placeholders,
1105 // but overall the string gets longer.
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571106 std::vector<std::u16string> subst;
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481107 subst.push_back(u"9a____");
1108 subst.push_back(u"B");
1109 subst.push_back(u"7c___");
1110 subst.push_back(u"d");
1111 subst.push_back(u"5e____");
1112 subst.push_back(u"F");
1113 subst.push_back(u"3g___");
1114 subst.push_back(u"h");
1115 subst.push_back(u"1i_____");
Nick Cartere2c7d65d2017-09-28 00:41:371116
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481117 std::u16string original = u"$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i";
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571118 std::u16string expected =
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481119 u"9a____a,Bb,7c___c,dd,5e____e,Ff,3g___g,hh,1i_____i";
Nick Cartere2c7d65d2017-09-28 00:41:371120
1121 EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, nullptr));
1122
1123 std::vector<size_t> offsets;
1124 EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, &offsets));
1125 std::vector<size_t> expected_offsets = {0, 8, 11, 18, 21, 29, 32, 39, 42};
1126 EXPECT_EQ(offsets.size(), subst.size());
1127 EXPECT_EQ(expected_offsets, offsets);
1128 for (size_t i = 0; i < offsets.size(); i++) {
1129 EXPECT_EQ(expected.substr(expected_offsets[i], subst[i].length()),
1130 subst[i]);
1131 }
1132}
1133
1134TEST(StringUtilTest, ReplaceStringPlaceholdersNetContractionWithExpansion) {
1135 // In this test, some of the substitutions are longer than the placeholders,
1136 // but overall the string gets smaller. Additionally, the placeholders appear
1137 // in a permuted order.
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571138 std::vector<std::u16string> subst;
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481139 subst.push_back(u"z");
1140 subst.push_back(u"y");
1141 subst.push_back(u"XYZW");
1142 subst.push_back(u"x");
1143 subst.push_back(u"w");
Nick Cartere2c7d65d2017-09-28 00:41:371144
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571145 std::u16string formatted =
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481146 ReplaceStringPlaceholders(u"$3_$4$2$1$5", subst, nullptr);
Nick Cartere2c7d65d2017-09-28 00:41:371147
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481148 EXPECT_EQ(u"XYZW_xyzw", formatted);
Nick Cartere2c7d65d2017-09-28 00:41:371149}
1150
sergeyu064d2a22016-04-15 03:15:171151TEST(StringUtilTest, ReplaceStringPlaceholdersOneDigit) {
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571152 std::vector<std::u16string> subst;
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481153 subst.push_back(u"1a");
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571154 std::u16string formatted =
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481155 ReplaceStringPlaceholders(u" $16 ", subst, nullptr);
1156 EXPECT_EQ(u" 1a6 ", formatted);
sergeyu064d2a22016-04-15 03:15:171157}
glen@chromium.orgdff6d132009-05-15 22:40:571158
sergeyu064d2a22016-04-15 03:15:171159TEST(StringUtilTest, ReplaceStringPlaceholdersInvalidPlaceholder) {
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571160 std::vector<std::u16string> subst;
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481161 subst.push_back(u"1a");
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571162 std::u16string formatted =
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481163 ReplaceStringPlaceholders(u"+$-+$A+$1+", subst, nullptr);
1164 EXPECT_EQ(u"+++1a+", formatted);
glen@chromium.orgdff6d132009-05-15 22:40:571165}
1166
tony@chromium.orge4dad5bd2009-09-29 21:32:011167TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
1168 std::vector<std::string> subst;
1169 subst.push_back("9a");
1170 subst.push_back("8b");
1171 subst.push_back("7c");
1172 subst.push_back("6d");
1173 subst.push_back("5e");
1174 subst.push_back("4f");
1175 subst.push_back("3g");
1176 subst.push_back("2h");
1177 subst.push_back("1i");
1178
1179 std::string formatted =
1180 ReplaceStringPlaceholders(
sergeyu064d2a22016-04-15 03:15:171181 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, nullptr);
tony@chromium.orge4dad5bd2009-09-29 21:32:011182
sergeyu064d2a22016-04-15 03:15:171183 EXPECT_EQ("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", formatted);
tony@chromium.orge4dad5bd2009-09-29 21:32:011184}
1185
Nick Cartere2c7d65d2017-09-28 00:41:371186TEST(StringUtilTest, StdStringReplaceStringPlaceholdersMultipleMatches) {
1187 std::vector<std::string> subst;
1188 subst.push_back("4"); // Referenced twice.
1189 subst.push_back("?"); // Unreferenced.
1190 subst.push_back("!"); // Unreferenced.
1191 subst.push_back("16"); // Referenced once.
1192
1193 std::string original = "$1 * $1 == $4";
1194 std::string expected = "4 * 4 == 16";
1195 EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, nullptr));
1196 std::vector<size_t> offsets;
1197 EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, &offsets));
1198 std::vector<size_t> expected_offsets = {0, 4, 9};
1199 EXPECT_EQ(expected_offsets, offsets);
1200}
1201
aa@chromium.org5f522afa2010-08-31 19:32:091202TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
1203 std::vector<std::string> subst;
1204 subst.push_back("a");
1205 subst.push_back("b");
1206 subst.push_back("c");
sergeyu064d2a22016-04-15 03:15:171207 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, nullptr),
aa@chromium.org5f522afa2010-08-31 19:32:091208 "$1 $$2 $$$3");
1209}
1210
deanm@google.com954d58c2008-08-13 14:33:401211TEST(StringUtilTest, LcpyTest) {
1212 // Test the normal case where we fit in our buffer.
1213 {
1214 char dst[10];
Daniel Chengc6f0bcd62023-03-04 00:56:061215 char16_t u16dst[10];
nkostylev@chromium.org0ae34132013-12-13 16:58:281216 wchar_t wdst[10];
Daniel Chengf45f47602022-02-28 22:38:321217 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", std::size(dst)));
Daniel Chengc6f0bcd62023-03-04 00:56:061218 EXPECT_EQ(0, memcmp(dst, "abcdefg", sizeof(dst[0]) * 8));
1219 EXPECT_EQ(7U, u16cstrlcpy(u16dst, u"abcdefg", std::size(u16dst)));
1220 EXPECT_EQ(0, memcmp(u16dst, u"abcdefg", sizeof(u16dst[0]) * 8));
Daniel Chengf45f47602022-02-28 22:38:321221 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", std::size(wdst)));
Daniel Chengc6f0bcd62023-03-04 00:56:061222 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wdst[0]) * 8));
deanm@google.com954d58c2008-08-13 14:33:401223 }
initial.commitd7cae122008-07-26 21:49:381224
deanm@google.com954d58c2008-08-13 14:33:401225 // Test dst_size == 0, nothing should be written to |dst| and we should
1226 // have the equivalent of strlen(src).
1227 {
1228 char dst[2] = {1, 2};
Daniel Chengc6f0bcd62023-03-04 00:56:061229 char16_t u16dst[2] = {1, 2};
nkostylev@chromium.org0ae34132013-12-13 16:58:281230 wchar_t wdst[2] = {1, 2};
brettw7622fbed2015-06-09 20:20:141231 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", 0));
deanm@google.com954d58c2008-08-13 14:33:401232 EXPECT_EQ(1, dst[0]);
1233 EXPECT_EQ(2, dst[1]);
Daniel Chengc6f0bcd62023-03-04 00:56:061234 EXPECT_EQ(7U, u16cstrlcpy(u16dst, u"abcdefg", 0));
1235 EXPECT_EQ(char16_t{1}, u16dst[0]);
1236 EXPECT_EQ(char16_t{2}, u16dst[1]);
brettw7622fbed2015-06-09 20:20:141237 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", 0));
rmcilroy@chromium.org568db812014-04-25 00:08:061238 EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]);
1239 EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]);
deanm@google.com954d58c2008-08-13 14:33:401240 }
1241
1242 // Test the case were we _just_ competely fit including the null.
1243 {
1244 char dst[8];
Daniel Chengc6f0bcd62023-03-04 00:56:061245 char16_t u16dst[8];
nkostylev@chromium.org0ae34132013-12-13 16:58:281246 wchar_t wdst[8];
Daniel Chengf45f47602022-02-28 22:38:321247 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", std::size(dst)));
deanm@google.com954d58c2008-08-13 14:33:401248 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
Daniel Chengc6f0bcd62023-03-04 00:56:061249 EXPECT_EQ(7U, u16cstrlcpy(u16dst, u"abcdefg", std::size(u16dst)));
1250 EXPECT_EQ(0, memcmp(u16dst, u"abcdefg", sizeof(u16dst)));
Daniel Chengf45f47602022-02-28 22:38:321251 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", std::size(wdst)));
Daniel Chengc6f0bcd62023-03-04 00:56:061252 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wdst)));
deanm@google.com954d58c2008-08-13 14:33:401253 }
1254
1255 // Test the case where we are one smaller, so we can't fit the null.
1256 {
1257 char dst[7];
Daniel Chengc6f0bcd62023-03-04 00:56:061258 char16_t u16dst[7];
nkostylev@chromium.org0ae34132013-12-13 16:58:281259 wchar_t wdst[7];
Daniel Chengf45f47602022-02-28 22:38:321260 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", std::size(dst)));
Daniel Chengc6f0bcd62023-03-04 00:56:061261 EXPECT_EQ(0, memcmp(dst, "abcdef", sizeof(dst[0]) * 7));
1262 EXPECT_EQ(7U, u16cstrlcpy(u16dst, u"abcdefg", std::size(u16dst)));
1263 EXPECT_EQ(0, memcmp(u16dst, u"abcdef", sizeof(u16dst[0]) * 7));
Daniel Chengf45f47602022-02-28 22:38:321264 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", std::size(wdst)));
Daniel Chengc6f0bcd62023-03-04 00:56:061265 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wdst[0]) * 7));
deanm@google.com954d58c2008-08-13 14:33:401266 }
1267
1268 // Test the case where we are just too small.
1269 {
1270 char dst[3];
Daniel Chengc6f0bcd62023-03-04 00:56:061271 char16_t u16dst[3];
nkostylev@chromium.org0ae34132013-12-13 16:58:281272 wchar_t wdst[3];
Daniel Chengf45f47602022-02-28 22:38:321273 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", std::size(dst)));
Daniel Chengc6f0bcd62023-03-04 00:56:061274 EXPECT_EQ(0, memcmp(dst, "ab", sizeof(dst)));
1275 EXPECT_EQ(7U, u16cstrlcpy(u16dst, u"abcdefg", std::size(u16dst)));
1276 EXPECT_EQ(0, memcmp(u16dst, u"ab", sizeof(u16dst)));
Daniel Chengf45f47602022-02-28 22:38:321277 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", std::size(wdst)));
Daniel Chengc6f0bcd62023-03-04 00:56:061278 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wdst)));
deanm@google.com954d58c2008-08-13 14:33:401279 }
1280}
mmentovai@google.com44cd16f2008-08-14 01:25:321281
1282TEST(StringUtilTest, WprintfFormatPortabilityTest) {
tfarina@chromium.orgc3272bd22011-10-16 13:36:311283 static const struct {
mmentovai@google.com44cd16f2008-08-14 01:25:321284 const wchar_t* input;
1285 bool portable;
1286 } cases[] = {
1287 { L"%ls", true },
1288 { L"%s", false },
1289 { L"%S", false },
1290 { L"%lS", false },
1291 { L"Hello, %s", false },
1292 { L"%lc", true },
1293 { L"%c", false },
1294 { L"%C", false },
1295 { L"%lC", false },
1296 { L"%ls %s", false },
1297 { L"%s %ls", false },
1298 { L"%s %ls %s", false },
1299 { L"%f", true },
1300 { L"%f %F", false },
1301 { L"%d %D", false },
1302 { L"%o %O", false },
1303 { L"%u %U", false },
1304 { L"%f %d %o %u", true },
1305 { L"%-8d (%02.1f%)", true },
1306 { L"% 10s", false },
1307 { L"% 10ls", true }
1308 };
jdoerrie6c6229352018-10-22 15:55:431309 for (const auto& i : cases)
1310 EXPECT_EQ(i.portable, IsWprintfFormatPortable(i.input));
mmentovai@google.com44cd16f2008-08-14 01:25:321311}
license.botbf09a502008-08-24 00:55:551312
Jan Wilken Dörriea20d48d2021-01-29 18:10:211313TEST(StringUtilTest, MakeBasicStringPieceTest) {
1314 constexpr char kFoo[] = "Foo";
1315 static_assert(MakeStringPiece(kFoo, kFoo + 3) == kFoo, "");
1316 static_assert(MakeStringPiece(kFoo, kFoo + 3).data() == kFoo, "");
1317 static_assert(MakeStringPiece(kFoo, kFoo + 3).size() == 3, "");
1318 static_assert(MakeStringPiece(kFoo + 3, kFoo + 3).empty(), "");
1319 static_assert(MakeStringPiece(kFoo + 4, kFoo + 4).empty(), "");
1320
1321 std::string foo = kFoo;
1322 EXPECT_EQ(MakeStringPiece(foo.begin(), foo.end()), foo);
1323 EXPECT_EQ(MakeStringPiece(foo.begin(), foo.end()).data(), foo.data());
1324 EXPECT_EQ(MakeStringPiece(foo.begin(), foo.end()).size(), foo.size());
1325 EXPECT_TRUE(MakeStringPiece(foo.end(), foo.end()).empty());
1326
Jan Wilken Dörrie677e0c872021-03-10 10:04:381327 constexpr char16_t kBar[] = u"Bar";
Jan Wilken Dörriea20d48d2021-01-29 18:10:211328 static_assert(MakeStringPiece16(kBar, kBar + 3) == kBar, "");
1329 static_assert(MakeStringPiece16(kBar, kBar + 3).data() == kBar, "");
1330 static_assert(MakeStringPiece16(kBar, kBar + 3).size() == 3, "");
1331 static_assert(MakeStringPiece16(kBar + 3, kBar + 3).empty(), "");
1332 static_assert(MakeStringPiece16(kBar + 4, kBar + 4).empty(), "");
1333
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571334 std::u16string bar = kBar;
Jan Wilken Dörriea20d48d2021-01-29 18:10:211335 EXPECT_EQ(MakeStringPiece16(bar.begin(), bar.end()), bar);
1336 EXPECT_EQ(MakeStringPiece16(bar.begin(), bar.end()).data(), bar.data());
1337 EXPECT_EQ(MakeStringPiece16(bar.begin(), bar.end()).size(), bar.size());
1338 EXPECT_TRUE(MakeStringPiece16(bar.end(), bar.end()).empty());
1339
1340 constexpr wchar_t kBaz[] = L"Baz";
1341 static_assert(MakeWStringPiece(kBaz, kBaz + 3) == kBaz, "");
1342 static_assert(MakeWStringPiece(kBaz, kBaz + 3).data() == kBaz, "");
1343 static_assert(MakeWStringPiece(kBaz, kBaz + 3).size() == 3, "");
1344 static_assert(MakeWStringPiece(kBaz + 3, kBaz + 3).empty(), "");
1345 static_assert(MakeWStringPiece(kBaz + 4, kBaz + 4).empty(), "");
1346
1347 std::wstring baz = kBaz;
1348 EXPECT_EQ(MakeWStringPiece(baz.begin(), baz.end()), baz);
1349 EXPECT_EQ(MakeWStringPiece(baz.begin(), baz.end()).data(), baz.data());
1350 EXPECT_EQ(MakeWStringPiece(baz.begin(), baz.end()).size(), baz.size());
1351 EXPECT_TRUE(MakeWStringPiece(baz.end(), baz.end()).empty());
1352}
1353
jhawkins@chromium.org0443f9122010-02-05 01:44:171354TEST(StringUtilTest, RemoveChars) {
thestig073d514d2014-10-21 03:11:211355 const char kRemoveChars[] = "-/+*";
jhawkins@chromium.org0443f9122010-02-05 01:44:171356 std::string input = "A-+bc/d!*";
1357 EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1358 EXPECT_EQ("Abcd!", input);
1359
1360 // No characters match kRemoveChars.
1361 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1362 EXPECT_EQ("Abcd!", input);
1363
1364 // Empty string.
1365 input.clear();
1366 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1367 EXPECT_EQ(std::string(), input);
1368}
1369
asvitkine@chromium.org531e0342011-11-10 15:08:411370TEST(StringUtilTest, ReplaceChars) {
1371 struct TestData {
1372 const char* input;
1373 const char* replace_chars;
1374 const char* replace_with;
1375 const char* output;
1376 bool result;
1377 } cases[] = {
Nick Carterb1e364d2017-09-26 18:28:451378 {"", "", "", "", false},
1379 {"t", "t", "t", "t", true},
1380 {"a", "b", "c", "a", false},
1381 {"b", "b", "c", "c", true},
1382 {"bob", "b", "p", "pop", true},
1383 {"bob", "o", "i", "bib", true},
1384 {"test", "", "", "test", false},
1385 {"test", "", "!", "test", false},
1386 {"test", "z", "!", "test", false},
1387 {"test", "e", "!", "t!st", true},
1388 {"test", "e", "!?", "t!?st", true},
1389 {"test", "ez", "!", "t!st", true},
1390 {"test", "zed", "!?", "t!?st", true},
1391 {"test", "t", "!?", "!?es!?", true},
1392 {"test", "et", "!>", "!>!>s!>", true},
1393 {"test", "zest", "!", "!!!!", true},
1394 {"test", "szt", "!", "!e!!", true},
1395 {"test", "t", "test", "testestest", true},
1396 {"tetst", "t", "test", "testeteststest", true},
1397 {"ttttttt", "t", "-", "-------", true},
1398 {"aAaAaAAaAAa", "A", "", "aaaaa", true},
1399 {"xxxxxxxxxx", "x", "", "", true},
1400 {"xxxxxxxxxx", "x", "x", "xxxxxxxxxx", true},
1401 {"xxxxxxxxxx", "x", "y-", "y-y-y-y-y-y-y-y-y-y-", true},
1402 {"xxxxxxxxxx", "x", "xy", "xyxyxyxyxyxyxyxyxyxy", true},
1403 {"xxxxxxxxxx", "x", "zyx", "zyxzyxzyxzyxzyxzyxzyxzyxzyxzyx", true},
1404 {"xaxxaxxxaxxxax", "x", "xy", "xyaxyxyaxyxyxyaxyxyxyaxy", true},
1405 {"-xaxxaxxxaxxxax-", "x", "xy", "-xyaxyxyaxyxyxyaxyxyxyaxy-", true},
asvitkine@chromium.org531e0342011-11-10 15:08:411406 };
1407
Nick Carterb1e364d2017-09-26 18:28:451408 for (const TestData& scenario : cases) {
1409 // Test with separate output and input vars.
asvitkine@chromium.org531e0342011-11-10 15:08:411410 std::string output;
Nick Carterb1e364d2017-09-26 18:28:451411 bool result = ReplaceChars(scenario.input, scenario.replace_chars,
1412 scenario.replace_with, &output);
1413 EXPECT_EQ(scenario.result, result) << scenario.input;
1414 EXPECT_EQ(scenario.output, output);
1415 }
1416
1417 for (const TestData& scenario : cases) {
1418 // Test with an input/output var of limited capacity.
1419 std::string input_output = scenario.input;
1420 input_output.shrink_to_fit();
1421 bool result = ReplaceChars(input_output, scenario.replace_chars,
1422 scenario.replace_with, &input_output);
1423 EXPECT_EQ(scenario.result, result) << scenario.input;
1424 EXPECT_EQ(scenario.output, input_output);
1425 }
1426
1427 for (const TestData& scenario : cases) {
1428 // Test with an input/output var of ample capacity; should
1429 // not realloc.
1430 std::string input_output = scenario.input;
1431 input_output.reserve(strlen(scenario.output) * 2);
1432 const void* original_buffer = input_output.data();
1433 bool result = ReplaceChars(input_output, scenario.replace_chars,
1434 scenario.replace_with, &input_output);
1435 EXPECT_EQ(scenario.result, result) << scenario.input;
1436 EXPECT_EQ(scenario.output, input_output);
1437 EXPECT_EQ(original_buffer, input_output.data());
asvitkine@chromium.org531e0342011-11-10 15:08:411438 }
1439}
1440
jcampan@chromium.orgef122392010-02-10 18:26:111441TEST(StringUtilTest, ContainsOnlyChars) {
1442 // Providing an empty list of characters should return false but for the empty
1443 // string.
dcheng@chromium.org007b3f82013-04-09 08:46:451444 EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
1445 EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
jcampan@chromium.orgef122392010-02-10 18:26:111446
dcheng@chromium.org007b3f82013-04-09 08:46:451447 EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
jcampan@chromium.orgef122392010-02-10 18:26:111448 EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1449 EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1450 EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1451 EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
brettw@chromium.org1e1229a12014-03-11 23:16:241452
1453 EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII));
1454 EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
1455 EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
1456 EXPECT_TRUE(ContainsOnlyChars("\t \r \n ", kWhitespaceASCII));
1457 EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
1458 EXPECT_FALSE(ContainsOnlyChars("\thello\r \n ", kWhitespaceASCII));
1459
Jan Wilken Dörrie085b2aa2021-03-12 16:26:571460 EXPECT_TRUE(ContainsOnlyChars(std::u16string(), kWhitespaceUTF16));
Jan Wilken Dörriec92a6d7242021-03-23 17:43:481461 EXPECT_TRUE(ContainsOnlyChars(u" ", kWhitespaceUTF16));
1462 EXPECT_TRUE(ContainsOnlyChars(u"\t", kWhitespaceUTF16));
1463 EXPECT_TRUE(ContainsOnlyChars(u"\t \r \n ", kWhitespaceUTF16));
1464 EXPECT_FALSE(ContainsOnlyChars(u"a", kWhitespaceUTF16));
1465 EXPECT_FALSE(ContainsOnlyChars(u"\thello\r \n ", kWhitespaceUTF16));
jcampan@chromium.orgef122392010-02-10 18:26:111466}
1467
brettw8a800902015-07-10 18:28:331468TEST(StringUtilTest, CompareCaseInsensitiveASCII) {
1469 EXPECT_EQ(0, CompareCaseInsensitiveASCII("", ""));
1470 EXPECT_EQ(0, CompareCaseInsensitiveASCII("Asdf", "aSDf"));
1471
1472 // Differing lengths.
1473 EXPECT_EQ(-1, CompareCaseInsensitiveASCII("Asdf", "aSDfA"));
1474 EXPECT_EQ(1, CompareCaseInsensitiveASCII("AsdfA", "aSDf"));
1475
1476 // Differing values.
1477 EXPECT_EQ(-1, CompareCaseInsensitiveASCII("AsdfA", "aSDfb"));
1478 EXPECT_EQ(1, CompareCaseInsensitiveASCII("Asdfb", "aSDfA"));
Tsuyoshi Horo8137f4892022-10-27 01:55:521479
David Benjamin18fb9ea2022-11-22 18:22:331480 // Non-ASCII bytes are permitted, but they will be compared case-sensitively.
1481 EXPECT_EQ(0, CompareCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\xa4"));
1482 EXPECT_EQ(-1, CompareCaseInsensitiveASCII("AAA \xc3\x84", "aaa \xc3\xa4"));
1483 EXPECT_EQ(1, CompareCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\x84"));
1484
1485 // ASCII bytes should sort before non-ASCII ones.
1486 EXPECT_EQ(-1, CompareCaseInsensitiveASCII("a", "\xc3\xa4"));
1487 EXPECT_EQ(1, CompareCaseInsensitiveASCII("\xc3\xa4", "a"));
1488
Tsuyoshi Horo8137f4892022-10-27 01:55:521489 // For constexpr.
1490 static_assert(CompareCaseInsensitiveASCII("", "") == 0);
1491 static_assert(CompareCaseInsensitiveASCII("Asdf", "aSDf") == 0);
1492 static_assert(CompareCaseInsensitiveASCII("Asdf", "aSDfA") == -1);
1493 static_assert(CompareCaseInsensitiveASCII("AsdfA", "aSDf") == 1);
1494 static_assert(CompareCaseInsensitiveASCII("AsdfA", "aSDfb") == -1);
1495 static_assert(CompareCaseInsensitiveASCII("Asdfb", "aSDfA") == 1);
David Benjamin18fb9ea2022-11-22 18:22:331496 static_assert(CompareCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\xa4") ==
1497 0);
1498 static_assert(CompareCaseInsensitiveASCII("AAA \xc3\x84", "aaa \xc3\xa4") ==
1499 -1);
1500 static_assert(CompareCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\x84") ==
1501 1);
1502 static_assert(CompareCaseInsensitiveASCII("a", "\xc3\xa4") == -1);
1503 static_assert(CompareCaseInsensitiveASCII("\xc3\xa4", "a") == 1);
brettw8a800902015-07-10 18:28:331504}
1505
1506TEST(StringUtilTest, EqualsCaseInsensitiveASCII) {
1507 EXPECT_TRUE(EqualsCaseInsensitiveASCII("", ""));
1508 EXPECT_TRUE(EqualsCaseInsensitiveASCII("Asdf", "aSDF"));
1509 EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", "aSDF"));
1510 EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", "aSDFz"));
Dan McArdle262177b2022-05-20 20:21:371511
1512 EXPECT_TRUE(EqualsCaseInsensitiveASCII(u"", u""));
1513 EXPECT_TRUE(EqualsCaseInsensitiveASCII(u"Asdf", u"aSDF"));
1514 EXPECT_FALSE(EqualsCaseInsensitiveASCII(u"bsdf", u"aSDF"));
1515 EXPECT_FALSE(EqualsCaseInsensitiveASCII(u"Asdf", u"aSDFz"));
1516
1517 EXPECT_TRUE(EqualsCaseInsensitiveASCII(u"", ""));
1518 EXPECT_TRUE(EqualsCaseInsensitiveASCII(u"Asdf", "aSDF"));
1519 EXPECT_FALSE(EqualsCaseInsensitiveASCII(u"bsdf", "aSDF"));
1520 EXPECT_FALSE(EqualsCaseInsensitiveASCII(u"Asdf", "aSDFz"));
1521
1522 EXPECT_TRUE(EqualsCaseInsensitiveASCII("", u""));
1523 EXPECT_TRUE(EqualsCaseInsensitiveASCII("Asdf", u"aSDF"));
1524 EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", u"aSDF"));
1525 EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", u"aSDFz"));
1526
David Benjamin18fb9ea2022-11-22 18:22:331527 // Non-ASCII bytes are permitted, but they will be compared case-sensitively.
1528 EXPECT_TRUE(EqualsCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\xa4"));
1529 EXPECT_FALSE(EqualsCaseInsensitiveASCII("aaa \xc3\x84", "AAA \xc3\xa4"));
1530
Dan McArdle262177b2022-05-20 20:21:371531 // The `WStringPiece` overloads are only defined on Windows.
1532#if BUILDFLAG(IS_WIN)
1533 EXPECT_TRUE(EqualsCaseInsensitiveASCII(L"", L""));
1534 EXPECT_TRUE(EqualsCaseInsensitiveASCII(L"Asdf", L"aSDF"));
1535 EXPECT_FALSE(EqualsCaseInsensitiveASCII(L"bsdf", L"aSDF"));
1536 EXPECT_FALSE(EqualsCaseInsensitiveASCII(L"Asdf", L"aSDFz"));
1537
1538 EXPECT_TRUE(EqualsCaseInsensitiveASCII(L"", ""));
1539 EXPECT_TRUE(EqualsCaseInsensitiveASCII(L"Asdf", "aSDF"));
1540 EXPECT_FALSE(EqualsCaseInsensitiveASCII(L"bsdf", "aSDF"));
1541 EXPECT_FALSE(EqualsCaseInsensitiveASCII(L"Asdf", "aSDFz"));
1542
1543 EXPECT_TRUE(EqualsCaseInsensitiveASCII("", L""));
1544 EXPECT_TRUE(EqualsCaseInsensitiveASCII("Asdf", L"aSDF"));
1545 EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", L"aSDF"));
1546 EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", L"aSDFz"));
1547#endif
brettw8a800902015-07-10 18:28:331548}
1549
eromane61e2b12015-10-13 23:39:221550TEST(StringUtilTest, IsUnicodeWhitespace) {
1551 // NOT unicode white space.
1552 EXPECT_FALSE(IsUnicodeWhitespace(L'\0'));
1553 EXPECT_FALSE(IsUnicodeWhitespace(L'A'));
1554 EXPECT_FALSE(IsUnicodeWhitespace(L'0'));
1555 EXPECT_FALSE(IsUnicodeWhitespace(L'.'));
1556 EXPECT_FALSE(IsUnicodeWhitespace(L';'));
1557 EXPECT_FALSE(IsUnicodeWhitespace(L'\x4100'));
1558
1559 // Actual unicode whitespace.
1560 EXPECT_TRUE(IsUnicodeWhitespace(L' '));
1561 EXPECT_TRUE(IsUnicodeWhitespace(L'\xa0'));
1562 EXPECT_TRUE(IsUnicodeWhitespace(L'\x3000'));
1563 EXPECT_TRUE(IsUnicodeWhitespace(L'\t'));
1564 EXPECT_TRUE(IsUnicodeWhitespace(L'\r'));
1565 EXPECT_TRUE(IsUnicodeWhitespace(L'\v'));
1566 EXPECT_TRUE(IsUnicodeWhitespace(L'\f'));
1567 EXPECT_TRUE(IsUnicodeWhitespace(L'\n'));
1568}
1569
pkasting@chromium.orgfdce4782011-11-29 20:06:181570class WriteIntoTest : public testing::Test {
1571 protected:
1572 static void WritesCorrectly(size_t num_chars) {
1573 std::string buffer;
1574 char kOriginal[] = "supercali";
1575 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1576 // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1577 // string at the first \0.
Avi Drissmane3b70bf2019-01-04 19:50:221578 EXPECT_EQ(
Daniel Chengf45f47602022-02-28 22:38:321579 std::string(kOriginal, std::min(num_chars, std::size(kOriginal) - 1)),
Avi Drissmane3b70bf2019-01-04 19:50:221580 std::string(buffer.c_str()));
pkasting@chromium.orgfdce4782011-11-29 20:06:181581 EXPECT_EQ(num_chars, buffer.size());
1582 }
1583};
1584
1585TEST_F(WriteIntoTest, WriteInto) {
cbentzel@chromium.org11fe41a2011-10-18 15:44:591586 // Validate that WriteInto reserves enough space and
1587 // sizes a string correctly.
pkasting@chromium.orgfdce4782011-11-29 20:06:181588 WritesCorrectly(1);
1589 WritesCorrectly(2);
1590 WritesCorrectly(5000);
cbentzel@chromium.org11fe41a2011-10-18 15:44:591591
Shelley Vohr4c679122019-09-06 00:41:391592 // Validate that WriteInto handles 0-length strings
1593 std::string empty;
1594 const char kOriginal[] = "original";
1595 strncpy(WriteInto(&empty, 1), kOriginal, 0);
1596 EXPECT_STREQ("", empty.c_str());
1597 EXPECT_EQ(0u, empty.size());
1598
cbentzel@chromium.org11fe41a2011-10-18 15:44:591599 // Validate that WriteInto doesn't modify other strings
1600 // when using a Copy-on-Write implementation.
1601 const char kLive[] = "live";
1602 const char kDead[] = "dead";
1603 const std::string live = kLive;
1604 std::string dead = live;
1605 strncpy(WriteInto(&dead, 5), kDead, 4);
pkasting@chromium.orgfdce4782011-11-29 20:06:181606 EXPECT_EQ(kDead, dead);
cbentzel@chromium.org11fe41a2011-10-18 15:44:591607 EXPECT_EQ(4u, dead.size());
pkasting@chromium.orgfdce4782011-11-29 20:06:181608 EXPECT_EQ(kLive, live);
cbentzel@chromium.org11fe41a2011-10-18 15:44:591609 EXPECT_EQ(4u, live.size());
1610}
1611
Peter Kasting3c5f1db2023-03-09 21:50:591612} // namespace
1613
willchan@chromium.org270c6412010-03-29 22:02:471614} // namespace base