blob: 99775f34323d610b2a51caf9329473c9812e6334 [file] [log] [blame]
// Copyright 2020 The Chromium Authors
// Copyright 2014 Blake Embrey (hello@blakeembrey.com)
// Use of this source code is governed by an MIT-style license that can be
// found in the LICENSE file or at https://opensource.org/licenses/MIT.
5
6#include "third_party/liburlpattern/pattern.h"
7
Takashi Nakayama3b396f302025-05-19 02:59:028#include <algorithm>
Helmut Januschkaed56d612024-07-12 21:11:099#include <optional>
Takashi Nakayama3fa3dc55722025-05-09 07:48:1710#include <string>
Helmut Januschkaed56d612024-07-12 21:11:0911#include <string_view>
Takashi Nakayama3b396f302025-05-19 02:59:0212#include <unordered_map>
13#include <utility>
14#include <vector>
Helmut Januschkaed56d612024-07-12 21:11:0915
Takashi Nakayama3b396f302025-05-19 02:59:0216#include "base/notreached.h"
17#include "base/types/expected.h"
Ben Kelly0e5c63e82020-11-12 21:24:0818#include "third_party/abseil-cpp/absl/base/macros.h"
Takashi Nakayama3b396f302025-05-19 02:59:0219#include "third_party/abseil-cpp/absl/status/status.h"
20#include "third_party/abseil-cpp/absl/strings/str_cat.h"
Ben Kelly0e5c63e82020-11-12 21:24:0821#include "third_party/abseil-cpp/absl/strings/str_format.h"
Ben Kellya3bf96b2021-12-08 20:55:1122#include "third_party/icu/source/common/unicode/utf8.h"
Ben Kelly36f7ba3e2020-11-24 19:48:4623#include "third_party/liburlpattern/utils.h"
Ben Kelly0e5c63e82020-11-12 21:24:0824
25namespace liburlpattern {
26
Ben Kelly36f7ba3e2020-11-24 19:48:4627namespace {
28
29void AppendModifier(Modifier modifier, std::string& append_target) {
30 switch (modifier) {
Ben Kelly22c632e82021-07-28 02:52:1031 case Modifier::kZeroOrMore:
32 append_target += '*';
Ben Kelly36f7ba3e2020-11-24 19:48:4633 break;
34 case Modifier::kOptional:
35 append_target += '?';
36 break;
Ben Kelly36f7ba3e2020-11-24 19:48:4637 case Modifier::kOneOrMore:
38 append_target += '+';
39 break;
Ben Kelly22c632e82021-07-28 02:52:1040 case Modifier::kNone:
41 break;
Ben Kelly36f7ba3e2020-11-24 19:48:4642 }
43}
44
45size_t ModifierLength(Modifier modifier) {
46 switch (modifier) {
Ben Kelly36f7ba3e2020-11-24 19:48:4647 case Modifier::kZeroOrMore:
Ben Kelly22c632e82021-07-28 02:52:1048 case Modifier::kOptional:
Ben Kelly36f7ba3e2020-11-24 19:48:4649 case Modifier::kOneOrMore:
50 return 1;
Ben Kelly22c632e82021-07-28 02:52:1051 case Modifier::kNone:
52 return 0;
Ben Kelly36f7ba3e2020-11-24 19:48:4653 }
54}
55
56} // namespace
57
Ben Kelly36f7ba3e2020-11-24 19:48:4658Pattern::Pattern(std::vector<Part> part_list,
59 Options options,
60 std::string segment_wildcard_regex)
61 : part_list_(std::move(part_list)),
62 options_(std::move(options)),
63 segment_wildcard_regex_(std::move(segment_wildcard_regex)) {}
64
Ben Kelly02c1d172021-03-16 15:33:2565std::string Pattern::GeneratePatternString() const {
66 std::string result;
67
68 // Estimate the final length and reserve a reasonable sized string
69 // buffer to avoid reallocations.
70 size_t estimated_length = 0;
71 for (const Part& part : part_list_) {
72 // Add an arbitrary extra 3 per Part to account for braces, modifier, etc.
73 estimated_length +=
74 part.prefix.size() + part.value.size() + part.suffix.size() + 3;
75 }
76 result.reserve(estimated_length);
77
Ben Kellyb55a4672021-12-02 23:48:3178 for (size_t i = 0; i < part_list_.size(); ++i) {
79 const Part& part = part_list_[i];
Ben Kellya3bf96b2021-12-08 20:55:1180
Ben Kelly02c1d172021-03-16 15:33:2581 if (part.type == PartType::kFixed) {
82 // A simple fixed string part.
83 if (part.modifier == Modifier::kNone) {
84 EscapePatternStringAndAppend(part.value, result);
85 continue;
86 }
87
88 // A fixed string, but with a modifier which requires a grouping.
89 // For example, `{foo}?`.
90 result += "{";
91 EscapePatternStringAndAppend(part.value, result);
92 result += "}";
93 AppendModifier(part.modifier, result);
94 continue;
95 }
96
Ben Kelly349f07c2021-12-08 20:51:1297 bool custom_name = part.HasCustomName();
98
99 // Determine if the part needs a grouping like `{ ... }`. This is
100 // necessary when the group:
101 //
102 // 1. is using a non-automatic prefix or any suffix.
Ben Kelly02c1d172021-03-16 15:33:25103 bool needs_grouping =
104 !part.suffix.empty() ||
105 (!part.prefix.empty() &&
106 (part.prefix.size() != 1 ||
Ben Kellyafd5c7342021-12-11 19:44:15107 options_.prefix_list.find(part.prefix[0]) == std::string::npos));
108
109 // 2. followed by a matching group that may be expressed in a way that can
110 // be mistakenly interpreted as part of this matching group. For
111 // example:
112 //
113 // a. An `(...)` expression following a `:foo` group. We want to
114 // output `{:foo}(...)` and not `:foo(...)`.
Ben Kellyf81ca3262021-12-13 22:40:08115 // b. A plaint text expression following a `:foo` group where the text
Ben Kellyafd5c7342021-12-11 19:44:15116 // could be mistakenly interpreted as part of the name. We want to
117 // output `{:foo}bar` and not `:foobar`.
118 const Part* next_part =
119 (i + 1) < part_list_.size() ? &part_list_[i + 1] : nullptr;
Ben Kelly1734a2a2022-01-20 23:49:59120 if (!needs_grouping && custom_name &&
Ben Kellyf81ca3262021-12-13 22:40:08121 part.type == PartType::kSegmentWildcard &&
122 part.modifier == Modifier::kNone && next_part &&
123 next_part->prefix.empty() && next_part->suffix.empty()) {
Ben Kellyafd5c7342021-12-11 19:44:15124 if (next_part->type == PartType::kFixed) {
125 UChar32 codepoint = -1;
126 U8_GET(reinterpret_cast<const uint8_t*>(next_part->value.data()), 0, 0,
127 static_cast<int>(next_part->value.size()), codepoint);
128 needs_grouping = IsNameCodepoint(codepoint, /*first_codepoint=*/false);
129 } else {
130 needs_grouping = !next_part->HasCustomName();
131 }
132 }
Ben Kelly02c1d172021-03-16 15:33:25133
Ben Kelly552dd792022-01-11 21:51:39134 // 3. preceded by a fixed text part that ends with an implicit prefix
135 // character (like `/`). This occurs when the original pattern used
136 // an escape or grouping to prevent the implicit prefix; e.g.
137 // `\\/*` or `/{*}`. In these cases we use a grouping to prevent the
138 // implicit prefix in the generated string.
139 const Part* last_part = i > 0 ? &part_list_[i - 1] : nullptr;
140 if (!needs_grouping && part.prefix.empty() && last_part &&
141 last_part->type == PartType::kFixed) {
142 needs_grouping = options_.prefix_list.find(last_part->value.back()) !=
143 std::string::npos;
144 }
145
Ben Kelly02c1d172021-03-16 15:33:25146 // This is a full featured part. We must generate a string that looks
147 // like:
148 //
149 // { <prefix> <value> <suffix> } <modifier>
150 //
151 // Where the { and } may not be needed. The <value> will be a regexp,
152 // named group, or wildcard.
153 if (needs_grouping)
154 result += "{";
155
156 EscapePatternStringAndAppend(part.prefix, result);
157
158 if (custom_name) {
159 result += ":";
160 result += part.name;
161 }
162
163 if (part.type == PartType::kRegex) {
164 result += "(";
165 result += part.value;
166 result += ")";
167 } else if (part.type == PartType::kSegmentWildcard) {
168 // We only need to emit a regexp if a custom name was
169 // not specified. A custom name like `:foo` gets the
170 // kSegmentWildcard type automatically.
171 if (!custom_name) {
172 result += "(";
173 result += segment_wildcard_regex_;
174 result += ")";
175 }
176 } else if (part.type == PartType::kFullWildcard) {
Ben Kellyb55a4672021-12-02 23:48:31177 // We can only use the `*` wildcard card if we meet a number
178 // of conditions. We must use an explicit `(.*)` group if:
179 //
180 // 1. A custom name was used; e.g. `:foo(.*)`.
181 // 2. If the preceding group is a matching group without a modifier; e.g.
182 // `(foo)(.*)`. In that case we cannot emit the `*` shorthand without
183 // it being mistakenly interpreted as the modifier for the previous
184 // group.
Ben Kelly552dd792022-01-11 21:51:39185 // 3. The current group is not enclosed in a `{ }` grouping.
186 // 4. The current group does not have an implicit prefix like `/`.
Ben Kellyb55a4672021-12-02 23:48:31187 if (!custom_name && (!last_part || last_part->type == PartType::kFixed ||
Ben Kelly552dd792022-01-11 21:51:39188 last_part->modifier != Modifier::kNone ||
189 needs_grouping || !part.prefix.empty())) {
Ben Kelly02c1d172021-03-16 15:33:25190 result += "*";
191 } else {
192 result += "(";
193 result += kFullWildcardRegex;
194 result += ")";
195 }
196 }
197
Ben Kellya3bf96b2021-12-08 20:55:11198 // If the matching group is a simple `:foo` custom name with the default
199 // segment wildcard, then we must check for a trailing suffix that could
200 // be interpreted as a trailing part of the name itself. In these cases
201 // we must escape the beginning of the suffix in order to separate it
202 // from the end of the custom name; e.g. `:foo\\bar` instead of `:foobar`.
203 if (part.type == PartType::kSegmentWildcard && custom_name &&
204 !part.suffix.empty()) {
205 UChar32 codepoint = -1;
206 U8_GET(reinterpret_cast<const uint8_t*>(part.suffix.data()), 0, 0,
207 static_cast<int>(part.suffix.size()), codepoint);
208 if (IsNameCodepoint(codepoint, /*first_codepoint=*/false)) {
209 result += "\\";
210 }
211 }
212
Ben Kelly02c1d172021-03-16 15:33:25213 EscapePatternStringAndAppend(part.suffix, result);
214
215 if (needs_grouping)
216 result += "}";
217
218 if (part.modifier != Modifier::kNone)
219 AppendModifier(part.modifier, result);
220 }
221
222 return result;
223}
224
Ben Kelly36f7ba3e2020-11-24 19:48:46225// The following code is a translation from the path-to-regexp typescript at:
226//
227// https://github.com/pillarjs/path-to-regexp/blob/125c43e6481f68cc771a5af22b914acdb8c5ba1f/src/index.ts#L532-L596
228std::string Pattern::GenerateRegexString(
229 std::vector<std::string>* name_list_out) const {
230 std::string result;
231
232 // This method mirrors the logic and structure of RegexStringLength(). If
233 // one changes, so should the other.
234
235 // Perform a full pass of the |part_list| to compute the length of the regex
236 // string to avoid additional allocations.
237 size_t expected_length = RegexStringLength();
238 result.reserve(RegexStringLength());
239
240 // Anchor to the start of the string if configured to in the options.
241 if (options_.start)
242 result += "^";
243
244 // Iterate over each Part and append its equivalent value to the expression
245 // string.
246 for (const Part& part : part_list_) {
247 // Handle kFixed Parts. If there is a modifier we must wrap the escaped
248 // value in a non-capturing group. Otherwise we just append the escaped
249 // value. For example:
250 //
251 // <escaped-fixed-value>
252 //
253 // Or:
254 //
255 // (?:<escaped-fixed-value>)<modifier>
256 //
257 if (part.type == PartType::kFixed) {
258 if (part.modifier == Modifier::kNone) {
Ben Kelly02c1d172021-03-16 15:33:25259 EscapeRegexpStringAndAppend(part.value, result);
Ben Kelly36f7ba3e2020-11-24 19:48:46260 } else {
261 result += "(?:";
Ben Kelly02c1d172021-03-16 15:33:25262 EscapeRegexpStringAndAppend(part.value, result);
Ben Kelly36f7ba3e2020-11-24 19:48:46263 result += ")";
264 AppendModifier(part.modifier, result);
265 }
266 continue;
267 }
268
269 // All remaining Part types must have a name. Append it to the output
270 // list if provided.
271 ABSL_ASSERT(!part.name.empty());
272 if (name_list_out)
273 name_list_out->push_back(part.name);
274
275 // Compute the Part regex value. For kSegmentWildcard and kFullWildcard
276 // types we must convert the type enum back to the defined regex value.
Helmut Januschkaed56d612024-07-12 21:11:09277 std::string_view regex_value = part.value;
Ben Kelly36f7ba3e2020-11-24 19:48:46278 if (part.type == PartType::kSegmentWildcard)
279 regex_value = segment_wildcard_regex_;
280 else if (part.type == PartType::kFullWildcard)
281 regex_value = kFullWildcardRegex;
282
Ben Kellybe37e472021-09-01 01:28:02283 // Handle the case where there is no prefix or suffix value. This varies a
284 // bit depending on the modifier.
285 //
286 // If there is no modifier or an optional modifier, then we simply wrap the
287 // regex value in a capturing group:
Ben Kelly36f7ba3e2020-11-24 19:48:46288 //
289 // (<regex-value>)<modifier>
290 //
Ben Kellybe37e472021-09-01 01:28:02291 // If there is a modifier, then we need to use a non-capturing group for the
292 // regex value and an outer capturing group that includes the modifier as
293 // well. Like:
294 //
295 // ((?:<regex-value>)<modifier>)
Ben Kelly36f7ba3e2020-11-24 19:48:46296 if (part.prefix.empty() && part.suffix.empty()) {
Ben Kellybe37e472021-09-01 01:28:02297 if (part.modifier == Modifier::kNone ||
298 part.modifier == Modifier::kOptional) {
299 absl::StrAppendFormat(&result, "(%s)", regex_value);
300 AppendModifier(part.modifier, result);
301 } else {
302 absl::StrAppendFormat(&result, "((?:%s)", regex_value);
303 AppendModifier(part.modifier, result);
304 result += ")";
305 }
Ben Kelly36f7ba3e2020-11-24 19:48:46306 continue;
307 }
308
309 // Handle non-repeating regex Parts with a prefix and/or suffix. The
310 // capturing group again only contains the regex value. This inner group
311 // is compined with the prefix and/or suffix in an outer non-capturing
312 // group. Finally the modifier is applied to the entire outer group.
313 // For example:
314 //
315 // (?:<prefix>(<regex-value>)<suffix>)<modifier>
316 //
317 if (part.modifier == Modifier::kNone ||
318 part.modifier == Modifier::kOptional) {
319 result += "(?:";
Ben Kelly02c1d172021-03-16 15:33:25320 EscapeRegexpStringAndAppend(part.prefix, result);
Ben Kelly36f7ba3e2020-11-24 19:48:46321 absl::StrAppendFormat(&result, "(%s)", regex_value);
Ben Kelly02c1d172021-03-16 15:33:25322 EscapeRegexpStringAndAppend(part.suffix, result);
Ben Kelly36f7ba3e2020-11-24 19:48:46323 result += ")";
324 AppendModifier(part.modifier, result);
325 continue;
326 }
327
328 // Repeating Parts are dramatically more complicated. We want to exclude
329 // the initial prefix and the final suffix, but include them between any
330 // repeated elements. To achieve this we provide a separate initial
331 // part that excludes the prefix. Then the part is duplicated with the
332 // prefix/suffix values included in an optional repeating element. If
333 // zero values are permitted then a final optional modifier may be added.
334 // For example:
335 //
336 // (?:<prefix>((?:<regex-value>)(?:<suffix><prefix>(?:<regex-value>))*)<suffix>)?
337 //
338 result += "(?:";
Ben Kelly02c1d172021-03-16 15:33:25339 EscapeRegexpStringAndAppend(part.prefix, result);
Ben Kelly36f7ba3e2020-11-24 19:48:46340 absl::StrAppendFormat(&result, "((?:%s)(?:", regex_value);
Ben Kelly02c1d172021-03-16 15:33:25341 EscapeRegexpStringAndAppend(part.suffix, result);
342 EscapeRegexpStringAndAppend(part.prefix, result);
Ben Kelly36f7ba3e2020-11-24 19:48:46343 absl::StrAppendFormat(&result, "(?:%s))*)", regex_value);
Ben Kelly02c1d172021-03-16 15:33:25344 EscapeRegexpStringAndAppend(part.suffix, result);
Ben Kelly36f7ba3e2020-11-24 19:48:46345 result += ")";
346 if (part.modifier == Modifier::kZeroOrMore)
347 result += "?";
348 }
349
350 // Should we anchor the pattern to the end of the input string?
351 if (options_.end) {
352 // In non-strict mode an optional delimiter character is always
353 // permitted at the end of the string. For example, if the pattern
354 // is "/foo/bar" then it would match "/foo/bar/".
355 //
356 // [<delimiter chars>]?
357 //
358 if (!options_.strict) {
359 AppendDelimiterList(result);
360 result += "?";
361 }
362
363 // The options ends_with value contains a list of characters that
364 // may also signal the end of the pattern match.
365 if (options_.ends_with.empty()) {
366 // Simply anchor to the end of the input string.
367 result += "$";
368 } else {
369 // Anchor to either a ends_with character or the end of the input
370 // string. This uses a lookahead assertion.
371 //
372 // (?=[<ends_with chars>]|$)
373 //
374 result += "(?=";
375 AppendEndsWith(result);
376 result += ")";
377 }
378
379 return result;
380 }
381
382 // We are not anchored to the end of the input string.
383
384 // Again, if not in strict mode we permit an optional trailing delimiter
385 // character before anchoring to any ends_with characters with a lookahead
386 // assertion.
387 //
388 // (?:[<delimiter chars>](?=[<ends_with chars>]|$))?
389 //
390 if (!options_.strict) {
391 result += "(?:";
392 AppendDelimiterList(result);
393 result += "(?=";
394 AppendEndsWith(result);
395 result += "))?";
396 }
397
398 // Further, if the pattern does not end with a trailing delimiter character
399 // we also anchor to a delimiter character in our lookahead assertion. So
400 // a pattern "/foo/bar" would match "/foo/bar/baz", but not "/foo/barbaz".
401 //
402 // (?=[<delimiter chars>]|[<ends_with chars>]|$)
403 //
404 bool end_delimited = false;
405 if (!part_list_.empty()) {
406 auto& last_part = part_list_.back();
407 if (last_part.type == PartType::kFixed &&
408 last_part.modifier == Modifier::kNone) {
409 ABSL_ASSERT(!last_part.value.empty());
410 end_delimited = options_.delimiter_list.find(last_part.value.back()) !=
411 std::string::npos;
412 }
413 }
414 if (!end_delimited) {
415 result += "(?=";
416 AppendDelimiterList(result);
417 result += "|";
418 AppendEndsWith(result);
419 result += ")";
420 }
421
422 ABSL_ASSERT(result.size() == expected_length);
423 return result;
424}
425
Jeremy Roman8f369d22023-11-24 21:36:44426bool Pattern::HasRegexGroups() const {
427 for (const Part& part : part_list_) {
428 if (part.type == PartType::kRegex) {
429 return true;
430 }
431 }
432 return false;
433}
434
Ben Kelly19cd8192021-08-23 16:04:54435bool Pattern::CanDirectMatch() const {
436 // We currently only support direct matching with the options used by
437 // URLPattern.
Ben Kelly44fc5322021-08-23 19:59:39438 if (!options_.start || !options_.end || !options_.strict ||
439 !options_.sensitive) {
440 return false;
441 }
442
Ben Kellyc7d3d6c2021-09-01 18:09:25443 return part_list_.empty() || IsOnlyFullWildcard() || IsOnlyFixedText();
Ben Kelly19cd8192021-08-23 16:04:54444}
445
446bool Pattern::DirectMatch(
Helmut Januschkaed56d612024-07-12 21:11:09447 std::string_view input,
Ben Kelly7648458f92022-02-04 22:04:05448 std::vector<
Helmut Januschkaed56d612024-07-12 21:11:09449 std::pair<std::string_view, std::optional<std::string_view>>>*
Ben Kelly19cd8192021-08-23 16:04:54450 group_list_out) const {
451 ABSL_ASSERT(CanDirectMatch());
Ben Kelly44fc5322021-08-23 19:59:39452
453 if (part_list_.empty())
454 return input.empty();
455
Ben Kelly19cd8192021-08-23 16:04:54456 if (IsOnlyFullWildcard()) {
457 if (group_list_out)
458 group_list_out->emplace_back(part_list_[0].name, input);
459 return true;
460 }
Ben Kelly44fc5322021-08-23 19:59:39461
Ben Kellyc7d3d6c2021-09-01 18:09:25462 if (IsOnlyFixedText()) {
463 return part_list_[0].value == input;
464 }
465
Ben Kelly19cd8192021-08-23 16:04:54466 return false;
467}
468
Takashi Nakayama3b396f302025-05-19 02:59:02469base::expected<std::string, absl::Status> Pattern::Generate(
470 const std::unordered_map<std::string, std::string>& groups,
471 EncodeCallback callback) const {
472 std::string result;
473 for (auto&& p : part_list_) {
474 if (p.modifier != Modifier::kNone) {
475 return base::unexpected(absl::UnimplementedError(
476 "Patterns with modifiers are not supported."));
477 }
478 switch (p.type) {
479 case PartType::kFixed: {
480 ABSL_ASSERT(p.prefix.empty() && p.suffix.empty());
481 result += p.value;
482 continue;
483 }
484 case PartType::kSegmentWildcard: {
485 if (!p.HasCustomName()) {
486 // Reaches when input patterns has a RegExp that is identical to
487 // the segment wildcard regex string.
488 // e.g. { pathname: "/([^\\/]+?)" }
489 return base::unexpected(absl::UnimplementedError(
490 "Segment-Wildcards with numeric names are not supported."));
491 }
492
493 // Note that names are not encoded while we should encode values.
494 auto it = groups.find(p.name);
495 if (it == groups.end()) {
496 return base::unexpected(absl::InvalidArgumentError(
497 absl::StrFormat("No input found for `%s`", p.name)));
498 }
499
500 base::expected<std::string, absl::Status> encoded_value_result =
501 callback(it->second);
502 if (!encoded_value_result.has_value()) {
503 return base::unexpected(encoded_value_result.error());
504 }
505
506 std::string& value = encoded_value_result.value();
507
508 // Throws error if input strings have delimiter chars.
509 // TODO(crbug.com/414682820): support this according to specification
510 // discussions.
511 for (auto delimiter : options_.delimiter_list) {
512 if (value.find(delimiter) != std::string::npos) {
513 return base::unexpected(absl::UnimplementedError(absl::StrFormat(
514 "Unsupported input: `%s` contains delimiter char `%c`.", value,
515 delimiter)));
516 }
517 }
518
519 absl::StrAppend(&result, p.prefix, value, p.suffix);
520 continue;
521 }
522 case PartType::kFullWildcard:
523 case PartType::kRegex:
524 return base::unexpected(absl::UnimplementedError(
525 "Patterns with Full-Wildcards or RegExp are not supported."));
526 }
527 NOTREACHED();
528 }
529 return result;
530}
531
Ben Kelly36f7ba3e2020-11-24 19:48:46532size_t Pattern::RegexStringLength() const {
533 size_t result = 0;
534
535 // This method mirrors the logic and structure of GenerateRegexString(). If
536 // one changes, so should the other. See GenerateRegexString() for an
537 // explanation of the logic.
538
539 if (options_.start) {
540 // ^
541 result += 1;
542 }
543
544 for (const Part& part : part_list_) {
545 if (part.type == PartType::kFixed) {
546 if (part.modifier == Modifier::kNone) {
547 // <escaped-fixed-value>
Ben Kelly02c1d172021-03-16 15:33:25548 result += EscapedRegexpStringLength(part.value);
Ben Kelly36f7ba3e2020-11-24 19:48:46549 } else {
550 // (?:<escaped-fixed-value>)<modifier>
Ben Kelly02c1d172021-03-16 15:33:25551 result += EscapedRegexpStringLength(part.value) + 4 +
552 ModifierLength(part.modifier);
Ben Kelly36f7ba3e2020-11-24 19:48:46553 }
554 continue;
555 }
556
Helmut Januschkaed56d612024-07-12 21:11:09557 std::string_view regex_value = part.value;
Ben Kelly36f7ba3e2020-11-24 19:48:46558 if (part.type == PartType::kSegmentWildcard)
559 regex_value = segment_wildcard_regex_;
560 else if (part.type == PartType::kFullWildcard)
561 regex_value = kFullWildcardRegex;
562
563 if (part.prefix.empty() && part.suffix.empty()) {
564 // (<regex-value>)<modifier>
565 result += regex_value.size() + ModifierLength(part.modifier) + 2;
566 continue;
567 }
568
Ben Kelly02c1d172021-03-16 15:33:25569 size_t prefix_length = EscapedRegexpStringLength(part.prefix);
570 size_t suffix_length = EscapedRegexpStringLength(part.suffix);
Ben Kelly36f7ba3e2020-11-24 19:48:46571
572 if (part.modifier == Modifier::kNone ||
573 part.modifier == Modifier::kOptional) {
574 // (?:<prefix>(<regex-value>)<suffix>)<modifier>
575 result += prefix_length + regex_value.size() + suffix_length +
576 ModifierLength(part.modifier) + 6;
577 continue;
578 }
579
580 // (?:<prefix>((?:<regex-value>)(?:<suffix><prefix>(?:<regex-value>))*)<suffix>)?
581 result += prefix_length + regex_value.size() + suffix_length +
582 prefix_length + regex_value.size() + suffix_length + 19;
583 if (part.modifier == Modifier::kZeroOrMore)
584 result += 1;
585 }
586
587 if (options_.end) {
588 if (!options_.strict) {
589 // [<delimiter chars>]?
590 result += DelimiterListLength() + 1;
591 }
592
593 if (options_.ends_with.empty()) {
594 // $
595 result += 1;
596 } else {
597 // (?=[<ends_with chars>]|$)
598 result += EndsWithLength() + 4;
599 }
600 } else {
601 bool end_delimited = false;
602 if (!part_list_.empty()) {
603 auto& last_part = part_list_.back();
604 if (last_part.type == PartType::kFixed &&
605 last_part.modifier == Modifier::kNone) {
606 ABSL_ASSERT(!last_part.value.empty());
607 end_delimited = options_.delimiter_list.find(last_part.value.back()) !=
608 std::string::npos;
609 }
610 }
611
612 if (!options_.strict) {
613 // (?:[<delimiter chars>](?=[<ends_with chars>]|$))?
614 result += DelimiterListLength() + EndsWithLength() + 9;
615 }
616
617 if (!end_delimited) {
618 // (?=[<delimiter chars>]|[<ends_with chars>]|$)
619 result += DelimiterListLength() + EndsWithLength() + 5;
620 }
621 }
622
623 return result;
624}
625
626void Pattern::AppendDelimiterList(std::string& append_target) const {
627 append_target += "[";
Ben Kelly02c1d172021-03-16 15:33:25628 EscapeRegexpStringAndAppend(options_.delimiter_list, append_target);
Ben Kelly36f7ba3e2020-11-24 19:48:46629 append_target += "]";
630}
631
632size_t Pattern::DelimiterListLength() const {
Ben Kelly02c1d172021-03-16 15:33:25633 return EscapedRegexpStringLength(options_.delimiter_list) + 2;
Ben Kelly36f7ba3e2020-11-24 19:48:46634}
635
636void Pattern::AppendEndsWith(std::string& append_target) const {
637 append_target += "[";
Ben Kelly02c1d172021-03-16 15:33:25638 EscapeRegexpStringAndAppend(options_.ends_with, append_target);
Ben Kelly36f7ba3e2020-11-24 19:48:46639 append_target += "]|$";
640}
641
642size_t Pattern::EndsWithLength() const {
Ben Kelly02c1d172021-03-16 15:33:25643 return EscapedRegexpStringLength(options_.ends_with) + 4;
Ben Kelly36f7ba3e2020-11-24 19:48:46644}
Ben Kelly0e5c63e82020-11-12 21:24:08645
Ben Kelly19cd8192021-08-23 16:04:54646bool Pattern::IsOnlyFullWildcard() const {
647 if (part_list_.size() != 1)
648 return false;
649 auto& part = part_list_[0];
650 // The modifier does not matter as an optional or repeated full wildcard
651 // is functionally equivalent.
652 return part.type == PartType::kFullWildcard && part.prefix.empty() &&
653 part.suffix.empty();
654}
655
Ben Kellyc7d3d6c2021-09-01 18:09:25656bool Pattern::IsOnlyFixedText() const {
657 if (part_list_.size() != 1)
658 return false;
659 auto& part = part_list_[0];
660 bool result =
661 part.type == PartType::kFixed && part.modifier == Modifier::kNone;
662 if (result) {
663 ABSL_ASSERT(part.prefix.empty());
664 ABSL_ASSERT(part.suffix.empty());
665 }
666 return result;
667}
668
Ben Kelly0e5c63e82020-11-12 21:24:08669} // namespace liburlpattern