Avi Drissman | 505076bc | 2022-10-06 21:15:30 | [diff] [blame] | 1 | // Copyright 2020 The Chromium Authors |
Ben Kelly | ad8343ba | 2020-10-28 18:48:53 | [diff] [blame] | 2 | // Copyright 2014 Blake Embrey ([email protected]) |
| 3 | // Use of this source code is governed by an MIT-style license that can be |
| 4 | // found in the LICENSE file or at https://opensource.org/licenses/MIT. |
| 5 | |
| 6 | #include "third_party/liburlpattern/parse.h" |
| 7 | |
Helmut Januschka | ed56d61 | 2024-07-12 21:11:09 | [diff] [blame] | 8 | #include <string_view> |
Ben Kelly | 003c548 | 2021-08-17 19:28:17 | [diff] [blame] | 9 | #include <unordered_set> |
| 10 | |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 11 | #include "third_party/abseil-cpp/absl/base/macros.h" |
Takashi Nakayama | ff8e61c | 2025-05-02 05:17:04 | [diff] [blame] | 12 | #include "third_party/abseil-cpp/absl/status/statusor.h" |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 13 | #include "third_party/abseil-cpp/absl/strings/str_format.h" |
Ben Kelly | ad8343ba | 2020-10-28 18:48:53 | [diff] [blame] | 14 | #include "third_party/liburlpattern/pattern.h" |
Ben Kelly | 9bd0002d | 2020-11-05 03:34:18 | [diff] [blame] | 15 | #include "third_party/liburlpattern/tokenize.h" |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 16 | #include "third_party/liburlpattern/utils.h" |
| 17 | |
| 18 | // The following code is a translation from the path-to-regexp typescript at: |
| 19 | // |
| 20 | // https://github.com/pillarjs/path-to-regexp/blob/125c43e6481f68cc771a5af22b914acdb8c5ba1f/src/index.ts#L126-L232 |
Ben Kelly | ad8343ba | 2020-10-28 18:48:53 | [diff] [blame] | 21 | |
| 22 | namespace liburlpattern { |
| 23 | |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 24 | namespace { |
| 25 | |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 26 | // Helper class that tracks the parser state. |
| 27 | class State { |
| 28 | public: |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 29 | State(std::vector<Token> token_list, |
| 30 | EncodeCallback encode_callback, |
| 31 | Options options) |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 32 | : token_list_(std::move(token_list)), |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 33 | encode_callback_(std::move(encode_callback)), |
Ben Kelly | 36f7ba3e | 2020-11-24 19:48:46 | [diff] [blame] | 34 | options_(std::move(options)), |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 35 | segment_wildcard_regex_( |
Ben Kelly | 02c1d17 | 2021-03-16 15:33:25 | [diff] [blame] | 36 | absl::StrFormat("[^%s]+?", |
| 37 | EscapeRegexpString(options_.delimiter_list))) {} |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 38 | |
| 39 | // Return true if there are more tokens to process. |
| 40 | bool HasMoreTokens() const { return index_ < token_list_.size(); } |
| 41 | |
| 42 | // Attempt to consume the next Token, but only if it matches the given |
| 43 | // |type|. Returns a pointer to the Token on success or nullptr on failure. |
| 44 | const Token* TryConsume(TokenType type) { |
| 45 | ABSL_ASSERT(index_ < token_list_.size()); |
| 46 | TokenType next_type = token_list_[index_].type; |
| 47 | if (next_type != type) |
| 48 | return nullptr; |
| 49 | |
| 50 | // The last token should always be kEnd. |
| 51 | if ((index_ + 1) == token_list_.size()) |
| 52 | ABSL_ASSERT(token_list_[index_].type == TokenType::kEnd); |
| 53 | |
| 54 | return &(token_list_[index_++]); |
| 55 | } |
| 56 | |
| 57 | // Consume the next Token requiring it to be the given |type|. If this |
| 58 | // is not possible then return an error. |
| 59 | absl::StatusOr<const Token*> MustConsume(TokenType type) { |
| 60 | ABSL_ASSERT(index_ < token_list_.size()); |
| 61 | if (const Token* token = TryConsume(type)) |
| 62 | return token; |
Ben Kelly | 18869af75 | 2021-07-14 02:08:25 | [diff] [blame] | 63 | return absl::InvalidArgumentError(absl::StrFormat( |
| 64 | "Unexpected %s '%s' at index %d, expected %s.", |
| 65 | TokenTypeToString(token_list_[index_].type), token_list_[index_].value, |
| 66 | token_list_[index_].index, TokenTypeToString(type))); |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 67 | } |
| 68 | |
Ben Kelly | d228e3c | 2021-02-24 00:43:40 | [diff] [blame] | 69 | const Token* TryConsumeModifier() { |
| 70 | const Token* result = TryConsume(TokenType::kOtherModifier); |
| 71 | if (!result) |
| 72 | result = TryConsume(TokenType::kAsterisk); |
| 73 | return result; |
| 74 | } |
| 75 | |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 76 | // Consume as many sequential kChar and kEscapedChar Tokens as possible |
| 77 | // appending them together into a single string value. |
| 78 | std::string ConsumeText() { |
| 79 | // Unfortunately we cannot use a view here and must copy into a new |
| 80 | // string. This is necessary to flatten escape sequences into |
| 81 | // a single value with other characters. |
| 82 | std::string result; |
| 83 | const Token* token = nullptr; |
| 84 | do { |
| 85 | token = TryConsume(TokenType::kChar); |
| 86 | if (!token) |
| 87 | token = TryConsume(TokenType::kEscapedChar); |
| 88 | if (token) |
| 89 | result.append(token->value.data(), token->value.size()); |
| 90 | } while (token); |
| 91 | return result; |
| 92 | } |
| 93 | |
| 94 | // Append the given Token value to the pending fixed value. This will |
| 95 | // be converted to a kFixed Part when we reach the end of a run of |
| 96 | // kChar and kEscapedChar tokens. |
Helmut Januschka | ed56d61 | 2024-07-12 21:11:09 | [diff] [blame] | 97 | void AppendToPendingFixedValue(std::string_view token_value) { |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 98 | pending_fixed_value_.append(token_value.data(), token_value.size()); |
| 99 | } |
| 100 | |
| 101 | // Convert the pending fixed value, if any, to a kFixed Part. Has no effect |
| 102 | // if there is no pending value. |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 103 | absl::Status MaybeAddPartFromPendingFixedValue() { |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 104 | if (pending_fixed_value_.empty()) |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 105 | return absl::OkStatus(); |
| 106 | |
| 107 | auto encoded_result = encode_callback_(std::move(pending_fixed_value_)); |
Takashi Nakayama | e011ac4 | 2025-05-02 00:20:01 | [diff] [blame] | 108 | if (!encoded_result.has_value()) { |
| 109 | return encoded_result.error(); |
| 110 | } |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 111 | |
| 112 | part_list_.emplace_back(PartType::kFixed, std::move(encoded_result.value()), |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 113 | Modifier::kNone); |
| 114 | pending_fixed_value_ = ""; |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 115 | return absl::OkStatus(); |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 116 | } |
| 117 | |
| 118 | // Add a Part for the given set of tokens. |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 119 | absl::Status AddPart(std::string prefix, |
| 120 | const Token* name_token, |
| 121 | const Token* regex_or_wildcard_token, |
| 122 | std::string suffix, |
| 123 | const Token* modifier_token) { |
Ben Kelly | d228e3c | 2021-02-24 00:43:40 | [diff] [blame] | 124 | // Convert the modifier Token into a Modifier enum value. |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 125 | Modifier modifier = Modifier::kNone; |
| 126 | if (modifier_token) { |
| 127 | ABSL_ASSERT(!modifier_token->value.empty()); |
| 128 | switch (modifier_token->value[0]) { |
| 129 | case '?': |
| 130 | modifier = Modifier::kOptional; |
| 131 | break; |
| 132 | case '*': |
| 133 | modifier = Modifier::kZeroOrMore; |
| 134 | break; |
| 135 | case '+': |
| 136 | modifier = Modifier::kOneOrMore; |
| 137 | break; |
| 138 | default: |
| 139 | ABSL_ASSERT(false); |
| 140 | break; |
| 141 | } |
| 142 | } |
| 143 | |
Ben Kelly | ccecd87 | 2021-07-27 18:37:17 | [diff] [blame] | 144 | // If this is a `{ ... }` grouping containing only fixed text, then |
| 145 | // just add it to our pending value for now. We want to collect as |
| 146 | // much fixed text as possible in the buffer before commiting it to |
| 147 | // a kFixed Part. |
| 148 | if (!name_token && !regex_or_wildcard_token && |
| 149 | modifier == Modifier::kNone) { |
| 150 | AppendToPendingFixedValue(prefix); |
| 151 | return absl::OkStatus(); |
| 152 | } |
| 153 | |
| 154 | // We are about to add some kind of matching group Part to the list. |
| 155 | // Before doing that make sure to flush any pending fixed test to a |
| 156 | // kFixed Part. |
| 157 | absl::Status status = MaybeAddPartFromPendingFixedValue(); |
| 158 | if (!status.ok()) |
| 159 | return status; |
| 160 | |
Ben Kelly | d228e3c | 2021-02-24 00:43:40 | [diff] [blame] | 161 | // If there is no name, regex, or wildcard tokens then this is just a fixed |
| 162 | // string grouping; e.g. "{foo}?". The fixed string ends up in the prefix |
| 163 | // value since it consumed the entire text of the grouping. If the prefix |
| 164 | // value is empty then its an empty "{}" group and we return without adding |
| 165 | // any Part. |
| 166 | if (!name_token && !regex_or_wildcard_token) { |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 167 | ABSL_ASSERT(suffix.empty()); |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 168 | if (prefix.empty()) |
| 169 | return absl::OkStatus(); |
| 170 | auto result = encode_callback_(std::move(prefix)); |
Takashi Nakayama | e011ac4 | 2025-05-02 00:20:01 | [diff] [blame] | 171 | if (!result.has_value()) { |
| 172 | return result.error(); |
| 173 | } |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 174 | part_list_.emplace_back(PartType::kFixed, *result, modifier); |
| 175 | return absl::OkStatus(); |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 176 | } |
| 177 | |
Ben Kelly | d228e3c | 2021-02-24 00:43:40 | [diff] [blame] | 178 | // Determine the regex value. If there is a |kRegex| Token, then this is |
| 179 | // explicitly set by that Token. If there is a wildcard token, then this |
| 180 | // is set to the |kFullWildcardRegex| constant. Otherwise a kName Token by |
| 181 | // itself gets an implicit regex value that matches through to the end of |
| 182 | // the segment. This is represented by the |segment_wildcard_regex_| value. |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 183 | std::string regex_value; |
Ben Kelly | d228e3c | 2021-02-24 00:43:40 | [diff] [blame] | 184 | if (!regex_or_wildcard_token) |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 185 | regex_value = segment_wildcard_regex_; |
Ben Kelly | d228e3c | 2021-02-24 00:43:40 | [diff] [blame] | 186 | else if (regex_or_wildcard_token->type == TokenType::kAsterisk) |
| 187 | regex_value = kFullWildcardRegex; |
| 188 | else |
| 189 | regex_value = std::string(regex_or_wildcard_token->value); |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 190 | |
| 191 | // Next determine the type of the Part. This depends on the regex value |
| 192 | // since we give certain values special treatment with their own type. |
| 193 | // A |segment_wildcard_regex_| is mapped to the kSegmentWildcard type. A |
Ben Kelly | 36f7ba3e | 2020-11-24 19:48:46 | [diff] [blame] | 194 | // |kFullWildcardRegex| is mapped to the kFullWildcard type. Otherwise |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 195 | // the Part gets the kRegex type. |
| 196 | PartType type = PartType::kRegex; |
| 197 | if (regex_value == segment_wildcard_regex_) { |
| 198 | type = PartType::kSegmentWildcard; |
| 199 | regex_value = ""; |
Ben Kelly | 36f7ba3e | 2020-11-24 19:48:46 | [diff] [blame] | 200 | } else if (regex_value == kFullWildcardRegex) { |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 201 | type = PartType::kFullWildcard; |
| 202 | regex_value = ""; |
| 203 | } |
| 204 | |
| 205 | // Every kRegex, kSegmentWildcard, and kFullWildcard Part must have a |
| 206 | // group name. If there was a kName Token, then use the explicitly |
| 207 | // set name. Otherwise we generate a numeric based key for the name. |
| 208 | std::string name; |
| 209 | if (name_token) |
| 210 | name = std::string(name_token->value); |
Ben Kelly | d228e3c | 2021-02-24 00:43:40 | [diff] [blame] | 211 | else if (regex_or_wildcard_token) |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 212 | name = GenerateKey(); |
| 213 | |
Ben Kelly | 003c548 | 2021-08-17 19:28:17 | [diff] [blame] | 214 | auto name_set_result = name_set_.insert(name); |
| 215 | if (!name_set_result.second) { |
| 216 | return absl::InvalidArgumentError( |
| 217 | absl::StrFormat("Duplicate group name '%s' at index %d.", name, |
| 218 | token_list_[index_].index)); |
| 219 | } |
| 220 | |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 221 | auto prefix_result = encode_callback_(std::move(prefix)); |
Takashi Nakayama | e011ac4 | 2025-05-02 00:20:01 | [diff] [blame] | 222 | if (!prefix_result.has_value()) { |
| 223 | return prefix_result.error(); |
| 224 | } |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 225 | |
| 226 | auto suffix_result = encode_callback_(std::move(suffix)); |
Takashi Nakayama | e011ac4 | 2025-05-02 00:20:01 | [diff] [blame] | 227 | if (!suffix_result.has_value()) { |
| 228 | return suffix_result.error(); |
| 229 | } |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 230 | |
| 231 | // Finally add the part to the list. We encode the prefix and suffix, but |
| 232 | // must be careful not to encode the regex value since it can change the |
| 233 | // meaning of the regular expression. |
| 234 | part_list_.emplace_back(type, std::move(name), *prefix_result, |
| 235 | std::move(regex_value), *suffix_result, modifier); |
| 236 | return absl::OkStatus(); |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 237 | } |
| 238 | |
Ben Kelly | 36f7ba3e | 2020-11-24 19:48:46 | [diff] [blame] | 239 | Pattern TakeAsPattern() { |
| 240 | return Pattern(std::move(part_list_), std::move(options_), |
| 241 | std::move(segment_wildcard_regex_)); |
| 242 | } |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 243 | |
| 244 | private: |
| 245 | // Generate a numeric key string to be used for groups that do not |
| 246 | // have an explicit kName Token. |
| 247 | std::string GenerateKey() { return absl::StrFormat("%d", next_key_++); } |
| 248 | |
| 249 | // The input list of Token objects to process. |
| 250 | const std::vector<Token> token_list_; |
| 251 | |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 252 | EncodeCallback encode_callback_; |
| 253 | |
Ben Kelly | 36f7ba3e | 2020-11-24 19:48:46 | [diff] [blame] | 254 | // The set of options used to parse and construct this Pattern. This |
| 255 | // controls the behavior of things like kSegmentWildcard parts, etc. |
| 256 | Options options_; |
| 257 | |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 258 | // The special regex value corresponding to the default regex value |
| 259 | // given to a lone kName Token. This is a variable since its value |
| 260 | // is dependent on the |delimiter_list| passed to the constructor. |
| 261 | const std::string segment_wildcard_regex_; |
| 262 | |
| 263 | // The output list of Pattern Part objects. |
| 264 | std::vector<Part> part_list_; |
| 265 | |
Ben Kelly | 003c548 | 2021-08-17 19:28:17 | [diff] [blame] | 266 | // Tracks which names have been seen before so we can error on duplicates. |
| 267 | std::unordered_set<std::string> name_set_; |
| 268 | |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 269 | // A buffer of kChar and kEscapedChar values that are pending the creation |
| 270 | // of a kFixed Part. |
| 271 | std::string pending_fixed_value_; |
| 272 | |
| 273 | // The index of the next Token in |token_list_|. |
| 274 | size_t index_ = 0; |
| 275 | |
| 276 | // The next value to use when generating a numeric based name for Parts |
| 277 | // without explicit kName Tokens. |
| 278 | int next_key_ = 0; |
| 279 | }; |
| 280 | |
| 281 | } // namespace |
| 282 | |
Takashi Nakayama | e011ac4 | 2025-05-02 00:20:01 | [diff] [blame] | 283 | base::expected<Pattern, absl::Status> Parse(std::string_view pattern, |
| 284 | EncodeCallback encode_callback, |
| 285 | const Options& options) { |
Ben Kelly | 9bd0002d | 2020-11-05 03:34:18 | [diff] [blame] | 286 | auto result = Tokenize(pattern); |
Takashi Nakayama | ff8e61c | 2025-05-02 05:17:04 | [diff] [blame] | 287 | if (!result.has_value()) { |
| 288 | return base::unexpected(result.error()); |
| 289 | } |
Ben Kelly | ad8343ba | 2020-10-28 18:48:53 | [diff] [blame] | 290 | |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 291 | State state(std::move(result.value()), std::move(encode_callback), options); |
Ben Kelly | ad8343ba | 2020-10-28 18:48:53 | [diff] [blame] | 292 | |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 293 | while (state.HasMoreTokens()) { |
| 294 | // Look for the sequence: <prefix char><name><regex><modifier> |
| 295 | // There could be from zero to all through of these tokens. For |
| 296 | // example: |
| 297 | // * "/:foo(bar)?" - all four tokens |
| 298 | // * "/" - just a char token |
| 299 | // * ":foo" - just a name token |
| 300 | // * "(bar)" - just a regex token |
| 301 | // * "/:foo" - char and name tokens |
| 302 | // * "/(bar)" - char and regex tokens |
| 303 | // * "/:foo?" - char, name, and modifier tokens |
| 304 | // * "/(bar)?" - char, regex, and modifier tokens |
| 305 | const Token* char_token = state.TryConsume(TokenType::kChar); |
| 306 | const Token* name_token = state.TryConsume(TokenType::kName); |
Ben Kelly | d228e3c | 2021-02-24 00:43:40 | [diff] [blame] | 307 | const Token* regex_or_wildcard_token = state.TryConsume(TokenType::kRegex); |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 308 | |
Ben Kelly | d228e3c | 2021-02-24 00:43:40 | [diff] [blame] | 309 | // If there is no name or regex token, then we may have a wildcard `*` |
| 310 | // token in place of an unnamed regex token. Each wildcard will be |
| 311 | // treated as being equivalent to a "(.*)" regex token. For example: |
| 312 | // * "/*" - equivalent to "/(.*)" |
| 313 | // * "/*?" - equivalent to "/(.*)?" |
| 314 | if (!name_token && !regex_or_wildcard_token) |
| 315 | regex_or_wildcard_token = state.TryConsume(TokenType::kAsterisk); |
| 316 | |
| 317 | // If there is a name, regex, or wildcard token then we need to add a |
| 318 | // Pattern Part immediately. |
| 319 | if (name_token || regex_or_wildcard_token) { |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 320 | // Determine if the char token is a valid prefix. Only characters in the |
| 321 | // configured prefix_list are automatically treated as prefixes. A |
| 322 | // kEscapedChar Token is never treated as a prefix. |
Helmut Januschka | ed56d61 | 2024-07-12 21:11:09 | [diff] [blame] | 323 | std::string_view prefix = char_token ? char_token->value : ""; |
Ben Kelly | 36f7ba3e | 2020-11-24 19:48:46 | [diff] [blame] | 324 | if (options.prefix_list.find(prefix.data(), /*pos=*/0, prefix.size()) == |
| 325 | std::string::npos) { |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 326 | // This is not a prefix character. Add it to the buffered characters |
| 327 | // to be added as a kFixed Part later. |
| 328 | state.AppendToPendingFixedValue(prefix); |
Helmut Januschka | ed56d61 | 2024-07-12 21:11:09 | [diff] [blame] | 329 | prefix = std::string_view(); |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 330 | } |
| 331 | |
| 332 | // If we have any buffered characters in a pending fixed value, then |
| 333 | // convert them into a kFixed Part now. |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 334 | absl::Status status = state.MaybeAddPartFromPendingFixedValue(); |
| 335 | if (!status.ok()) |
Takashi Nakayama | e011ac4 | 2025-05-02 00:20:01 | [diff] [blame] | 336 | return base::unexpected(status); |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 337 | |
| 338 | // kName and kRegex tokens can optionally be followed by a modifier. |
Ben Kelly | d228e3c | 2021-02-24 00:43:40 | [diff] [blame] | 339 | const Token* modifier_token = state.TryConsumeModifier(); |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 340 | |
Ben Kelly | d228e3c | 2021-02-24 00:43:40 | [diff] [blame] | 341 | // Add the Part for the name and regex/wildcard tokens. |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 342 | status = state.AddPart(std::string(prefix), name_token, |
| 343 | regex_or_wildcard_token, |
| 344 | /*suffix=*/"", modifier_token); |
| 345 | if (!status.ok()) |
Takashi Nakayama | e011ac4 | 2025-05-02 00:20:01 | [diff] [blame] | 346 | return base::unexpected(status); |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 347 | continue; |
| 348 | } |
| 349 | |
| 350 | // There was neither a kRegex or kName token, so consider if we just have a |
| 351 | // fixed string part. A fixed string can consist of kChar or kEscapedChar |
| 352 | // tokens. These just get added to the buffered pending fixed value for |
| 353 | // now. It will get converted to a kFixed Part later. |
| 354 | const Token* fixed_token = char_token; |
| 355 | if (!fixed_token) |
| 356 | fixed_token = state.TryConsume(TokenType::kEscapedChar); |
| 357 | if (fixed_token) { |
| 358 | state.AppendToPendingFixedValue(fixed_token->value); |
| 359 | continue; |
| 360 | } |
| 361 | |
| 362 | // There was not a kChar or kEscapedChar token, so we no we are at the end |
Ben Kelly | ccecd87 | 2021-07-27 18:37:17 | [diff] [blame] | 363 | // of any fixed string. Do not yet convert the pending fixed value into |
| 364 | // a kFixedPart, though. Its possible there will be further fixed text in |
| 365 | // a `{ ... }` group, etc. |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 366 | |
| 367 | // Look for the sequence: |
| 368 | // |
| 369 | // <open><char prefix><name><regex><char suffix><close><modifier> |
| 370 | // |
| 371 | // The open and close are required, but the other tokens are optional. |
| 372 | // For example: |
| 373 | // * "{a:foo(.*)b}?" - all tokens present |
| 374 | // * "{:foo}?" - just name and modifier tokens |
| 375 | // * "{(.*)}?" - just regex and modifier tokens |
| 376 | // * "{ab}?" - just char and modifier tokens |
| 377 | const Token* open_token = state.TryConsume(TokenType::kOpen); |
| 378 | if (open_token) { |
| 379 | std::string prefix = state.ConsumeText(); |
| 380 | const Token* name_token = state.TryConsume(TokenType::kName); |
Ben Kelly | d228e3c | 2021-02-24 00:43:40 | [diff] [blame] | 381 | const Token* regex_or_wildcard_token = |
| 382 | state.TryConsume(TokenType::kRegex); |
| 383 | |
| 384 | // If there is no name or regex token, then we may have a wildcard `*` |
| 385 | // token in place of an unnamed regex token. Each wildcard will be |
| 386 | // treated as being equivalent to a "(.*)" regex token. For example, |
| 387 | // "{a*b}" is equivalent to "{a(.*)b}". |
| 388 | if (!name_token && !regex_or_wildcard_token) |
| 389 | regex_or_wildcard_token = state.TryConsume(TokenType::kAsterisk); |
| 390 | |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 391 | std::string suffix = state.ConsumeText(); |
| 392 | |
| 393 | auto result = state.MustConsume(TokenType::kClose); |
| 394 | if (!result.ok()) |
Takashi Nakayama | e011ac4 | 2025-05-02 00:20:01 | [diff] [blame] | 395 | return base::unexpected(result.status()); |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 396 | |
Ben Kelly | d228e3c | 2021-02-24 00:43:40 | [diff] [blame] | 397 | const Token* modifier_token = state.TryConsumeModifier(); |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 398 | |
Ben Kelly | ccecd87 | 2021-07-27 18:37:17 | [diff] [blame] | 399 | absl::Status status = |
Ben Kelly | d18c0e82 | 2021-03-05 22:42:17 | [diff] [blame] | 400 | state.AddPart(std::move(prefix), name_token, regex_or_wildcard_token, |
| 401 | std::move(suffix), modifier_token); |
| 402 | if (!status.ok()) |
Takashi Nakayama | e011ac4 | 2025-05-02 00:20:01 | [diff] [blame] | 403 | return base::unexpected(status); |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 404 | continue; |
| 405 | } |
| 406 | |
Ben Kelly | ccecd87 | 2021-07-27 18:37:17 | [diff] [blame] | 407 | // We are about to end the pattern string, so flush any pending text to |
| 408 | // a kFixed Part. |
| 409 | absl::Status status = state.MaybeAddPartFromPendingFixedValue(); |
| 410 | if (!status.ok()) |
Takashi Nakayama | e011ac4 | 2025-05-02 00:20:01 | [diff] [blame] | 411 | return base::unexpected(status); |
Ben Kelly | ccecd87 | 2021-07-27 18:37:17 | [diff] [blame] | 412 | |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 413 | // We didn't find any tokens allowed by the syntax, so we should be |
| 414 | // at the end of the token list. If there is a syntax error, this |
| 415 | // is where it will typically be caught. |
| 416 | auto result = state.MustConsume(TokenType::kEnd); |
| 417 | if (!result.ok()) |
Takashi Nakayama | e011ac4 | 2025-05-02 00:20:01 | [diff] [blame] | 418 | return base::unexpected(result.status()); |
Ben Kelly | 0e5c63e8 | 2020-11-12 21:24:08 | [diff] [blame] | 419 | } |
| 420 | |
| 421 | return state.TakeAsPattern(); |
Ben Kelly | ad8343ba | 2020-10-28 18:48:53 | [diff] [blame] | 422 | } |
| 423 | |
| 424 | } // namespace liburlpattern |