Skip to content

Commit 3e10c46

Browse files
committedFeb 23, 2024
Duplicated hash keys
1 parent 69d6378 commit 3e10c46

File tree

12 files changed

+545
-26
lines changed

12 files changed

+545
-26
lines changed
 

‎include/prism.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "prism/parser.h"
2222
#include "prism/prettyprint.h"
2323
#include "prism/regexp.h"
24+
#include "prism/static_literals.h"
2425
#include "prism/version.h"
2526

2627
#include <assert.h>

‎include/prism/diagnostic.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ typedef enum {
306306
PM_WARN_AMBIGUOUS_SLASH,
307307
PM_WARN_EQUAL_IN_CONDITIONAL,
308308
PM_WARN_END_IN_METHOD,
309+
PM_WARN_DUPLICATED_HASH_KEY,
309310
PM_WARN_FLOAT_OUT_OF_RANGE,
310311

311312
// This is the number of diagnostic codes.

‎include/prism/node.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,17 @@
1010
#include "prism/parser.h"
1111
#include "prism/util/pm_buffer.h"
1212

13+
/**
14+
* Attempts to grow the node list to the next size. If there is already
15+
* capacity in the list, this function does nothing. Otherwise it reallocates
16+
* the list to be twice as large as it was before. If the reallocation fails,
17+
* this function returns false, otherwise it returns true.
18+
*
19+
* @param list The list to grow.
20+
* @return True if the list was successfully grown, false otherwise.
21+
*/
22+
bool pm_node_list_grow(pm_node_list_t *list);
23+
1324
/**
1425
* Append a new node onto the end of the node list.
1526
*
@@ -18,6 +29,13 @@
1829
*/
1930
void pm_node_list_append(pm_node_list_t *list, pm_node_t *node);
2031

32+
/**
33+
* Free the internal memory associated with the given node list.
34+
*
35+
* @param list The list to free.
36+
*/
37+
void pm_node_list_free(pm_node_list_t *list);
38+
2139
/**
2240
* Deallocate a node and all of its children.
2341
*

‎include/prism/static_literals.h

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
/**
2+
* @file static_literals.h
3+
*
4+
* A set of static literal nodes that can be checked for duplicates.
5+
*/
6+
#ifndef PRISM_STATIC_LITERALS_H
7+
#define PRISM_STATIC_LITERALS_H
8+
9+
#include "prism/defines.h"
10+
#include "prism/ast.h"
11+
#include "prism/node.h"
12+
#include "prism/parser.h"
13+
14+
#include <assert.h>
15+
#include <stdbool.h>
16+
17+
/**
18+
* Certain sets of nodes (hash keys and when clauses) check for duplicate nodes
19+
* to alert the user of potential issues. To do this, we keep a set of the nodes
20+
* that have been seen so far, and compare whenever we find a new node.
21+
*
22+
* We bucket the nodes based on their type to minimize the number of comparisons
23+
* that need to be performed.
24+
*/
25+
typedef struct {
26+
/**
27+
* This is the set of IntegerNode and SourceLineNode instances. We store
28+
* them in a sorted list so that we can binary search through them to find
29+
* duplicates.
30+
*/
31+
pm_node_list_t integer_nodes;
32+
33+
/**
34+
* This is the set of FloatNode instances. We store them in a sorted list so
35+
* that we can binary search through them to find duplicates.
36+
*/
37+
pm_node_list_t float_nodes;
38+
39+
/**
40+
* This is the set of RationalNode instances. We store them in a flat list
41+
* that must be searched linearly.
42+
*/
43+
pm_node_list_t rational_nodes;
44+
45+
/**
46+
* This is the set of ImaginaryNode instances. We store them in a flat list
47+
* that must be searched linearly.
48+
*/
49+
pm_node_list_t imaginary_nodes;
50+
51+
/**
52+
* This is the set of StringNode and SourceFileNode instances. We store them
53+
* in a sorted list so that we can binary search through them to find
54+
* duplicates.
55+
*/
56+
pm_node_list_t string_nodes;
57+
58+
/**
59+
* This is the set of RegularExpressionNode instances. We store them in a
60+
* sorted list so that we can binary search through them to find duplicates.
61+
*/
62+
pm_node_list_t regexp_nodes;
63+
64+
/**
65+
* This is the set of SymbolNode instances. We store them in a sorted list
66+
* so that we can binary search through them to find duplicates.
67+
*/
68+
pm_node_list_t symbol_nodes;
69+
70+
/**
71+
* A pointer to the last TrueNode instance that was inserted, or NULL.
72+
*/
73+
pm_node_t *true_node;
74+
75+
/**
76+
* A pointer to the last FalseNode instance that was inserted, or NULL.
77+
*/
78+
pm_node_t *false_node;
79+
80+
/**
81+
* A pointer to the last NilNode instance that was inserted, or NULL.
82+
*/
83+
pm_node_t *nil_node;
84+
85+
/**
86+
* A pointer to the last SourceEncodingNode instance that was inserted, or
87+
* NULL.
88+
*/
89+
pm_node_t *source_encoding_node;
90+
} pm_static_literals_t;
91+
92+
/**
93+
* Add a node to the set of static literals.
94+
*
95+
* @param parser The parser that created the node.
96+
* @param literals The set of static literals to add the node to.
97+
* @param node The node to add to the set.
98+
* @return A pointer to the node that is being overwritten, if there is one.
99+
*/
100+
pm_node_t * pm_static_literals_add(const pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node);
101+
102+
/**
103+
* Free the internal memory associated with the given static literals set.
104+
*
105+
* @param literals The set of static literals to free.
106+
*/
107+
void pm_static_literals_free(pm_static_literals_t *literals);
108+
109+
#endif

‎include/prism/util/pm_integer.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,17 @@ PRISM_EXPORTED_FUNCTION void pm_integer_parse(pm_integer_t *integer, pm_integer_
9393
*/
9494
size_t pm_integer_memsize(const pm_integer_t *integer);
9595

96+
/**
97+
* Compare two integers. This function returns -1 if the left integer is less
98+
* than the right integer, 0 if they are equal, and 1 if the left integer is
99+
* greater than the right integer.
100+
*
101+
* @param left The left integer to compare.
102+
* @param right The right integer to compare.
103+
* @return The result of the comparison.
104+
*/
105+
int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right);
106+
96107
/**
97108
* Free the internal memory of an integer. This memory will only be allocated if
98109
* the integer exceeds the size of a single node in the linked list.

‎src/diagnostic.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
306306
[PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS] = { "ambiguous first argument; put parentheses or a space even after `+` operator", PM_WARNING_LEVEL_VERBOSE },
307307
[PM_WARN_AMBIGUOUS_PREFIX_STAR] = { "ambiguous `*` has been interpreted as an argument prefix", PM_WARNING_LEVEL_VERBOSE },
308308
[PM_WARN_AMBIGUOUS_SLASH] = { "ambiguous `/`; wrap regexp in parentheses or add a space after `/` operator", PM_WARNING_LEVEL_VERBOSE },
309+
[PM_WARN_DUPLICATED_HASH_KEY] = { "key %.*s is duplicated and overwritten on line %" PRIi32, PM_WARNING_LEVEL_DEFAULT },
309310
[PM_WARN_EQUAL_IN_CONDITIONAL] = { "found `= literal' in conditional, should be ==", PM_WARNING_LEVEL_DEFAULT },
310311
[PM_WARN_END_IN_METHOD] = { "END in method; use at_exit", PM_WARNING_LEVEL_DEFAULT },
311312
[PM_WARN_FLOAT_OUT_OF_RANGE] = { "Float %.*s%s out of range", PM_WARNING_LEVEL_VERBOSE }

‎src/prism.c

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11676,11 +11676,32 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
1167611676
return statements;
1167711677
}
1167811678

11679+
/**
11680+
* Add a node to a set of static literals that holds a set of hash keys. If the
11681+
* node is a duplicate, then add an appropriate warning.
11682+
*/
11683+
static void
11684+
pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
11685+
const pm_node_t *duplicated = pm_static_literals_add(parser, literals, node);
11686+
11687+
if (duplicated != NULL) {
11688+
pm_diagnostic_list_append_format(
11689+
&parser->warning_list,
11690+
duplicated->location.start,
11691+
duplicated->location.end,
11692+
PM_WARN_DUPLICATED_HASH_KEY,
11693+
(int) (duplicated->location.end - duplicated->location.start),
11694+
duplicated->location.start,
11695+
pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
11696+
);
11697+
}
11698+
}
11699+
1167911700
/**
1168011701
* Parse all of the elements of a hash. returns true if a double splat was found.
1168111702
*/
1168211703
static bool
11683-
parse_assocs(pm_parser_t *parser, pm_node_t *node) {
11704+
parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
1168411705
assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
1168511706
bool contains_keyword_splat = false;
1168611707

@@ -11709,6 +11730,8 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
1170911730
parser_lex(parser);
1171011731

1171111732
pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
11733+
pm_hash_key_static_literals_add(parser, literals, key);
11734+
1171211735
pm_token_t operator = not_provided(parser);
1171311736
pm_node_t *value = NULL;
1171411737

@@ -11738,8 +11761,16 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
1173811761
}
1173911762
default: {
1174011763
pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_KEY);
11741-
pm_token_t operator;
1174211764

11765+
// Hash keys that are strings are automatically frozen. We will
11766+
// mark that here.
11767+
if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
11768+
pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
11769+
}
11770+
11771+
pm_hash_key_static_literals_add(parser, literals, key);
11772+
11773+
pm_token_t operator;
1174311774
if (pm_symbol_node_label_p(key)) {
1174411775
operator = not_provided(parser);
1174511776
} else {
@@ -11773,6 +11804,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
1177311804
// Otherwise by default we will exit out of this loop.
1177411805
break;
1177511806
}
11807+
1177611808
return contains_keyword_splat;
1177711809
}
1177811810

@@ -11830,12 +11862,17 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
1183011862
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
1183111863
argument = (pm_node_t *) hash;
1183211864

11833-
bool contains_keyword_splat = parse_assocs(parser, (pm_node_t *) hash);
11834-
parsed_bare_hash = true;
11865+
pm_static_literals_t literals = { 0 };
11866+
bool contains_keyword_splat = parse_assocs(parser, &literals, (pm_node_t *) hash);
11867+
1183511868
parse_arguments_append(parser, arguments, argument);
1183611869
if (contains_keyword_splat) {
1183711870
pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
1183811871
}
11872+
11873+
pm_static_literals_free(&literals);
11874+
parsed_bare_hash = true;
11875+
1183911876
break;
1184011877
}
1184111878
case PM_TOKEN_UAMPERSAND: {
@@ -11925,10 +11962,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
1192511962

1192611963
pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
1192711964

11965+
// Create the set of static literals for this hash.
11966+
pm_static_literals_t literals = { 0 };
11967+
pm_hash_key_static_literals_add(parser, &literals, argument);
11968+
1192811969
// Finish parsing the one we are part way through
1192911970
pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
11930-
1193111971
argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
11972+
1193211973
pm_keyword_hash_node_elements_append(bare_hash, argument);
1193311974
argument = (pm_node_t *) bare_hash;
1193411975

@@ -11937,9 +11978,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
1193711978
token_begins_expression_p(parser->current.type) ||
1193811979
match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
1193911980
)) {
11940-
contains_keyword_splat = parse_assocs(parser, (pm_node_t *) bare_hash);
11981+
contains_keyword_splat = parse_assocs(parser, &literals, (pm_node_t *) bare_hash);
1194111982
}
1194211983

11984+
pm_static_literals_free(&literals);
1194311985
parsed_bare_hash = true;
1194411986
} else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
1194511987
// TODO: Could we solve this with binding powers instead?
@@ -14661,13 +14703,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
1466114703
pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
1466214704
}
1466314705

14664-
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
14665-
element = (pm_node_t *)hash;
14706+
element = (pm_node_t *) pm_keyword_hash_node_create(parser);
14707+
pm_static_literals_t literals = { 0 };
1466614708

1466714709
if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
14668-
parse_assocs(parser, (pm_node_t *) hash);
14710+
parse_assocs(parser, &literals, element);
1466914711
}
1467014712

14713+
pm_static_literals_free(&literals);
1467114714
parsed_bare_hash = true;
1467214715
} else {
1467314716
element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION);
@@ -14678,6 +14721,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
1467814721
}
1467914722

1468014723
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
14724+
pm_static_literals_t literals = { 0 };
14725+
pm_hash_key_static_literals_add(parser, &literals, element);
1468114726

1468214727
pm_token_t operator;
1468314728
if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
@@ -14690,11 +14735,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
1469014735
pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
1469114736
pm_keyword_hash_node_elements_append(hash, assoc);
1469214737

14693-
element = (pm_node_t *)hash;
14738+
element = (pm_node_t *) hash;
1469414739
if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
14695-
parse_assocs(parser, (pm_node_t *) hash);
14740+
parse_assocs(parser, &literals, element);
1469614741
}
1469714742

14743+
pm_static_literals_free(&literals);
1469814744
parsed_bare_hash = true;
1469914745
}
1470014746
}
@@ -14840,17 +14886,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
1484014886
case PM_TOKEN_BRACE_LEFT: {
1484114887
pm_accepts_block_stack_push(parser, true);
1484214888
parser_lex(parser);
14889+
1484314890
pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
14891+
pm_static_literals_t literals = { 0 };
1484414892

1484514893
if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
14846-
parse_assocs(parser, (pm_node_t *) node);
14894+
parse_assocs(parser, &literals, (pm_node_t *) node);
1484714895
accept1(parser, PM_TOKEN_NEWLINE);
1484814896
}
1484914897

1485014898
pm_accepts_block_stack_pop(parser);
1485114899
expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
1485214900
pm_hash_node_closing_loc_set(node, &parser->previous);
1485314901

14902+
pm_static_literals_free(&literals);
1485414903
return (pm_node_t *) node;
1485514904
}
1485614905
case PM_TOKEN_CHARACTER_LITERAL: {

0 commit comments

Comments
 (0)