# frozen_string_literal: true
+
require_relative 'test_helper'

class JSONEncodingTest < Test::Unit::TestCase
@@ -37,7 +38,7 @@ def test_unicode
    assert_equal '"\u001f"', 0x1f.chr.to_json
    assert_equal '" "', ' '.to_json
    assert_equal "\"#{0x7f.chr}\"", 0x7f.chr.to_json
-    utf8 = ["© ≠ €! \01"]
+    utf8 = ["© ≠ €! \01"]
    json = '["© ≠ €! \u0001"]'
    assert_equal json, utf8.to_json(ascii_only: false)
    assert_equal utf8, parse(json)
@@ -78,10 +79,10 @@ def test_chars
      json = '"\u%04x"' % i
      i = i.chr
      assert_equal i, parse(json)[0]
-      if i == ?\b
+      if i == "\b"
        generated = generate(i)
-        assert '"\b"' == generated || '"\10"' == generated
-      elsif [?\n, ?\r, ?\t, ?\f].include?(i)
+        assert ['"\b"', '"\10"'].include?(generated)
+      elsif ["\n", "\r", "\t", "\f"].include?(i)
        assert_equal i.dump, generate(i)
      elsif i.chr < 0x20.chr
        assert_equal json, generate(i)
@@ -92,4 +93,179 @@ def test_chars
    end
    assert_equal "\302\200", parse('"\u0080"')
  end
+
+  def test_deeply_nested_structures
+    # Test for deeply nested arrays
+    nesting_level = 100
+    deeply_nested = []
+    current = deeply_nested
+
+    (nesting_level - 1).times do
+      current << []
+      current = current[0]
+    end
+
+    json = generate(deeply_nested)
+    assert_equal deeply_nested, parse(json)
+
+    # Test for deeply nested objects/hashes
+    deeply_nested_hash = {}
+    current_hash = deeply_nested_hash
+
+    (nesting_level - 1).times do |i|
+      current_hash["key#{i}"] = {}
+      current_hash = current_hash["key#{i}"]
+    end
+
+    json = generate(deeply_nested_hash)
+    assert_equal deeply_nested_hash, parse(json)
+  end
+
+  def test_very_large_json_strings
+    # Create a large array with repeated elements
+    large_array = Array.new(10_000) { |i| "item#{i}" }
+
+    json = generate(large_array)
+    parsed = parse(json)
+
+    assert_equal large_array.size, parsed.size
+    assert_equal large_array.first, parsed.first
+    assert_equal large_array.last, parsed.last
+
+    # Create a large hash
+    large_hash = {}
+    10_000.times { |i| large_hash["key#{i}"] = "value#{i}" }
+
+    json = generate(large_hash)
+    parsed = parse(json)
+
+    assert_equal large_hash.size, parsed.size
+    assert_equal large_hash["key0"], parsed["key0"]
+    assert_equal large_hash["key9999"], parsed["key9999"]
+  end
+
+  def test_invalid_utf8_sequences
+    # Create a string with invalid UTF-8 byte sequences
+    invalid_utf8 = "\xFF\xFF"
+
+    # Test that generating JSON from invalid UTF-8 raises an error.
+    # Different JSON implementations may handle this differently,
+    # so we rescue any StandardError and then check its message.
+    begin
+      generate(invalid_utf8)
+      raise "Expected an exception when generating JSON with invalid UTF8"
+    rescue StandardError => e
+      assert true
+      assert_match(%r{source sequence is illegal/malformed utf-8}, e.message)
+    end
+  end
+
+  def test_surrogate_pair_handling
+    # Test valid surrogate pairs
+    assert_equal "\u{10000}", parse('"\ud800\udc00"')
+    assert_equal "\u{10FFFF}", parse('"\udbff\udfff"')
+
+    # The existing test already checks for orphaned high surrogate
+    assert_raise(JSON::ParserError) { parse('"\ud800"') }
+
+    # Test generating surrogate pairs
+    utf8_string = "\u{10437}"
+    generated = generate(utf8_string, ascii_only: true)
+    assert_match(/\\ud801\\udc37/, generated)
+  end
+
+  def test_json_escaping_edge_cases
+    # Test escaping forward slashes
+    assert_equal "/", parse('"\/"')
+
+    # Test escaping backslashes
+    assert_equal "\\", parse('"\\\\"')
+
+    # Test escaping quotes
+    assert_equal '"', parse('"\\""')
+
+    # Multiple escapes in sequence - different JSON parsers might handle escaped forward slashes differently
+    # Some parsers preserve the escaping, others don't
+    escaped_result = parse('"\\\\\\"\\/"')
+    assert_match(/\\"/, escaped_result)
+    assert_match(%r{/}, escaped_result)
+
+    # Generate string with all special characters
+    special_chars = "\b\f\n\r\t\"\\"
+    escaped_json = generate(special_chars)
+    assert_equal special_chars, parse(escaped_json)
+  end
+
+  def test_empty_objects_and_arrays
+    # Test empty objects with different encodings
+    assert_equal({}, parse('{}'))
+    assert_equal({}, parse('{}'.encode(Encoding::UTF_16BE)))
+    assert_equal({}, parse('{}'.encode(Encoding::UTF_16LE)))
+    assert_equal({}, parse('{}'.encode(Encoding::UTF_32BE)))
+    assert_equal({}, parse('{}'.encode(Encoding::UTF_32LE)))
+
+    # Test empty arrays with different encodings
+    assert_equal([], parse('[]'))
+    assert_equal([], parse('[]'.encode(Encoding::UTF_16BE)))
+    assert_equal([], parse('[]'.encode(Encoding::UTF_16LE)))
+    assert_equal([], parse('[]'.encode(Encoding::UTF_32BE)))
+    assert_equal([], parse('[]'.encode(Encoding::UTF_32LE)))
+
+    # Test generating empty objects and arrays
+    assert_equal '{}', generate({})
+    assert_equal '[]', generate([])
+  end
+
+  def test_null_character_handling
+    # Test parsing null character
+    assert_equal "\u0000", parse('"\u0000"')
+
+    # Test generating null character
+    string_with_null = "\u0000"
+    generated = generate(string_with_null)
+    assert_equal '"\u0000"', generated
+
+    # Test null characters in middle of string
+    mixed_string = "before\u0000after"
+    generated = generate(mixed_string)
+    assert_equal mixed_string, parse(generated)
+  end
+
+  def test_whitespace_handling
+    # Test parsing with various whitespace patterns
+    assert_equal({}, parse(' { } '))
+    assert_equal({}, parse("{\r\n}"))
+    assert_equal([], parse(" [ \n ] "))
+    assert_equal(["a", "b"], parse(" [ \n\"a\",\r\n\"b\"\n ] "))
+    assert_equal({"a" => "b"}, parse(" { \n\"a\"\r\n: \t\"b\"\n } "))
+
+    # Test with excessive whitespace
+    excessive_whitespace = " \n\r\t " * 10 + "{}" + " \n\r\t " * 10
+    assert_equal({}, parse(excessive_whitespace))
+
+    # Mixed whitespace in keys and values
+    mixed_json = '{"a \n b":"c \r\n d"}'
+    assert_equal({"a \n b" => "c \r\n d"}, parse(mixed_json))
+  end
+
+  def test_control_character_handling
+    # Test all control characters (U+0000 to U+001F)
+    (0..0x1F).each do |i|
+      # Skip already tested ones
+      next if [0x08, 0x0A, 0x0D, 0x0C, 0x09].include?(i)
+
+      control_char = i.chr('UTF-8')
+      escaped_json = '"' + "\\u%04x" % i + '"'
+      assert_equal control_char, parse(escaped_json)
+
+      # Check that the character is properly escaped when generating
+      assert_match(/\\u00[0-1][0-9a-f]/, generate(control_char))
+    end
+
+    # Test string with multiple control characters
+    control_str = "\u0001\u0002\u0003\u0004"
+    generated = generate(control_str)
+    assert_equal control_str, parse(generated)
+    assert_match(/\\u0001\\u0002\\u0003\\u0004/, generated)
+  end
end