Skip to content

Commit 0ac54a8

Browse files
authored
Update json_encoding_test.rb
1 parent ddfe6f4 commit 0ac54a8

File tree

1 file changed

+180
-4
lines changed

1 file changed

+180
-4
lines changed

test/json/json_encoding_test.rb

Lines changed: 180 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# frozen_string_literal: true
2+
23
require_relative 'test_helper'
34

45
class JSONEncodingTest < Test::Unit::TestCase
@@ -37,7 +38,7 @@ def test_unicode
3738
assert_equal '"\u001f"', 0x1f.chr.to_json
3839
assert_equal '" "', ' '.to_json
3940
assert_equal "\"#{0x7f.chr}\"", 0x7f.chr.to_json
40-
utf8 = [ "© ≠ €! \01" ]
41+
utf8 = ["© ≠ €! \01"]
4142
json = '["© ≠ €! \u0001"]'
4243
assert_equal json, utf8.to_json(ascii_only: false)
4344
assert_equal utf8, parse(json)
@@ -78,10 +79,10 @@ def test_chars
7879
json = '"\u%04x"' % i
7980
i = i.chr
8081
assert_equal i, parse(json)[0]
81-
if i == ?\b
82+
if i == "\b"
8283
generated = generate(i)
83-
assert '"\b"' == generated || '"\10"' == generated
84-
elsif [?\n, ?\r, ?\t, ?\f].include?(i)
84+
assert ['"\b"', '"\10"'].include?(generated)
85+
elsif ["\n", "\r", "\t", "\f"].include?(i)
8586
assert_equal i.dump, generate(i)
8687
elsif i.chr < 0x20.chr
8788
assert_equal json, generate(i)
@@ -92,4 +93,179 @@ def test_chars
9293
end
9394
assert_equal "\302\200", parse('"\u0080"')
9495
end
96+
97+
# Round-trips an array and a hash that are each nested 100 levels deep through
# generate and parse, and checks that the structure comes back unchanged.
def test_deeply_nested_structures
  depth = 100

  # Arrays: start from the innermost empty array and wrap it (depth - 1) times.
  nested_array = (depth - 1).times.inject([]) { |inner, _| [inner] }
  assert_equal nested_array, parse(generate(nested_array))

  # Hashes: built inside-out as well, so the outermost key is "key0" and the
  # innermost one ("key98") points at an empty hash.
  nested_hash = (depth - 2).downto(0).inject({}) do |inner, index|
    { "key#{index}" => inner }
  end
  assert_equal nested_hash, parse(generate(nested_hash))
end
123+
124+
# Generates and re-parses a 10,000-element array and a 10,000-entry hash,
# spot-checking the size plus the first and last entries of each result.
def test_very_large_json_strings
  count = 10_000

  # Large array of distinct strings
  items = (0...count).map { |n| "item#{n}" }
  items_back = parse(generate(items))

  assert_equal items.size, items_back.size
  assert_equal items.first, items_back.first
  assert_equal items.last, items_back.last

  # Large hash of distinct key/value strings
  pairs = (0...count).each_with_object({}) { |n, acc| acc["key#{n}"] = "value#{n}" }
  pairs_back = parse(generate(pairs))

  assert_equal pairs.size, pairs_back.size
  %w[key0 key9999].each do |key|
    assert_equal pairs[key], pairs_back[key]
  end
end
146+
147+
# Generating JSON from a string that is not valid UTF-8 must fail with a
# JSON::GeneratorError whose message identifies the malformed input.
def test_invalid_utf8_sequences
  invalid_utf8 = "\xFF\xFF" # the byte 0xFF never occurs in well-formed UTF-8

  # assert_raise fails the test on its own when nothing is raised, so no
  # sentinel exception is needed (a sentinel raised inside a begin/rescue
  # StandardError block would be swallowed by that same rescue).
  error = assert_raise(JSON::GeneratorError) { generate(invalid_utf8) }
  assert_match(%r{source sequence is illegal/malformed utf-8}, error.message)
end
162+
163+
# Covers \u surrogate escapes: decoding valid pairs, rejecting a lone high
# surrogate, and emitting a pair when output is restricted to ASCII.
def test_surrogate_pair_handling
  # Lowest and highest code points that require a surrogate pair
  {
    '"\ud800\udc00"' => "\u{10000}",
    '"\udbff\udfff"' => "\u{10FFFF}"
  }.each do |escaped, decoded|
    assert_equal decoded, parse(escaped)
  end

  # A high surrogate with no low surrogate after it is expected to be rejected
  assert_raise(JSON::ParserError) { parse('"\ud800"') }

  # U+10437 lies outside the BMP, so ASCII-only output needs two \u escapes
  ascii_json = generate("\u{10437}", ascii_only: true)
  assert_match(/\\ud801\\udc37/, ascii_json)
end
176+
177+
# Checks that escaped slashes, backslashes and double quotes are unescaped by
# parse, and that a string of characters needing escapes survives a
# generate/parse round trip.
def test_json_escaping_edge_cases
  # Each document holds exactly one escaped character:
  # forward slash, backslash, double quote.
  [
    ["/", '"\/"'],
    ["\\", '"\\\\"'],
    ['"', '"\\""']
  ].each do |expected, document|
    assert_equal expected, parse(document)
  end

  # Several escapes in a row. Only the presence of the backslash-quote pair and
  # of the slash is asserted, since parsers may differ on escaped slashes.
  combined = parse('"\\\\\\"\\/"')
  [/\\"/, %r{/}].each { |pattern| assert_match(pattern, combined) }

  # Round trip of a string consisting solely of characters that need escaping
  raw = "\b\f\n\r\t\"\\"
  assert_equal raw, parse(generate(raw))
end
198+
199+
# Parses "{}" and "[]" as written and re-encoded as UTF-16/UTF-32 (both byte
# orders), then checks that empty containers generate the same two documents.
def test_empty_objects_and_arrays
  wide_encodings = [
    Encoding::UTF_16BE,
    Encoding::UTF_16LE,
    Encoding::UTF_32BE,
    Encoding::UTF_32LE
  ]

  # Objects first, then arrays; each is parsed as-is before the re-encoded forms
  { '{}' => {}, '[]' => [] }.each do |document, expected|
    assert_equal(expected, parse(document))
    wide_encodings.each do |encoding|
      assert_equal(expected, parse(document.encode(encoding)))
    end
  end

  # Generation of empty containers
  assert_equal '{}', generate({})
  assert_equal '[]', generate([])
end
218+
219+
# Verifies that U+0000 is parsed from and generated as the \u0000 escape, and
# that a NUL embedded in a longer string survives a round trip.
def test_null_character_handling
  nul = "\u0000"
  escaped = '"\u0000"' # single-quoted: a literal backslash-u escape

  # Parsing and generating a lone NUL
  assert_equal nul, parse(escaped)
  assert_equal escaped, generate(nul)

  # NUL in the middle of other text
  embedded = "before\u0000after"
  assert_equal embedded, parse(generate(embedded))
end
233+
234+
# Checks that whitespace between JSON tokens is ignored by parse, while
# escaped whitespace inside keys and values is kept.
def test_whitespace_handling
  # [expected value, document] pairs with whitespace around the tokens
  [
    [{}, ' { } '],
    [{}, "{\r\n}"],
    [[], " [ \n ] "],
    [["a", "b"], " [ \n\"a\",\r\n \"b\"\n ] "],
    [{ "a" => "b" }, " { \n\"a\" \r\n: \t\"b\"\n } "]
  ].each do |expected, document|
    assert_equal(expected, parse(document))
  end

  # Ten repetitions of space/LF/CR/TAB on either side of an empty object
  padding = " \n\r\t" * 10
  assert_equal({}, parse("#{padding}{}#{padding}"))

  # Single-quoted, so \n and \r\n reach the parser as JSON escapes inside the
  # key and the value.
  escaped_document = '{"a \n b":"c \r\n d"}'
  assert_equal({ "a \n b" => "c \r\n d" }, parse(escaped_document))
end
250+
251+
# For control characters in U+0000..U+001F, checks that the \uXXXX escape
# parses to the character and that generate emits a \u00XX escape for it.
def test_control_character_handling
  # Backspace, LF, CR, FF and TAB are left out; they are treated as already
  # tested elsewhere.
  already_covered = [0x08, 0x0A, 0x0D, 0x0C, 0x09]

  ((0..0x1F).to_a - already_covered).each do |code|
    char = code.chr('UTF-8')

    # Parsing the escape yields the raw character
    assert_equal char, parse(format('"\\u%04x"', code))

    # Generating the character yields some \u00XX escape
    assert_match(/\\u00[0-1][0-9a-f]/, generate(char))
  end

  # Several control characters in one string
  sequence = "\u0001\u0002\u0003\u0004"
  json = generate(sequence)
  assert_equal sequence, parse(json)
  assert_match(/\\u0001\\u0002\\u0003\\u0004/, json)
end
95271
end

0 commit comments

Comments
 (0)