Skip to content

Commit 0c04256

Browse files
committed
Add some encoding debugging to make testing easier
1 parent 54566d2 commit 0c04256

File tree

2 files changed

+128
-0
lines changed

2 files changed

+128
-0
lines changed

ext/prism/extension.c

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ VALUE rb_cPrismParseError;
2121
VALUE rb_cPrismParseWarning;
2222
VALUE rb_cPrismParseResult;
2323

24+
// Class object for the debug Encoding class. It is assigned in Init_prism,
// where "Encoding" is defined under rb_cPrismDebug with rb_cObject as its
// superclass.
VALUE rb_cPrismDebugEncoding;
25+
2426
ID rb_option_id_filepath;
2527
ID rb_option_id_encoding;
2628
ID rb_option_id_line;
@@ -1102,6 +1104,80 @@ format_errors(VALUE self, VALUE source, VALUE colorize) {
11021104
return result;
11031105
}
11041106

1107+
/**
1108+
* call-seq: Debug::Encoding.all -> Array[Debug::Encoding]
1109+
*
1110+
* Return an array of all of the encodings that prism knows about.
1111+
*/
1112+
static VALUE
1113+
encoding_all(VALUE self) {
1114+
VALUE encodings = rb_ary_new();
1115+
1116+
for (size_t index = 0; index < PM_ENCODING_MAXIMUM; index++) {
1117+
const pm_encoding_t *encoding = &pm_encodings[index];
1118+
1119+
VALUE encoding_argv[] = { rb_str_new_cstr(encoding->name), encoding->multibyte ? Qtrue : Qfalse };
1120+
rb_ary_push(encodings, rb_class_new_instance(2, encoding_argv, rb_cPrismDebugEncoding));
1121+
}
1122+
1123+
return encodings;
1124+
}
1125+
1126+
static const pm_encoding_t *
1127+
encoding_find(VALUE name) {
1128+
const uint8_t *source = (const uint8_t *) RSTRING_PTR(name);
1129+
size_t length = RSTRING_LEN(name);
1130+
1131+
const pm_encoding_t *encoding = pm_encoding_find(source, source + length);
1132+
if (encoding == NULL) { rb_raise(rb_eArgError, "Unknown encoding: %s", source); }
1133+
1134+
return encoding;
1135+
}
1136+
1137+
/**
1138+
* call-seq: Debug::Encoding.width(source) -> Integer
1139+
*
1140+
* Returns the width of the first character in the given string if it is valid
1141+
* in the encoding. If it is not, this function returns 0.
1142+
*/
1143+
static VALUE
1144+
encoding_char_width(VALUE self, VALUE name, VALUE value) {
1145+
return ULONG2NUM(encoding_find(name)->char_width((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)));
1146+
}
1147+
1148+
/**
1149+
* call-seq: Debug::Encoding.alnum?(source) -> true | false
1150+
*
1151+
* Returns true if the first character in the given string is an alphanumeric
1152+
* character in the encoding.
1153+
*/
1154+
static VALUE
1155+
encoding_alnum_char(VALUE self, VALUE name, VALUE value) {
1156+
return encoding_find(name)->alnum_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse;
1157+
}
1158+
1159+
/**
1160+
* call-seq: Debug::Encoding.alpha?(source) -> true | false
1161+
*
1162+
* Returns true if the first character in the given string is an alphabetic
1163+
* character in the encoding.
1164+
*/
1165+
static VALUE
1166+
encoding_alpha_char(VALUE self, VALUE name, VALUE value) {
1167+
return encoding_find(name)->alpha_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse;
1168+
}
1169+
1170+
/**
1171+
* call-seq: Debug::Encoding.upper?(source) -> true | false
1172+
*
1173+
* Returns true if the first character in the given string is an uppercase
1174+
* character in the encoding.
1175+
*/
1176+
static VALUE
1177+
encoding_isupper_char(VALUE self, VALUE name, VALUE value) {
1178+
return encoding_find(name)->isupper_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) ? Qtrue : Qfalse;
1179+
}
1180+
11051181
/******************************************************************************/
11061182
/* Initialization of the extension */
11071183
/******************************************************************************/
@@ -1182,6 +1258,15 @@ Init_prism(void) {
11821258
rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
11831259
rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 2);
11841260

1261+
// Next, define the functions that are exposed through the private
1262+
// Debug::Encoding class.
1263+
rb_cPrismDebugEncoding = rb_define_class_under(rb_cPrismDebug, "Encoding", rb_cObject);
1264+
rb_define_singleton_method(rb_cPrismDebugEncoding, "all", encoding_all, 0);
1265+
rb_define_singleton_method(rb_cPrismDebugEncoding, "_width", encoding_char_width, 2);
1266+
rb_define_singleton_method(rb_cPrismDebugEncoding, "_alnum?", encoding_alnum_char, 2);
1267+
rb_define_singleton_method(rb_cPrismDebugEncoding, "_alpha?", encoding_alpha_char, 2);
1268+
rb_define_singleton_method(rb_cPrismDebugEncoding, "_upper?", encoding_isupper_char, 2);
1269+
11851270
// Next, initialize the other APIs.
11861271
Init_prism_api_node();
11871272
Init_prism_pack();

lib/prism/debug.rb

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,5 +202,48 @@ def self.prism_locals(source)
202202
def self.newlines(source)
  # Parse the source, then read the offsets off the parse result's source
  # object.
  parse_result = Prism.parse(source)
  parse_result.source.offsets
end
205+
206+
# A wrapper around prism's internal encoding data structures. This is used
# for reflection and debugging purposes.
class Encoding
  # Build an encoding object from its name and a flag saying whether or not
  # it is a multibyte encoding.
  def initialize(name, multibyte)
    @multibyte = multibyte
    @name = name
  end

  # The name of the encoding, that can be passed to Encoding.find.
  def name
    @name
  end

  # Whether or not the encoding is a multibyte encoding.
  def multibyte?
    @multibyte
  end

  # The queries below all inspect the first character of the given source
  # string by delegating to the matching underscore-prefixed singleton method
  # on Encoding, passing this encoding's name along.

  # Returns the number of bytes of the first character in the source string,
  # if it is valid for the encoding. Otherwise, returns 0.
  def width(source)
    Encoding._width(name, source)
  end

  # Returns true if the first character in the source string is a valid
  # alphabetic character for the encoding.
  def alpha?(source)
    Encoding._alpha?(name, source)
  end

  # Returns true if the first character in the source string is a valid
  # alphanumeric character for the encoding.
  def alnum?(source)
    Encoding._alnum?(name, source)
  end

  # Returns true if the first character in the source string is a valid
  # uppercase character for the encoding.
  def upper?(source)
    Encoding._upper?(name, source)
  end
end
205248
end
206249
end

0 commit comments

Comments
 (0)