summary | refs | log | tree | commit | diff
path: root/src/include/mb/pg_wchar.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/mb/pg_wchar.h')
-rw-r--r-- src/include/mb/pg_wchar.h 22
1 files changed, 20 insertions, 2 deletions
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index a9aaff9e6dc..0f31e683189 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -306,9 +306,9 @@ typedef enum pg_enc
/*
* When converting strings between different encodings, we assume that space
- * for converted result is 4-to-1 growth in the worst case. The rate for
+ * for converted result is 4-to-1 growth in the worst case. The rate for
* currently supported encoding pairs are within 3 (SJIS JIS X0201 half width
- * kanna -> UTF8 is the worst case). So "4" should be enough for the moment.
+ * kana -> UTF8 is the worst case). So "4" should be enough for the moment.
*
* Note that this is not the same as the maximum character width in any
* particular encoding.
@@ -316,6 +316,24 @@ typedef enum pg_enc
#define MAX_CONVERSION_GROWTH 4
/*
+ * Maximum byte length of a string that's required in any encoding to convert
+ * at least one character to any other encoding. In other words, if you feed
+ * MAX_CONVERSION_INPUT_LENGTH bytes to any encoding conversion function, it
+ * is guaranteed to be able to convert something without needing more input
+ * (assuming the input is valid).
+ *
+ * Currently, the maximum case is the conversion UTF8 -> SJIS JIS X0201 half
+ * width kana, where a pair of UTF-8 characters is converted into a single
+ * SHIFT_JIS_2004 character (the reverse of the worst case for
+ * MAX_CONVERSION_GROWTH). It needs 6 bytes of input. In theory, a
+ * user-defined conversion function might have more complicated cases, although
+ * for the reverse mapping you would probably also need to bump up
+ * MAX_CONVERSION_GROWTH. But there is no need to be stingy here, so make it
+ * generous.
+ */
+#define MAX_CONVERSION_INPUT_LENGTH 16
+
+/*
* Maximum byte length of the string equivalent to any one Unicode code point,
* in any backend encoding. The current value assumes that a 4-byte UTF-8
* character might expand by MAX_CONVERSION_GROWTH, which is a huge