summaryrefslogtreecommitdiff
path: root/src/common/pg_lzcompress.c
diff options
context:
space:
mode:
authorTomas Vondra2019-10-01 12:13:44 +0000
committerTomas Vondra2019-10-01 12:28:28 +0000
commit11a078cf87ffb611d19c7dec6df68b41084ad9c9 (patch)
treecdb79b0578ea99a1c090d2704d4d5f0aa9266dc9 /src/common/pg_lzcompress.c
parent002962dc7293043126561b0d0df79d6c76251804 (diff)
Optimize partial TOAST decompression
Commit 4d0e994eed added support for partial TOAST decompression, so the decompression is interrupted after producing the requested prefix. For prefixes and slices near the beginning of the entry, this may save a lot of decompression work. That however only deals with decompression - the whole compressed entry was still fetched and re-assembled, even though the decompression used only a small fraction of it. This commit improves that by computing how much compressed data may be needed to decompress the requested prefix, and then fetches only the necessary part. We always need to fetch a bit more compressed data than the requested (uncompressed) prefix, because the prefix may not be compressible at all and pglz itself adds a bit of overhead. That means this optimization is most effective when the requested prefix is much smaller than the whole compressed entry. Author: Binguo Bao Reviewed-by: Andrey Borodin, Tomas Vondra, Paul Ramsey Discussion: https://www.postgresql.org/message-id/flat/CAL-OGkthU9Gs7TZchf5OWaL-Gsi=hXqufTxKv9qpNG73d5na_g@mail.gmail.com
Diffstat (limited to 'src/common/pg_lzcompress.c')
-rw-r--r--src/common/pg_lzcompress.c37
1 files changed, 37 insertions, 0 deletions
diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c
index 988b3987d04..3adad62d9a4 100644
--- a/src/common/pg_lzcompress.c
+++ b/src/common/pg_lzcompress.c
@@ -771,3 +771,40 @@ pglz_decompress(const char *source, int32 slen, char *dest,
*/
return (char *) dp - dest;
}
+
+
+/* ----------
+ * pglz_maximum_compressed_size -
+ *
+ *		Calculate the maximum compressed size for a given amount of raw data.
+ *		Return the maximum size, or total compressed size if maximum size is
+ *		larger than total compressed size.
+ *
+ * We can't use PGLZ_MAX_OUTPUT for this purpose, because that's used to size
+ * the compression buffer (and abort the compression). It does not really say
+ * what's the maximum compressed size for an input of a given length, and it
+ * may happen that while the whole value is compressible (and thus fits into
+ * PGLZ_MAX_OUTPUT nicely), the prefix is not compressible at all.
+ * ----------
+ */
+int32
+pglz_maximum_compressed_size(int32 rawsize, int32 total_compressed_size)
+{
+	int64		compressed_size;
+
+	/*
+	 * pglz uses one control bit per byte, so an all-literal prefix needs
+	 * (rawsize * 9) bits; adding 7 rounds up to whole bytes.  Compute in
+	 * int64 so this cannot overflow; we narrow only after clamping below.
+	 */
+	compressed_size = ((int64) rawsize * 9 + 7) / 8;
+
+	/*
+	 * The prefix may end with N-1 or N-2 literals followed by a 2- or 3-byte
+	 * match tag, so up to 2 more bytes may be needed to read the whole tag.
+	 */
+	compressed_size += 2;
+
+	/* Can't exceed the total compressed size; this also makes it fit int32. */
+	return (int32) Min(compressed_size, (int64) total_compressed_size);
+}