summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFujii Masao2016-01-28 03:57:52 +0000
committerFujii Masao2016-01-28 03:57:52 +0000
commit7f46eaf035440564e7dae3b03916735b92c83248 (patch)
treec8eeeea61e1e5046a1ed7edd737610dab862c11a
parenteaf7b1f6432480e93d8c6824fbd503761a1c1a4f (diff)
Add gin_clean_pending_list function to clean up GIN pending list
This function cleans up the pending list of the GIN index by moving entries in it to the main GIN data structure in bulk. It returns the number of pages cleaned up from the pending list. This function is useful, for example, when the pending list needs to be cleaned up *quickly* to improve the performance of the search using GIN index. VACUUM can do the same thing, too, but it may take days to run on a large table. Jeff Janes, reviewed by Julien Rouhaud, Jaime Casanova, Alvaro Herrera and me. Discussion: CAMkU=1x8zFkpfnozXyt40zmR3Ub_kHu58LtRmwHUKRgQss7=iQ@mail.gmail.com
-rw-r--r--doc/src/sgml/func.sgml26
-rw-r--r--doc/src/sgml/gin.sgml4
-rw-r--r--doc/src/sgml/ref/create_index.sgml4
-rw-r--r--src/backend/access/gin/ginfast.c52
-rw-r--r--src/include/access/gin_private.h3
-rw-r--r--src/include/catalog/catversion.h2
-rw-r--r--src/include/catalog/pg_proc.h2
-rw-r--r--src/test/regress/expected/gin.out13
-rw-r--r--src/test/regress/sql/gin.sql6
9 files changed, 108 insertions, 4 deletions
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 9c143b2a634..139aa2b811e 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -18036,9 +18036,16 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
<primary>brin_summarize_new_values</primary>
</indexterm>
+ <indexterm>
+ <primary>gin_clean_pending_list</primary>
+ </indexterm>
+
<para>
<xref linkend="functions-admin-index-table"> shows the functions
available for index maintenance tasks.
+ These functions cannot be executed during recovery.
+ Use of these functions is restricted to superusers and the owner
+ of the given index.
</para>
<table id="functions-admin-index-table">
@@ -18056,6 +18063,13 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
<entry><type>integer</type></entry>
<entry>summarize page ranges not already summarized</entry>
</row>
+ <row>
+ <entry>
+ <literal><function>gin_clean_pending_list(<parameter>index</> <type>regclass</>)</function></literal>
+ </entry>
+ <entry><type>bigint</type></entry>
+ <entry>move GIN pending list entries into main index structure</entry>
+ </row>
</tbody>
</tgroup>
</table>
@@ -18069,6 +18083,18 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
into the index.
</para>
+ <para>
+ <function>gin_clean_pending_list</> accepts the OID or name of
+ a GIN index and cleans up the pending list of the specified GIN index
+ by moving entries in it to the main GIN data structure in bulk.
+ It returns the number of pages cleaned up from the pending list.
+ Note that if the argument is a GIN index built with the <literal>fastupdate</>
+ option disabled, the cleanup does not happen and the return value is 0
+ because the index doesn't have a pending list.
+ Please see <xref linkend="gin-fast-update"> and <xref linkend="gin-tips">
+ for details of the pending list and the <literal>fastupdate</> option.
+ </para>
+
</sect2>
<sect2 id="functions-admin-genfile">
diff --git a/doc/src/sgml/gin.sgml b/doc/src/sgml/gin.sgml
index 9eb0b5a957f..a392f949ffd 100644
--- a/doc/src/sgml/gin.sgml
+++ b/doc/src/sgml/gin.sgml
@@ -734,7 +734,9 @@
from the indexed item). As of <productname>PostgreSQL</productname> 8.4,
<acronym>GIN</> is capable of postponing much of this work by inserting
new tuples into a temporary, unsorted list of pending entries.
- When the table is vacuumed, or if the pending list becomes larger than
+ When the table is vacuumed or autoanalyzed, or when the
+ <function>gin_clean_pending_list</function> function is called, or if the
+ pending list becomes larger than
<xref linkend="guc-gin-pending-list-limit">, the entries are moved to the
main <acronym>GIN</acronym> data structure using the same bulk insert
techniques used during initial index creation. This greatly improves
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index ce36a1ba480..ec4146f7515 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -362,8 +362,8 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
Turning <literal>fastupdate</> off via <command>ALTER INDEX</> prevents
future insertions from going into the list of pending index entries,
but does not in itself flush previous entries. You might want to
- <command>VACUUM</> the table afterward to ensure the pending list is
- emptied.
+ <command>VACUUM</> the table or call the <function>gin_clean_pending_list</>
+ function afterward to ensure the pending list is emptied.
</para>
</note>
</listitem>
diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c
index 681ce098144..09f41f55ac7 100644
--- a/src/backend/access/gin/ginfast.c
+++ b/src/backend/access/gin/ginfast.c
@@ -20,10 +20,13 @@
#include "access/gin_private.h"
#include "access/xloginsert.h"
+#include "access/xlog.h"
#include "commands/vacuum.h"
+#include "catalog/pg_am.h"
#include "miscadmin.h"
#include "utils/memutils.h"
#include "utils/rel.h"
+#include "utils/acl.h"
#include "storage/indexfsm.h"
/* GUC parameter */
@@ -958,3 +961,52 @@ ginInsertCleanup(GinState *ginstate,
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(opCtx);
}
+
+/*
+ * SQL-callable function to clean up the pending list of the GIN index whose OID is argument 0; runs ginInsertCleanup() on it and returns stats.pages_deleted as an int64.
+ */
+Datum
+gin_clean_pending_list(PG_FUNCTION_ARGS)
+{
+ Oid indexoid = PG_GETARG_OID(0);
+ Relation indexRel = index_open(indexoid, AccessShareLock); /* opened before any of the checks below run */
+ IndexBulkDeleteResult stats;
+ GinState ginstate;
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("GIN pending list cannot be cleaned up during recovery.")));
+
+ /* Must be an index (relkind) whose access method (relam) is GIN */
+ if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
+ indexRel->rd_rel->relam != GIN_AM_OID)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a GIN index",
+ RelationGetRelationName(indexRel))));
+
+ /*
+ * Reject temporary indexes that belong to other sessions; we would be
+ * likely to get wrong data since we have no visibility into the owning
+ * session's local buffers.
+ */
+ if (RELATION_IS_OTHER_TEMP(indexRel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot access temporary indexes of other sessions")));
+
+ /* Current user must own the index (comparable to privileges needed for VACUUM) */
+ if (!pg_class_ownercheck(indexoid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
+ RelationGetRelationName(indexRel));
+
+ memset(&stats, 0, sizeof(stats)); /* start from zeroed counters so pages_deleted reflects only this call */
+ initGinState(&ginstate, indexRel);
+ ginInsertCleanup(&ginstate, true, &stats); /* NOTE(review): presumably fills in stats.pages_deleted -- confirm in ginInsertCleanup */
+
+ index_close(indexRel, AccessShareLock); /* releases the same lock level taken by index_open above */
+
+ PG_RETURN_INT64((int64) stats.pages_deleted);
+}
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index 695959c5b18..d2ea58832ce 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -881,6 +881,9 @@ extern void ginFreeScanKeys(GinScanOpaque so);
/* ginget.c */
extern int64 gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
+/* ginfast.c */
+extern Datum gin_clean_pending_list(PG_FUNCTION_ARGS);
+
/* ginlogic.c */
extern void ginInitConsistentFunction(GinState *ginstate, GinScanKey key);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 1f18806fb52..5c480b7d3ab 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201601271
+#define CATALOG_VERSION_NO 201601281
#endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 3a066abc82f..ba8760b37d4 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4517,6 +4517,8 @@ DATA(insert OID = 3087 ( gin_extract_tsquery PGNSP PGUID 12 1 0 0 0 f f f f t f
DESCR("GIN tsvector support (obsolete)");
DATA(insert OID = 3088 ( gin_tsquery_consistent PGNSP PGUID 12 1 0 0 0 f f f f t f i s 6 0 16 "2281 21 3615 23 2281 2281" _null_ _null_ _null_ _null_ _null_ gin_tsquery_consistent_6args _null_ _null_ _null_ ));
DESCR("GIN tsvector support (obsolete)");
+DATA(insert OID = 3789 ( gin_clean_pending_list PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 20 "2205" _null_ _null_ _null_ _null_ _null_ gin_clean_pending_list _null_ _null_ _null_ ));
+DESCR("clean up GIN pending list");
DATA(insert OID = 3662 ( tsquery_lt PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_lt _null_ _null_ _null_ ));
DATA(insert OID = 3663 ( tsquery_le PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_le _null_ _null_ _null_ ));
diff --git a/src/test/regress/expected/gin.out b/src/test/regress/expected/gin.out
index c015fe7861a..cc7601c6677 100644
--- a/src/test/regress/expected/gin.out
+++ b/src/test/regress/expected/gin.out
@@ -8,7 +8,20 @@ create table gin_test_tbl(i int4[]);
create index gin_test_idx on gin_test_tbl using gin (i) with (fastupdate = on);
insert into gin_test_tbl select array[1, 2, g] from generate_series(1, 20000) g;
insert into gin_test_tbl select array[1, 3, g] from generate_series(1, 1000) g;
+select gin_clean_pending_list('gin_test_idx')>10 as many; -- flush the fastupdate buffers
+ many
+------
+ t
+(1 row)
+
+insert into gin_test_tbl select array[3, 1, g] from generate_series(1, 1000) g;
vacuum gin_test_tbl; -- flush the fastupdate buffers
+select gin_clean_pending_list('gin_test_idx'); -- nothing to flush
+ gin_clean_pending_list
+------------------------
+ 0
+(1 row)
+
-- Test vacuuming
delete from gin_test_tbl where i @> array[2];
vacuum gin_test_tbl;
diff --git a/src/test/regress/sql/gin.sql b/src/test/regress/sql/gin.sql
index 4b35560036a..31890b46d8c 100644
--- a/src/test/regress/sql/gin.sql
+++ b/src/test/regress/sql/gin.sql
@@ -10,8 +10,14 @@ create index gin_test_idx on gin_test_tbl using gin (i) with (fastupdate = on);
insert into gin_test_tbl select array[1, 2, g] from generate_series(1, 20000) g;
insert into gin_test_tbl select array[1, 3, g] from generate_series(1, 1000) g;
+select gin_clean_pending_list('gin_test_idx')>10 as many; -- flush the fastupdate buffers
+
+insert into gin_test_tbl select array[3, 1, g] from generate_series(1, 1000) g;
+
vacuum gin_test_tbl; -- flush the fastupdate buffers
+select gin_clean_pending_list('gin_test_idx'); -- nothing to flush
+
-- Test vacuuming
delete from gin_test_tbl where i @> array[2];
vacuum gin_test_tbl;