You can subscribe to this list here.
2010 |
Jan
|
Feb
|
Mar
|
Apr
(4) |
May
(28) |
Jun
(12) |
Jul
(11) |
Aug
(12) |
Sep
(5) |
Oct
(19) |
Nov
(14) |
Dec
(12) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2011 |
Jan
(18) |
Feb
(30) |
Mar
(115) |
Apr
(89) |
May
(50) |
Jun
(44) |
Jul
(22) |
Aug
(13) |
Sep
(11) |
Oct
(30) |
Nov
(28) |
Dec
(39) |
2012 |
Jan
(38) |
Feb
(18) |
Mar
(43) |
Apr
(91) |
May
(108) |
Jun
(46) |
Jul
(37) |
Aug
(44) |
Sep
(33) |
Oct
(29) |
Nov
(36) |
Dec
(15) |
2013 |
Jan
(35) |
Feb
(611) |
Mar
(5) |
Apr
(55) |
May
(30) |
Jun
(28) |
Jul
(458) |
Aug
(34) |
Sep
(9) |
Oct
(39) |
Nov
(22) |
Dec
(32) |
2014 |
Jan
(16) |
Feb
(16) |
Mar
(42) |
Apr
(179) |
May
(7) |
Jun
(6) |
Jul
(9) |
Aug
|
Sep
(4) |
Oct
|
Nov
(3) |
Dec
|
2015 |
Jan
|
Feb
|
Mar
|
Apr
(2) |
May
(4) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
S | M | T | W | T | F | S |
---|---|---|---|---|---|---|
|
|
1
(2) |
2
|
3
|
4
|
5
|
6
|
7
|
8
|
9
|
10
|
11
|
12
|
13
|
14
|
15
(1) |
16
(3) |
17
|
18
|
19
|
20
|
21
|
22
|
23
|
24
(2) |
25
|
26
|
27
(1) |
28
(1) |
29
(1) |
30
(1) |
|
|
|
From: andrei_mart <and...@us...> - 2010-06-30 19:55:24
|
Project "Postgres-XC". The branch, master has been updated via 5d83e22e3cabc3d1e5dc425f492e4459b30a67a0 (commit) via a1b6404cb6c214e9df075e3d827e8384555c7b44 (commit) from 49e836ebf1c86211c342f320838611fc48e6fa1f (commit) - Log ----------------------------------------------------------------- commit 5d83e22e3cabc3d1e5dc425f492e4459b30a67a0 Author: Andrei Martsinchyk <And...@en...> Date: Wed Jun 30 13:21:11 2010 +0300 Use ereport instead of Assert if sort operation is not defined This error is likely to happen if expression of non-sortable data type is in distinct clause diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index 1bbbb75..2cf488c 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -1556,7 +1556,11 @@ add_sort_column(AttrNumber colIdx, Oid sortOp, bool nulls_first, { int i; - Assert(OidIsValid(sortOp)); + if (!OidIsValid(sortOp)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an ordering operator"))); + for (i = 0; i < numCols; i++) { commit a1b6404cb6c214e9df075e3d827e8384555c7b44 Author: Andrei Martsinchyk <And...@en...> Date: Wed Jun 30 13:17:03 2010 +0300 Reverted PANIC ereports back to ERROR They were changed for debugging purposes and accidentally committed diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index 82a7cf8..ae4ed73 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -507,7 +507,7 @@ MemoryContextAlloc(MemoryContext context, Size size) AssertArg(MemoryContextIsValid(context)); if (!AllocSizeIsValid(size)) - elog(PANIC, "invalid memory alloc request size %lu", + elog(ERROR, "invalid memory alloc request size %lu", (unsigned long) size); return (*context->methods->alloc) (context, size); @@ -528,7 +528,7 @@ MemoryContextAllocZero(MemoryContext context, Size size) AssertArg(MemoryContextIsValid(context)); if (!AllocSizeIsValid(size)) - elog(PANIC, "invalid memory alloc request size 
%lu", + elog(ERROR, "invalid memory alloc request size %lu", (unsigned long) size); ret = (*context->methods->alloc) (context, size); @@ -553,7 +553,7 @@ MemoryContextAllocZeroAligned(MemoryContext context, Size size) AssertArg(MemoryContextIsValid(context)); if (!AllocSizeIsValid(size)) - elog(PANIC, "invalid memory alloc request size %lu", + elog(ERROR, "invalid memory alloc request size %lu", (unsigned long) size); ret = (*context->methods->alloc) (context, size); @@ -617,7 +617,7 @@ repalloc(void *pointer, Size size) AssertArg(MemoryContextIsValid(header->context)); if (!AllocSizeIsValid(size)) - elog(PANIC, "invalid memory alloc request size %lu", + elog(ERROR, "invalid memory alloc request size %lu", (unsigned long) size); return (*header->context->methods->realloc) (header->context, ----------------------------------------------------------------------- Summary of changes: src/backend/pgxc/plan/planner.c | 6 +++++- src/backend/utils/mmgr/mcxt.c | 8 ++++---- 2 files changed, 9 insertions(+), 5 deletions(-) hooks/post-receive -- Postgres-XC |
From: mason_s <ma...@us...> - 2010-06-29 19:39:21
|
Project "Postgres-XC". The branch, master has been updated via 49e836ebf1c86211c342f320838611fc48e6fa1f (commit) via 6899314e5a0aad2ded36959ca8bc6e3d7243a586 (commit) from 592295640039744c89a1f319d87fb34072a10efa (commit) - Log ----------------------------------------------------------------- commit 49e836ebf1c86211c342f320838611fc48e6fa1f Author: Mason S <masonsharp@mason-sharps-macbook.local> Date: Tue Jun 29 21:32:26 2010 +0200 Add support for ORDER BY and DISTINCT. This is handled on the Coordinator. It will push down the ORDER BY and merge-sort the sorted input streams from the nodes. It converts from DataRow to tuple format as needed. If one of the SELECT clause expressions is not in the ORDER BY, it appends it to the ORDER BY when pushing it down to the data nodes and leaves it off when returning to the client. With DISTINCT, an ORDER BY will be used and pushed down to the data nodes such that a merge-sort can be done and de-duplication can occur. By Andrei Martsinchyk diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index a86716e..eab1bd0 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -57,6 +57,9 @@ #include "postgres.h" +#ifdef PGXC +#include "funcapi.h" +#endif #include "access/heapam.h" #include "access/sysattr.h" #include "access/tuptoaster.h" @@ -1157,6 +1160,80 @@ slot_deform_tuple(TupleTableSlot *slot, int natts) slot->tts_slow = slow; } +#ifdef PGXC +/* + * slot_deform_datarow + * Extract data from the DataRow message into Datum/isnull arrays. + * We always extract all atributes, as specified in tts_tupleDescriptor, + * because there is no easy way to find random attribute in the DataRow. 
+ */ +static void +slot_deform_datarow(TupleTableSlot *slot) +{ + int attnum = slot->tts_tupleDescriptor->natts; + int i; + int col_count; + char *cur = slot->tts_dataRow; + StringInfo buffer; + uint16 n16; + uint32 n32; + + /* fastpath: exit if values already extracted */ + if (slot->tts_nvalid == attnum) + return; + + Assert(slot->tts_dataRow); + + memcpy(&n16, cur, 2); + cur += 2; + col_count = ntohs(n16); + + if (col_count != attnum) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("Tuple does not match the descriptor"))); + + if (slot->tts_attinmeta == NULL) + slot->tts_attinmeta = TupleDescGetAttInMetadata(slot->tts_tupleDescriptor); + + buffer = makeStringInfo(); + for (i = 0; i < attnum; i++) + { + Form_pg_attribute attr = slot->tts_tupleDescriptor->attrs[i]; + int len; + + /* get size */ + memcpy(&n32, cur, 4); + cur += 4; + len = ntohl(n32); + + /* get data */ + if (len == -1) + { + slot->tts_values[i] = (Datum) 0; + slot->tts_isnull[i] = true; + } + else + { + appendBinaryStringInfo(buffer, cur, len); + cur += len; + + slot->tts_values[i] = InputFunctionCall(slot->tts_attinmeta->attinfuncs + i, + buffer->data, + slot->tts_attinmeta->attioparams[i], + slot->tts_attinmeta->atttypmods[i]); + slot->tts_isnull[i] = false; + + resetStringInfo(buffer); + } + } + pfree(buffer->data); + pfree(buffer); + + slot->tts_nvalid = attnum; +} +#endif + /* * slot_getattr * This function fetches an attribute of the slot's current tuple. @@ -1250,6 +1327,11 @@ slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull) /* * Extract the attribute, along with any preceding attributes. 
*/ +#ifdef PGXC + if (slot->tts_dataRow) + slot_deform_datarow(slot); + else +#endif slot_deform_tuple(slot, attnum); /* @@ -1276,6 +1358,15 @@ slot_getallattrs(TupleTableSlot *slot) if (slot->tts_nvalid == tdesc_natts) return; +#ifdef PGXC + /* Handle the DataRow tuple case */ + if (slot->tts_dataRow) + { + slot_deform_datarow(slot); + return; + } +#endif + /* * otherwise we had better have a physical tuple (tts_nvalid should equal * natts in all virtual-tuple cases) @@ -1319,6 +1410,15 @@ slot_getsomeattrs(TupleTableSlot *slot, int attnum) if (slot->tts_nvalid >= attnum) return; +#ifdef PGXC + /* Handle the DataRow tuple case */ + if (slot->tts_dataRow) + { + slot_deform_datarow(slot); + return; + } +#endif + /* Check for caller error */ if (attnum <= 0 || attnum > slot->tts_tupleDescriptor->natts) elog(ERROR, "invalid attribute number %d", attnum); diff --git a/src/backend/access/common/printtup.c b/src/backend/access/common/printtup.c index ee9c04a..99cd92e 100644 --- a/src/backend/access/common/printtup.c +++ b/src/backend/access/common/printtup.c @@ -292,6 +292,19 @@ printtup(TupleTableSlot *slot, DestReceiver *self) int natts = typeinfo->natts; int i; +#ifdef PGXC + /* + * If we are having DataRow-based tuple we do not have to encode attribute + * values, just send over the DataRow message as we received it from the + * data node + */ + if (slot->tts_dataRow) + { + pq_putmessage('D', slot->tts_dataRow, slot->tts_dataLen); + return; + } +#endif + /* Set or update my derived attribute info, if needed */ if (myState->attrinfo != typeinfo || myState->nattrs != natts) printtup_prepare_info(myState, typeinfo, natts); diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index d641df8..08e35ae 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -35,7 +35,7 @@ #include "parser/parse_relation.h" #ifdef PGXC #include "pgxc/pgxc.h" -#include "pgxc/datanode.h" +#include "pgxc/execRemote.h" #include "pgxc/locator.h" #include 
"pgxc/poolmgr.h" #endif @@ -1511,8 +1511,7 @@ DoCopy(const CopyStmt *stmt, const char *queryString) DataNodeCopyFinish( cstate->connections, primary_data_node, - COMBINE_TYPE_NONE, - whereToSendOutput); + COMBINE_TYPE_NONE); pfree(cstate->connections); pfree(cstate->query_buf.data); FreeRelationLocInfo(cstate->rel_loc); @@ -1526,14 +1525,12 @@ DoCopy(const CopyStmt *stmt, const char *queryString) cstate->processed = DataNodeCopyFinish( cstate->connections, primary_data_node, - COMBINE_TYPE_SAME, - whereToSendOutput); + COMBINE_TYPE_SAME); else cstate->processed = DataNodeCopyFinish( cstate->connections, 0, - COMBINE_TYPE_SUM, - whereToSendOutput); + COMBINE_TYPE_SUM); pfree(cstate->connections); pfree(cstate->query_buf.data); FreeRelationLocInfo(cstate->rel_loc); @@ -1775,10 +1772,10 @@ CopyTo(CopyState cstate) #ifdef PGXC if (IS_PGXC_COORDINATOR && !cstate->on_coord) { - DataNodeCopyOut(GetRelationNodes(cstate->rel_loc, NULL, true), - cstate->connections, - whereToSendOutput, - cstate->copy_file); + cstate->processed = DataNodeCopyOut( + GetRelationNodes(cstate->rel_loc, NULL, true), + cstate->connections, + cstate->copy_file); } else { diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index 06142c9..53e424b 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -149,6 +149,12 @@ ExecCreateTupleTable(int tableSize) slot->tts_shouldFreeMin = false; slot->tts_tuple = NULL; slot->tts_tupleDescriptor = NULL; +#ifdef PGXC + slot->tts_shouldFreeRow = false; + slot->tts_dataRow = NULL; + slot->tts_dataLen = -1; + slot->tts_attinmeta = NULL; +#endif slot->tts_mcxt = CurrentMemoryContext; slot->tts_buffer = InvalidBuffer; slot->tts_nvalid = 0; @@ -228,6 +234,12 @@ MakeSingleTupleTableSlot(TupleDesc tupdesc) slot->tts_shouldFreeMin = false; slot->tts_tuple = NULL; slot->tts_tupleDescriptor = NULL; +#ifdef PGXC + slot->tts_shouldFreeRow = false; + slot->tts_dataRow = NULL; + slot->tts_dataLen = -1; + 
slot->tts_attinmeta = NULL; +#endif slot->tts_mcxt = CurrentMemoryContext; slot->tts_buffer = InvalidBuffer; slot->tts_nvalid = 0; @@ -334,6 +346,12 @@ ExecSetSlotDescriptor(TupleTableSlot *slot, /* slot to change */ if (slot->tts_tupleDescriptor) ReleaseTupleDesc(slot->tts_tupleDescriptor); +#ifdef PGXC + /* XXX there in no routine to release AttInMetadata instance */ + if (slot->tts_attinmeta) + slot->tts_attinmeta = NULL; +#endif + if (slot->tts_values) pfree(slot->tts_values); if (slot->tts_isnull) @@ -415,6 +433,14 @@ ExecStoreTuple(HeapTuple tuple, heap_freetuple(slot->tts_tuple); if (slot->tts_shouldFreeMin) heap_free_minimal_tuple(slot->tts_mintuple); +#ifdef PGXC + if (slot->tts_shouldFreeRow) + pfree(slot->tts_dataRow); + + slot->tts_shouldFreeRow = false; + slot->tts_dataRow = NULL; + slot->tts_dataLen = -1; +#endif /* * Store the new tuple into the specified slot. @@ -476,6 +502,14 @@ ExecStoreMinimalTuple(MinimalTuple mtup, heap_freetuple(slot->tts_tuple); if (slot->tts_shouldFreeMin) heap_free_minimal_tuple(slot->tts_mintuple); +#ifdef PGXC + if (slot->tts_shouldFreeRow) + pfree(slot->tts_dataRow); + + slot->tts_shouldFreeRow = false; + slot->tts_dataRow = NULL; + slot->tts_dataLen = -1; +#endif /* * Drop the pin on the referenced buffer, if there is one. @@ -504,6 +538,62 @@ ExecStoreMinimalTuple(MinimalTuple mtup, return slot; } +#ifdef PGXC +/* -------------------------------- + * ExecStoreDataRowTuple + * + * Store a buffer in DataRow message format into the slot. + * + * -------------------------------- + */ +TupleTableSlot * +ExecStoreDataRowTuple(char *msg, size_t len, TupleTableSlot *slot, bool shouldFree) +{ + /* + * sanity checks + */ + Assert(msg != NULL); + Assert(len > 0); + Assert(slot != NULL); + Assert(slot->tts_tupleDescriptor != NULL); + + /* + * Free any old physical tuple belonging to the slot. 
+ */ + if (slot->tts_shouldFree) + heap_freetuple(slot->tts_tuple); + if (slot->tts_shouldFreeMin) + heap_free_minimal_tuple(slot->tts_mintuple); + if (slot->tts_shouldFreeRow) + pfree(slot->tts_dataRow); + + /* + * Drop the pin on the referenced buffer, if there is one. + */ + if (BufferIsValid(slot->tts_buffer)) + ReleaseBuffer(slot->tts_buffer); + + slot->tts_buffer = InvalidBuffer; + + /* + * Store the new tuple into the specified slot. + */ + slot->tts_isempty = false; + slot->tts_shouldFree = false; + slot->tts_shouldFreeMin = false; + slot->tts_shouldFreeRow = shouldFree; + slot->tts_tuple = NULL; + slot->tts_mintuple = NULL; + slot->tts_dataRow = msg; + slot->tts_dataLen = len; + + /* Mark extracted state invalid */ + slot->tts_nvalid = 0; + + return slot; +} +#endif + /* -------------------------------- * ExecClearTuple * @@ -527,6 +617,14 @@ ExecClearTuple(TupleTableSlot *slot) /* slot in which to store tuple */ heap_freetuple(slot->tts_tuple); if (slot->tts_shouldFreeMin) heap_free_minimal_tuple(slot->tts_mintuple); +#ifdef PGXC + if (slot->tts_shouldFreeRow) + pfree(slot->tts_dataRow); + + slot->tts_shouldFreeRow = false; + slot->tts_dataRow = NULL; + slot->tts_dataLen = -1; +#endif slot->tts_tuple = NULL; slot->tts_mintuple = NULL; @@ -634,7 +732,13 @@ ExecCopySlotTuple(TupleTableSlot *slot) return heap_copytuple(slot->tts_tuple); if (slot->tts_mintuple) return heap_tuple_from_minimal_tuple(slot->tts_mintuple); - +#ifdef PGXC + /* + * Ensure values are extracted from data row to the Datum array + */ + if (slot->tts_dataRow) + slot_getallattrs(slot); +#endif /* * Otherwise we need to build a tuple from the Datum array. 
*/ @@ -667,7 +771,13 @@ ExecCopySlotMinimalTuple(TupleTableSlot *slot) return heap_copy_minimal_tuple(slot->tts_mintuple); if (slot->tts_tuple) return minimal_tuple_from_heap_tuple(slot->tts_tuple); - +#ifdef PGXC + /* + * Ensure values are extracted from data row to the Datum array + */ + if (slot->tts_dataRow) + slot_getallattrs(slot); +#endif /* * Otherwise we need to build a tuple from the Datum array. */ @@ -861,6 +971,14 @@ ExecMaterializeSlot(TupleTableSlot *slot) if (!slot->tts_shouldFreeMin) slot->tts_mintuple = NULL; +#ifdef PGXC + if (!slot->tts_shouldFreeRow) + { + slot->tts_dataRow = NULL; + slot->tts_dataLen = -1; + } +#endif + return slot->tts_tuple; } diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index ae537e5..1bbbb75 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -22,8 +22,10 @@ #include "catalog/pg_type.h" #include "lib/stringinfo.h" #include "nodes/nodeFuncs.h" +#include "nodes/nodes.h" #include "nodes/parsenodes.h" #include "optimizer/clauses.h" +#include "optimizer/tlist.h" #include "parser/parse_agg.h" #include "parser/parse_coerce.h" #include "pgxc/locator.h" @@ -123,12 +125,10 @@ typedef struct XCWalkerContext int varno; bool within_or; bool within_not; + List *join_list; /* A list of List*'s, one for each relation. */ } XCWalkerContext; -/* A list of List*'s, one for each relation. 
*/ -List *join_list = NULL; - /* Forbid unsafe SQL statements */ bool StrictStatementChecking = true; @@ -185,12 +185,12 @@ new_pgxc_join(int relid1, char *aliasname1, int relid2, char *aliasname2) * Look up the join struct for a particular join */ static PGXC_Join * -find_pgxc_join(int relid1, char *aliasname1, int relid2, char *aliasname2) +find_pgxc_join(int relid1, char *aliasname1, int relid2, char *aliasname2, XCWalkerContext *context) { ListCell *lc; /* return if list is still empty */ - if (join_list == NULL) + if (context->join_list == NULL) return NULL; /* in the PGXC_Join struct, we always sort with relid1 < relid2 */ @@ -209,7 +209,7 @@ find_pgxc_join(int relid1, char *aliasname1, int relid2, char *aliasname2) * there should be a small number, so we just search linearly, although * long term a hash table would be better. */ - foreach(lc, join_list) + foreach(lc, context->join_list) { PGXC_Join *pgxcjoin = (PGXC_Join *) lfirst(lc); @@ -225,16 +225,16 @@ find_pgxc_join(int relid1, char *aliasname1, int relid2, char *aliasname2) * Find or create a join between 2 relations */ static PGXC_Join * -find_or_create_pgxc_join(int relid1, char *aliasname1, int relid2, char *aliasname2) +find_or_create_pgxc_join(int relid1, char *aliasname1, int relid2, char *aliasname2, XCWalkerContext *context) { PGXC_Join *pgxcjoin; - pgxcjoin = find_pgxc_join(relid1, aliasname1, relid2, aliasname2); + pgxcjoin = find_pgxc_join(relid1, aliasname1, relid2, aliasname2, context); if (pgxcjoin == NULL) { pgxcjoin = new_pgxc_join(relid1, aliasname1, relid2, aliasname2); - join_list = lappend(join_list, pgxcjoin); + context->join_list = lappend(context->join_list, pgxcjoin); } return pgxcjoin; @@ -277,7 +277,7 @@ free_special_relations(Special_Conditions *special_conditions) * frees join_list */ static void -free_join_list(void) +free_join_list(List *join_list) { if (join_list == NULL) return; @@ -368,13 +368,13 @@ get_base_var(Var *var, XCWalkerContext *context) } else if 
(rte->rtekind == RTE_SUBQUERY) { - /* + /* * Handle views like select * from v1 where col1 = 1 * where col1 is partition column of base relation */ /* the varattno corresponds with the subquery's target list (projections) */ TargetEntry *tle = list_nth(rte->subquery->targetList, var->varattno - 1); /* or varno? */ - + if (!IsA(tle->expr, Var)) return NULL; /* not column based expressoin, return */ else @@ -684,7 +684,7 @@ examine_conditions_walker(Node *expr_node, XCWalkerContext *context) /* get data struct about these two relations joining */ pgxc_join = find_or_create_pgxc_join(column_base->relid, column_base->relalias, - column_base2->relid, column_base2->relalias); + column_base2->relid, column_base2->relalias, context); if (rel_loc_info1->locatorType == LOCATOR_TYPE_REPLICATED) { @@ -914,7 +914,7 @@ contains_only_pg_catalog (List *rtable) { if (get_rel_namespace(rte->relid) != PG_CATALOG_NAMESPACE) return false; - } else if (rte->rtekind == RTE_SUBQUERY && + } else if (rte->rtekind == RTE_SUBQUERY && !contains_only_pg_catalog (rte->subquery->rtable)) return false; } @@ -967,7 +967,7 @@ get_plan_nodes_walker(Node *query_node, XCWalkerContext *context) { /* May be complicated. Before giving up, just check for pg_catalog usage */ if (contains_only_pg_catalog (query->rtable)) - { + { /* just pg_catalog tables */ context->exec_nodes = (Exec_Nodes *) palloc0(sizeof(Exec_Nodes)); context->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_PGCATALOG; @@ -1018,7 +1018,7 @@ get_plan_nodes_walker(Node *query_node, XCWalkerContext *context) /* We compare to make sure that the subquery is safe to execute with previous- * we may have multiple ones in the FROM clause. 
- * We handle the simple case of allowing multiple subqueries in the from clause, + * We handle the simple case of allowing multiple subqueries in the from clause, * but only allow one of them to not contain replicated tables */ if (!from_query_nodes) @@ -1028,20 +1028,20 @@ get_plan_nodes_walker(Node *query_node, XCWalkerContext *context) /* ok, safe */ if (!from_query_nodes) from_query_nodes = current_nodes; - } + } else { if (from_query_nodes->tableusagetype == TABLE_USAGE_TYPE_USER_REPLICATED) from_query_nodes = current_nodes; else { - /* Allow if they are both using one node, and the same one */ + /* Allow if they are both using one node, and the same one */ if (!same_single_node (from_query_nodes->nodelist, current_nodes->nodelist)) /* Complicated */ return true; } } - } + } else if (rte->rtekind == RTE_RELATION) { /* Look for pg_catalog tables */ @@ -1049,7 +1049,7 @@ get_plan_nodes_walker(Node *query_node, XCWalkerContext *context) current_usage_type = TABLE_USAGE_TYPE_PGCATALOG; else current_usage_type = TABLE_USAGE_TYPE_USER; - } + } else if (rte->rtekind == RTE_FUNCTION) { /* See if it is a catalog function */ @@ -1095,9 +1095,9 @@ get_plan_nodes_walker(Node *query_node, XCWalkerContext *context) return true; /* Examine join conditions, see if each join is single-node safe */ - if (join_list != NULL) + if (context->join_list != NULL) { - foreach(lc, join_list) + foreach(lc, context->join_list) { PGXC_Join *pgxcjoin = (PGXC_Join *) lfirst(lc); @@ -1254,22 +1254,28 @@ static Exec_Nodes * get_plan_nodes(Query *query, bool isRead) { Exec_Nodes *result_nodes; - XCWalkerContext *context = palloc0(sizeof(XCWalkerContext)); - - context->query = query; - context->isRead = isRead; - - context->conditions = (Special_Conditions *) palloc0(sizeof(Special_Conditions)); - context->rtables = lappend(context->rtables, query->rtable); - - join_list = NULL; - - if (get_plan_nodes_walker((Node *) query, context)) + XCWalkerContext context; + + + context.query = query; + 
context.isRead = isRead; + context.exec_nodes = NULL; + context.conditions = (Special_Conditions *) palloc0(sizeof(Special_Conditions)); + context.rtables = NIL; + context.rtables = lappend(context.rtables, query->rtable); + context.multilevel_join = false; + context.varno = 0; + context.within_or = false; + context.within_not = false; + context.join_list = NIL; + + if (get_plan_nodes_walker((Node *) query, &context)) result_nodes = NULL; else - result_nodes = context->exec_nodes; + result_nodes = context.exec_nodes; - free_special_relations(context->conditions); + free_special_relations(context.conditions); + free_join_list(context.join_list); return result_nodes; } @@ -1304,7 +1310,6 @@ get_plan_nodes_command(Query *query) return NULL; } - free_join_list(); return exec_nodes; } @@ -1345,17 +1350,17 @@ static List * get_simple_aggregates(Query * query) { List *simple_agg_list = NIL; - + /* Check for simple multi-node aggregate */ if (query->hasAggs) { ListCell *lc; int column_pos = 0; - + foreach (lc, query->targetList) { TargetEntry *tle = (TargetEntry *) lfirst(lc); - + if (IsA(tle->expr, Aggref)) { /*PGXC borrowed this code from nodeAgg.c, see ExecInitAgg()*/ @@ -1422,7 +1427,7 @@ get_simple_aggregates(Query * query) get_func_name(finalfn_oid)); } } - + /* resolve actual type of transition state, if polymorphic */ aggcollecttype = aggform->aggcollecttype; @@ -1468,7 +1473,7 @@ get_simple_aggregates(Query * query) get_typlenbyval(aggcollecttype, &simple_agg->transtypeLen, &simple_agg->transtypeByVal); - + /* * initval is potentially null, so don't try to access it as a struct * field. Must do it the hard way with SysCacheGetAttr. @@ -1534,6 +1539,427 @@ get_simple_aggregates(Query * query) /* + * add_sort_column --- utility subroutine for building sort info arrays + * + * We need this routine because the same column might be selected more than + * once as a sort key column; if so, the extra mentions are redundant. 
+ * + * Caller is assumed to have allocated the arrays large enough for the + * max possible number of columns. Return value is the new column count. + * + * PGXC: copied from optimizer/plan/planner.c + */ +static int +add_sort_column(AttrNumber colIdx, Oid sortOp, bool nulls_first, + int numCols, AttrNumber *sortColIdx, + Oid *sortOperators, bool *nullsFirst) +{ + int i; + + Assert(OidIsValid(sortOp)); + + for (i = 0; i < numCols; i++) + { + /* + * Note: we check sortOp because it's conceivable that "ORDER BY foo + * USING <, foo USING <<<" is not redundant, if <<< distinguishes + * values that < considers equal. We need not check nulls_first + * however because a lower-order column with the same sortop but + * opposite nulls direction is redundant. + */ + if (sortColIdx[i] == colIdx && sortOperators[i] == sortOp) + { + /* Already sorting by this col, so extra sort key is useless */ + return numCols; + } + } + + /* Add the column */ + sortColIdx[numCols] = colIdx; + sortOperators[numCols] = sortOp; + nullsFirst[numCols] = nulls_first; + return numCols + 1; +} + +/* + * add_distinct_column - utility subroutine to remove redundant columns, just + * like add_sort_column + */ +static int +add_distinct_column(AttrNumber colIdx, Oid eqOp, int numCols, + AttrNumber *sortColIdx, Oid *eqOperators) +{ + int i; + + Assert(OidIsValid(eqOp)); + + for (i = 0; i < numCols; i++) + { + if (sortColIdx[i] == colIdx && eqOperators[i] == eqOp) + { + /* Already sorting by this col, so extra sort key is useless */ + return numCols; + } + } + + /* Add the column */ + sortColIdx[numCols] = colIdx; + eqOperators[numCols] = eqOp; + return numCols + 1; +} + + +/* + * Reconstruct the step query + */ +static void +reconstruct_step_query(List *rtable, bool has_order_by, List *extra_sort, + RemoteQuery *step) +{ + List *context; + bool useprefix; + List *sub_tlist = step->plan.targetlist; + ListCell *l; + StringInfo buf = makeStringInfo(); + char *sql; + char *cur; + char *sql_from; + + context 
= deparse_context_for_plan((Node *) step, NULL, rtable, NIL); + useprefix = list_length(rtable) > 1; + + foreach(l, sub_tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + char *exprstr = deparse_expression((Node *) tle->expr, context, + useprefix, false); + + if (buf->len == 0) + { + appendStringInfo(buf, "SELECT "); + if (step->distinct) + appendStringInfo(buf, "DISTINCT "); + } + else + appendStringInfo(buf, ", "); + + appendStringInfoString(buf, exprstr); + } + + /* + * A kind of dummy + * Do not reconstruct remaining query, just search original statement + * for " FROM " and append remainder to the target list we just generated. + * Do not handle the case if " FROM " we found is not a "FROM" keyword, but, + * for example, a part of string constant. + */ + sql = pstrdup(step->sql_statement); /* mutable copy */ + /* string to upper case, for comparing */ + cur = sql; + while (*cur) + { + /* replace whitespace with a space */ + if (isspace((unsigned char) *cur)) + *cur = ' '; + *cur++ = toupper(*cur); + } + + /* find the keyword */ + sql_from = strstr(sql, " FROM "); + if (sql_from) + { + /* the same offset in the original string */ + int offset = sql_from - sql; + /* remove terminating semicolon */ + char *end = strrchr(step->sql_statement, ';'); + *end = '\0'; + + appendStringInfoString(buf, step->sql_statement + offset); + } + + if (extra_sort) + { + foreach(l, extra_sort) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + char *exprstr = deparse_expression((Node *) tle->expr, context, + useprefix, false); + + if (has_order_by) + appendStringInfo(buf, ", "); + else + { + appendStringInfo(buf, " ORDER BY "); + has_order_by = true; + } + + appendStringInfoString(buf, exprstr); + } + } + + /* do not need the copy */ + pfree(sql); + + /* free previous query */ + pfree(step->sql_statement); + /* get a copy of new query */ + step->sql_statement = pstrdup(buf->data); + /* free the query buffer */ + pfree(buf->data); + pfree(buf); +} + + +/* + * Plan to 
sort step tuples + * PGXC: copied and adopted from optimizer/plan/planner.c + */ +static void +make_simple_sort_from_sortclauses(Query *query, RemoteQuery *step) +{ + List *sortcls = query->sortClause; + List *distinctcls = query->distinctClause; + List *sub_tlist = step->plan.targetlist; + SimpleSort *sort; + SimpleDistinct *distinct; + ListCell *l; + int numsortkeys; + int numdistkeys; + AttrNumber *sortColIdx; + AttrNumber *distColIdx; + Oid *sortOperators; + Oid *eqOperators; + bool *nullsFirst; + bool need_reconstruct = false; + /* + * List of target list entries from DISTINCT which are not in the ORDER BY. + * The exressions should be appended to the ORDER BY clause of remote query + */ + List *extra_distincts = NIL; + + Assert(step->sort == NULL); + Assert(step->distinct == NULL); + + /* + * We will need at most list_length(sortcls) sort columns; possibly less + * Also need room for extra distinct expressions if we need to append them + */ + numsortkeys = list_length(sortcls) + list_length(distinctcls); + sortColIdx = (AttrNumber *) palloc(numsortkeys * sizeof(AttrNumber)); + sortOperators = (Oid *) palloc(numsortkeys * sizeof(Oid)); + nullsFirst = (bool *) palloc(numsortkeys * sizeof(bool)); + + numsortkeys = 0; + sort = (SimpleSort *) palloc(sizeof(SimpleSort)); + + if (sortcls) + { + foreach(l, sortcls) + { + SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); + TargetEntry *tle = get_sortgroupclause_tle(sortcl, sub_tlist); + + if (tle->resjunk) + need_reconstruct = true; + + /* + * Check for the possibility of duplicate order-by clauses --- the + * parser should have removed 'em, but no point in sorting + * redundantly. 
+ */ + numsortkeys = add_sort_column(tle->resno, sortcl->sortop, + sortcl->nulls_first, + numsortkeys, + sortColIdx, sortOperators, nullsFirst); + } + } + + if (distinctcls) + { + /* + * Validate distinct clause + * We have to sort tuples to filter duplicates, and if ORDER BY clause + * is already present the sort order specified here may be incompatible + * with order needed for distinct. + * + * To be compatible, all expressions from DISTINCT must appear at the + * beginning of ORDER BY list. If list of DISTINCT expressions is longer + * then ORDER BY we can make ORDER BY compatible we can append remaining + * expressions from DISTINCT to ORDER BY. Obviously ORDER BY must not + * contain expressions not from the DISTINCT list in this case. + * + * For validation purposes we use column indexes (AttrNumber) to + * identify expressions. May be this is not enough and we should revisit + * the algorithm. + * + * We validate compatibility as follow: + * 1. Make working copy of DISTINCT + * 1a. Remove possible duplicates when copying: do not add expression + * 2. If order by is empty they are already compatible, skip 3 + * 3. Iterate over ORDER BY items + * 3a. If the item is in the working copy delete it from the working + * list. If working list is empty after deletion DISTINCT and + * ORDER BY are compatible, so break the loop. If working list is + * not empty continue iterating + * 3b. ORDER BY clause may contain duplicates. So if we can not found + * expression in the remainder of DISTINCT, probably it has already + * been removed because of duplicate ORDER BY entry. Check original + * DISTINCT clause, if expression is there continue iterating. + * 3c. DISTINCT and ORDER BY are not compatible, emit error + * 4. 
DISTINCT and ORDER BY are compatible, if we have remaining items + * in the working copy we should append it to the order by list + */ + /* + * Create the list of unique DISTINCT clause expressions + */ + foreach(l, distinctcls) + { + SortGroupClause *distinctcl = (SortGroupClause *) lfirst(l); + TargetEntry *tle = get_sortgroupclause_tle(distinctcl, sub_tlist); + bool found = false; + + if (extra_distincts) + { + ListCell *xl; + + foreach(xl, extra_distincts) + { + TargetEntry *xtle = (TargetEntry *) lfirst(xl); + if (xtle->resno == tle->resno) + { + found = true; + break; + } + } + } + + if (!found) + extra_distincts = lappend(extra_distincts, tle); + } + + if (sortcls) + { + foreach(l, sortcls) + { + SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); + TargetEntry *tle = get_sortgroupclause_tle(sortcl, sub_tlist); + bool found = false; + ListCell *xl; + ListCell *prev = NULL; + + /* Search for the expression in the DISTINCT clause */ + foreach(xl, extra_distincts) + { + TargetEntry *xtle = (TargetEntry *) lfirst(xl); + if (xtle->resno == tle->resno) + { + extra_distincts = list_delete_cell(extra_distincts, xl, + prev); + found = true; + break; + } + prev = xl; + } + + /* Probably we've done */ + if (found && list_length(extra_distincts) == 0) + break; + + /* Ensure sort expression is not a duplicate */ + if (!found) + { + foreach(xl, distinctcls) + { + SortGroupClause *xcl = (SortGroupClause *) lfirst(xl); + TargetEntry *xtle = get_sortgroupclause_tle(xcl, sub_tlist); + if (xtle->resno == tle->resno) + { + /* it is a duplicate then */ + found = true; + break; + } + } + } + + /* Give up, we do not support it */ + if (!found) + { + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("Such combination of ORDER BY and DISTINCT is not yet supported")))); + } + } + } + /* need to append to the ORDER BY */ + if (list_length(extra_distincts) > 0) + need_reconstruct = true; + + /* + * End of validation, expression to append to ORDER BY are in the + 
* extra_distincts list + */ + + distinct = (SimpleDistinct *) palloc(sizeof(SimpleDistinct)); + + /* + * We will need at most list_length(distinctcls) sort columns + */ + numdistkeys = list_length(distinctcls); + distColIdx = (AttrNumber *) palloc(numdistkeys * sizeof(AttrNumber)); + eqOperators = (Oid *) palloc(numdistkeys * sizeof(Oid)); + + numdistkeys = 0; + + foreach(l, distinctcls) + { + SortGroupClause *distinctcl = (SortGroupClause *) lfirst(l); + TargetEntry *tle = get_sortgroupclause_tle(distinctcl, sub_tlist); + + /* + * Check for the possibility of duplicate order-by clauses --- the + * parser should have removed 'em, but no point in sorting + * redundantly. + */ + numdistkeys = add_distinct_column(tle->resno, + distinctcl->eqop, + numdistkeys, + distColIdx, + eqOperators); + /* append also extra sort operator, if not already there */ + numsortkeys = add_sort_column(tle->resno, + distinctcl->sortop, + distinctcl->nulls_first, + numsortkeys, + sortColIdx, + sortOperators, + nullsFirst); + } + + Assert(numdistkeys > 0); + + distinct->numCols = numdistkeys; + distinct->uniqColIdx = distColIdx; + distinct->eqOperators = eqOperators; + + step->distinct = distinct; + } + + + Assert(numsortkeys > 0); + + sort->numCols = numsortkeys; + sort->sortColIdx = sortColIdx; + sort->sortOperators = sortOperators; + sort->nullsFirst = nullsFirst; + + step->sort = sort; + + if (need_reconstruct) + reconstruct_step_query(query->rtable, sortcls != NULL, extra_distincts, + step); +} + +/* * Build up a QueryPlan to execute on. 
* * For the prototype, there will only be one step, @@ -1543,17 +1969,16 @@ Query_Plan * GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) { Query_Plan *query_plan = palloc(sizeof(Query_Plan)); - Query_Step *query_step = palloc(sizeof(Query_Step)); + RemoteQuery *query_step = makeNode(RemoteQuery); Query *query; - - query_plan->force_autocommit = false; - query_step->sql_statement = (char *) palloc(strlen(sql_statement) + 1); strcpy(query_step->sql_statement, sql_statement); query_step->exec_nodes = NULL; query_step->combine_type = COMBINE_TYPE_NONE; query_step->simple_aggregates = NULL; + query_step->read_only = false; + query_step->force_autocommit = false; query_plan->query_step_list = lappend(NULL, query_step); @@ -1565,11 +1990,16 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) switch (nodeTag(parsetree)) { case T_SelectStmt: + /* Optimize multi-node handling */ + query_step->read_only = true; + /* fallthru */ case T_InsertStmt: case T_UpdateStmt: case T_DeleteStmt: /* just use first one in querytree_list */ query = (Query *) linitial(querytree_list); + /* should copy instead ? */ + query_step->plan.targetlist = query->targetList; /* Perform some checks to make sure we can support the statement */ if (nodeTag(parsetree) == T_SelectStmt) @@ -1633,6 +2063,12 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) } /* + * Add sortring to the step + */ + if (query->sortClause || query->distinctClause) + make_simple_sort_from_sortclauses(query, query_step); + + /* * PG-XC cannot yet support some variations of SQL statements. 
* We perform some checks to at least catch common cases */ @@ -1658,15 +2094,6 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) ereport(ERROR, (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), (errmsg("Multi-node LIMIT not yet supported")))); - if (query->sortClause && StrictSelectChecking) - ereport(ERROR, - (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("Multi-node ORDER BY not yet supported")))); - /* PGXCTODO - check if first column partitioning column */ - if (query->distinctClause) - ereport(ERROR, - (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("Multi-node DISTINCT`not yet supported")))); } } break; @@ -1686,7 +2113,7 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) case T_DropdbStmt: case T_VacuumStmt: query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; - query_plan->force_autocommit = true; + query_step->force_autocommit = true; break; case T_DropPropertyStmt: @@ -1864,7 +2291,7 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) * Free Query_Step struct */ static void -free_query_step(Query_Step *query_step) +free_query_step(RemoteQuery *query_step) { if (query_step == NULL) return; @@ -1894,7 +2321,7 @@ FreeQueryPlan(Query_Plan *query_plan) return; foreach(item, query_plan->query_step_list) - free_query_step((Query_Step *) lfirst(item)); + free_query_step((RemoteQuery *) lfirst(item)); pfree(query_plan->query_step_list); pfree(query_plan); diff --git a/src/backend/pgxc/pool/Makefile b/src/backend/pgxc/pool/Makefile index 7143af5..e875303 100644 --- a/src/backend/pgxc/pool/Makefile +++ b/src/backend/pgxc/pool/Makefile @@ -14,6 +14,6 @@ subdir = src/backend/pgxc/pool top_builddir = ../../../.. 
include $(top_builddir)/src/Makefile.global -OBJS = combiner.o datanode.o poolmgr.o poolcomm.o +OBJS = datanode.o execRemote.o poolmgr.o poolcomm.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/pgxc/pool/combiner.c b/src/backend/pgxc/pool/combiner.c deleted file mode 100644 index 53e5dfb..0000000 --- a/src/backend/pgxc/pool/combiner.c +++ /dev/null @@ -1,652 +0,0 @@ -/*------------------------------------------------------------------------- - * - * combiner.c - * - * Combine responses from multiple Data Nodes - * - * - * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation - * - * IDENTIFICATION - * $$ - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" -#include "pgxc/combiner.h" -#include "pgxc/planner.h" -#include "catalog/pg_type.h" -#include "libpq/libpq.h" -#include "libpq/pqformat.h" -#include "utils/builtins.h" -#include "utils/datum.h" - - -/* - * Create a structure to store parameters needed to combine responses from - * multiple connections as well as state information - */ -ResponseCombiner -CreateResponseCombiner(int node_count, CombineType combine_type, - CommandDest dest) -{ - ResponseCombiner combiner; - - /* ResponseComber is a typedef for pointer to ResponseCombinerData */ - combiner = (ResponseCombiner) palloc(sizeof(ResponseCombinerData)); - if (combiner == NULL) - { - /* Out of memory */ - return combiner; - } - - combiner->node_count = node_count; - combiner->combine_type = combine_type; - combiner->dest = dest; - combiner->command_complete_count = 0; - combiner->row_count = 0; - combiner->request_type = REQUEST_TYPE_NOT_DEFINED; - combiner->description_count = 0; - combiner->copy_in_count = 0; - combiner->copy_out_count = 0; - combiner->inErrorState = false; - combiner->initAggregates = true; - combiner->simple_aggregates = NULL; - combiner->copy_file = NULL; - - return 
combiner; -} - -/* - * Parse out row count from the command status response and convert it to integer - */ -static int -parse_row_count(const char *message, size_t len, int *rowcount) -{ - int digits = 0; - int pos; - - *rowcount = 0; - /* skip \0 string terminator */ - for (pos = 0; pos < len - 1; pos++) - { - if (message[pos] >= '0' && message[pos] <= '9') - { - *rowcount = *rowcount * 10 + message[pos] - '0'; - digits++; - } - else - { - *rowcount = 0; - digits = 0; - } - } - return digits; -} - -/* - * Extract a transition value from data row. Invoke the Input Function - * associated with the transition data type to represent value as a Datum. - * Output parameters value and val_null, receive extracted value and indicate - * whether it is null. - */ -static void -parse_aggregate_value(SimpleAgg *simple_agg, char *col_data, size_t datalen, Datum *value, bool *val_null) -{ - /* Check NULL */ - if (datalen == -1) - { - *value = (Datum) 0; - *val_null = true; - } - else - { - resetStringInfo(&simple_agg->valuebuf); - appendBinaryStringInfo(&simple_agg->valuebuf, col_data, datalen); - *value = InputFunctionCall(&simple_agg->arginputfn, simple_agg->valuebuf.data, simple_agg->argioparam, -1); - *val_null = false; - } -} - -/* - * Initialize the collection value, when agregation is first set up, or for a - * new group (grouping support is not implemented yet) - */ -static void -initialize_collect_aggregates(SimpleAgg *simple_agg) -{ - if (simple_agg->initValueIsNull) - simple_agg->collectValue = simple_agg->initValue; - else - simple_agg->collectValue = datumCopy(simple_agg->initValue, - simple_agg->transtypeByVal, - simple_agg->transtypeLen); - simple_agg->noCollectValue = simple_agg->initValueIsNull; - simple_agg->collectValueNull = simple_agg->initValueIsNull; -} - -/* - * Finalize the aggregate after current group or entire relation is processed - * (grouping support is not implemented yet) - */ -static void -finalize_collect_aggregates(SimpleAgg *simple_agg, Datum 
*resultVal, bool *resultIsNull) -{ - /* - * Apply the agg's finalfn if one is provided, else return collectValue. - */ - if (OidIsValid(simple_agg->finalfn_oid)) - { - FunctionCallInfoData fcinfo; - - InitFunctionCallInfoData(fcinfo, &(simple_agg->finalfn), 1, - (void *) simple_agg, NULL); - fcinfo.arg[0] = simple_agg->collectValue; - fcinfo.argnull[0] = simple_agg->collectValueNull; - if (fcinfo.flinfo->fn_strict && simple_agg->collectValueNull) - { - /* don't call a strict function with NULL inputs */ - *resultVal = (Datum) 0; - *resultIsNull = true; - } - else - { - *resultVal = FunctionCallInvoke(&fcinfo); - *resultIsNull = fcinfo.isnull; - } - } - else - { - *resultVal = simple_agg->collectValue; - *resultIsNull = simple_agg->collectValueNull; - } -} - -/* - * Given new input value(s), advance the transition function of an aggregate. - * - * The new values (and null flags) have been preloaded into argument positions - * 1 and up in fcinfo, so that we needn't copy them again to pass to the - * collection function. No other fields of fcinfo are assumed valid. - * - * It doesn't matter which memory context this is called in. - */ -static void -advance_collect_function(SimpleAgg *simple_agg, FunctionCallInfoData *fcinfo) -{ - Datum newVal; - - if (simple_agg->transfn.fn_strict) - { - /* - * For a strict transfn, nothing happens when there's a NULL input; we - * just keep the prior transValue. - */ - if (fcinfo->argnull[1]) - return; - if (simple_agg->noCollectValue) - { - /* - * result has not been initialized - * We must copy the datum into result if it is pass-by-ref. We - * do not need to pfree the old result, since it's NULL. - */ - simple_agg->collectValue = datumCopy(fcinfo->arg[1], - simple_agg->transtypeByVal, - simple_agg->transtypeLen); - simple_agg->collectValueNull = false; - simple_agg->noCollectValue = false; - return; - } - if (simple_agg->collectValueNull) - { - /* - * Don't call a strict function with NULL inputs. 
Note it is - * possible to get here despite the above tests, if the transfn is - * strict *and* returned a NULL on a prior cycle. If that happens - * we will propagate the NULL all the way to the end. - */ - return; - } - } - - /* - * OK to call the transition function - */ - InitFunctionCallInfoData(*fcinfo, &(simple_agg->transfn), 2, (void *) simple_agg, NULL); - fcinfo->arg[0] = simple_agg->collectValue; - fcinfo->argnull[0] = simple_agg->collectValueNull; - newVal = FunctionCallInvoke(fcinfo); - - /* - * If pass-by-ref datatype, must copy the new value into aggcontext and - * pfree the prior transValue. But if transfn returned a pointer to its - * first input, we don't need to do anything. - */ - if (!simple_agg->transtypeByVal && - DatumGetPointer(newVal) != DatumGetPointer(simple_agg->collectValue)) - { - if (!fcinfo->isnull) - { - newVal = datumCopy(newVal, - simple_agg->transtypeByVal, - simple_agg->transtypeLen); - } - if (!simple_agg->collectValueNull) - pfree(DatumGetPointer(simple_agg->collectValue)); - } - - simple_agg->collectValue = newVal; - simple_agg->collectValueNull = fcinfo->isnull; -} - -/* - * Handle response message and update combiner's state. 
- * This function contains main combiner logic - */ -int -CombineResponse(ResponseCombiner combiner, char msg_type, char *msg_body, size_t len) -{ - int digits = 0; - - /* Ignore anything if we have encountered error */ - if (combiner->inErrorState) - return EOF; - - switch (msg_type) - { - case 'c': /* CopyOutCommandComplete */ - if (combiner->request_type == REQUEST_TYPE_NOT_DEFINED) - combiner->request_type = REQUEST_TYPE_COPY_OUT; - if (combiner->request_type != REQUEST_TYPE_COPY_OUT) - /* Inconsistent responses */ - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("Unexpected response from the data nodes"))); - /* Just do nothing, close message is managed by the coordinator */ - combiner->copy_out_count++; - break; - case 'C': /* CommandComplete */ - /* - * If we did not receive description we are having rowcount or OK - * response - */ - if (combiner->request_type == REQUEST_TYPE_NOT_DEFINED) - combiner->request_type = REQUEST_TYPE_COMMAND; - /* Extract rowcount */ - if (combiner->combine_type != COMBINE_TYPE_NONE) - { - int rowcount; - digits = parse_row_count(msg_body, len, &rowcount); - if (digits > 0) - { - /* Replicated write, make sure they are the same */ - if (combiner->combine_type == COMBINE_TYPE_SAME) - { - if (combiner->command_complete_count) - { - if (rowcount != combiner->row_count) - /* There is a consistency issue in the database with the replicated table */ - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("Write to replicated table returned different results from the data nodes"))); - } - else - /* first result */ - combiner->row_count = rowcount; - } - else - combiner->row_count += rowcount; - } - else - combiner->combine_type = COMBINE_TYPE_NONE; - } - if (++combiner->command_complete_count == combiner->node_count) - { - - if (combiner->simple_aggregates - /* - * Aggregates has not been initialized - that means - * no rows received from data nodes, nothing to send - * It is possible if HAVING clause is present - */ 
- && !combiner->initAggregates) - { - /* Build up and send a datarow with aggregates */ - StringInfo dataRowBuffer = makeStringInfo(); - ListCell *lc; - - /* Number of fields */ - pq_sendint(dataRowBuffer, list_length(combiner->simple_aggregates), 2); - - foreach (lc, combiner->simple_aggregates) - { - SimpleAgg *simple_agg = (SimpleAgg *) lfirst(lc); - Datum resultVal; - bool resultIsNull; - - finalize_collect_aggregates(simple_agg, &resultVal, &resultIsNull); - /* Aggregation result */ - if (resultIsNull) - { - pq_sendint(dataRowBuffer, -1, 4); - } - else - { - char *text = OutputFunctionCall(&simple_agg->resoutputfn, resultVal); - size_t len = strlen(text); - pq_sendint(dataRowBuffer, len, 4); - pq_sendtext(dataRowBuffer, text, len); - } - } - pq_putmessage('D', dataRowBuffer->data, dataRowBuffer->len); - pfree(dataRowBuffer->data); - pfree(dataRowBuffer); - } - if (combiner->dest == DestRemote - || combiner->dest == DestRemoteExecute) - { - if (combiner->combine_type == COMBINE_TYPE_NONE) - { - pq_putmessage(msg_type, msg_body, len); - } - else - { - char command_complete_buffer[256]; - - /* Truncate msg_body to get base string */ - msg_body[len - digits - 1] = '\0'; - len = sprintf(command_complete_buffer, "%s%d", msg_body, combiner->row_count) + 1; - pq_putmessage(msg_type, command_complete_buffer, len); - } - } - } - break; - case 'T': /* RowDescription */ - if (combiner->request_type == REQUEST_TYPE_NOT_DEFINED) - combiner->request_type = REQUEST_TYPE_QUERY; - if (combiner->request_type != REQUEST_TYPE_QUERY) - { - /* Inconsistent responses */ - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("Unexpected response from the data nodes"))); - } - /* Proxy first */ - if (combiner->description_count++ == 0) - { - if (combiner->dest == DestRemote - || combiner->dest == DestRemoteExecute) - pq_putmessage(msg_type, msg_body, len); - } - break; - case 'S': /* ParameterStatus (SET command) */ - if (combiner->request_type == REQUEST_TYPE_NOT_DEFINED) - 
combiner->request_type = REQUEST_TYPE_QUERY; - if (combiner->request_type != REQUEST_TYPE_QUERY) - { - /* Inconsistent responses */ - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("Unexpected response from the data nodes"))); - } - /* Proxy last */ - if (++combiner->description_count == combiner->node_count) - { - if (combiner->dest == DestRemote - || combiner->dest == DestRemoteExecute) - pq_putmessage(msg_type, msg_body, len); - } - break; - case 'G': /* CopyInResponse */ - if (combiner->request_type == REQUEST_TYPE_NOT_DEFINED) - combiner->request_type = REQUEST_TYPE_COPY_IN; - if (combiner->request_type != REQUEST_TYPE_COPY_IN) - { - /* Inconsistent responses */ - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("Unexpected response from the data nodes"))); - } - /* Proxy first */ - if (combiner->copy_in_count++ == 0) - { - if (combiner->dest == DestRemote - || combiner->dest == DestRemoteExecute) - pq_putmessage(msg_type, msg_body, len); - } - break; - case 'H': /* CopyOutResponse */ - if (combiner->request_type == REQUEST_TYPE_NOT_DEFINED) - combiner->request_type = REQUEST_TYPE_COPY_OUT; - if (combiner->request_type != REQUEST_TYPE_COPY_OUT) - { - /* Inconsistent responses */ - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("Unexpected response from the data nodes"))); - } - /* - * The normal PG code will output an H message when it runs in the - * coordinator, so do not proxy message here, just count it. 
- */ - combiner->copy_out_count++; - break; - case 'd': /* CopyOutDataRow */ - if (combiner->request_type == REQUEST_TYPE_NOT_DEFINED) - combiner->request_type = REQUEST_TYPE_COPY_OUT; - - /* Inconsistent responses */ - if (combiner->request_type != REQUEST_TYPE_COPY_OUT) - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("Unexpected response from the data nodes"))); - - /* If there is a copy file, data has to be sent to the local file */ - if (combiner->copy_file) - { - /* write data to the copy file */ - char *data_row; - data_row = (char *) palloc0(len); - memcpy(data_row, msg_body, len); - - fwrite(data_row, 1, len, combiner->copy_file); - break; - } - /* - * In this case data is sent back to the client - */ - if (combiner->dest == DestRemote - || combiner->dest == DestRemoteExecute) - { - StringInfo data_buffer; - - data_buffer = makeStringInfo(); - - pq_sendtext(data_buffer, msg_body, len); - pq_putmessage(msg_type, - data_buffer->data, - data_buffer->len); - - pfree(data_buffer->data); - pfree(data_buffer); - } - break; - case 'D': /* DataRow */ - if (!combiner->simple_aggregates) - { - if (combiner->dest == DestRemote - || combiner->dest == DestRemoteExecute) - pq_putmessage(msg_type, msg_body, len); - } - else - { - ListCell *lc; - char **col_values; - int *col_value_len; - uint16 col_count; - int i, cur = 0; - - /* Get values from the data row into array to speed up access */ - memcpy(&col_count, msg_body, 2); - col_count = ntohs(col_count); - cur += 2; - - col_values = (char **) palloc0(col_count * sizeof(char *)); - col_value_len = (int *) palloc0(col_count * sizeof(int)); - for (i = 0; i < col_count; i++) - { - int n32; - - memcpy(&n32, msg_body + cur, 4); - col_value_len[i] = ntohl(n32); - cur += 4; - - if (col_value_len[i] != -1) - { - col_values[i] = msg_body + cur; - cur += col_value_len[i]; - } - } - - if (combiner->initAggregates) - { - foreach (lc, combiner->simple_aggregates) - initialize_collect_aggregates((SimpleAgg *) lfirst(lc)); 
- - combiner->initAggregates = false; - } - - foreach (lc, combiner->simple_aggregates) - { - SimpleAgg *simple_agg = (SimpleAgg *) lfirst(lc); - FunctionCallInfoData fcinfo; - - parse_aggregate_value(simple_agg, - col_values[simple_agg->column_pos], - col_value_len[simple_agg->column_pos], - fcinfo.arg + 1, - fcinfo.argnull + 1); - - advance_collect_function(simple_agg, &fcinfo); - } - pfree(col_values); - pfree(col_value_len); - } - break; - case 'E': /* ErrorResponse */ - combiner->inErrorState = true; - /* fallthru */ - case 'A': /* NotificationResponse */ - case 'N': /* NoticeResponse */ - /* Proxy error message back if specified, - * or if doing internal primary copy - */ - if (combiner->dest == DestRemote - || combiner->dest == DestRemoteExecute) - pq_putmessage(msg_type, msg_body, len); - break; - case 'I': /* EmptyQuery */ - default: - /* Unexpected message */ - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("Unexpected response from the data nodes"))); - } - return 0; -} - -/* - * Examine the specified combiner state and determine if command was completed - * successfully - */ -static bool -validate_combiner(ResponseCombiner combiner) -{ - /* There was error message while combining */ - if (combiner->inErrorState) - return false; - /* Check if state is defined */ - if (combiner->request_type == REQUEST_TYPE_NOT_DEFINED) - return false; - /* Check all nodes completed */ - if ((combiner->request_type == REQUEST_TYPE_COMMAND - || combiner->request_type == REQUEST_TYPE_QUERY) - && combiner->command_complete_count != combiner->node_count) - return false; - - /* Check count of description responses */ - if (combiner->request_type == REQUEST_TYPE_QUERY - && combiner->description_count != combiner->node_count) - return false; - - /* Check count of copy-in responses */ - if (combiner->request_type == REQUEST_TYPE_COPY_IN - && combiner->copy_in_count != combiner->node_count) - return false; - - /* Check count of copy-out responses */ - if 
(combiner->request_type == REQUEST_TYPE_COPY_OUT - && combiner->copy_out_count != combiner->node_count) - return false; - - /* Add other checks here as needed */ - - /* All is good if we are here */ - return true; -} - -/* - * Validate combiner and release storage freeing allocated memory - */ -bool -ValidateAndCloseCombiner(ResponseCombiner combiner) -{ - bool valid = validate_combiner(combiner); - - pfree(combiner); - - return valid; -} - -/* - * Validate combiner and reset storage - */ -bool -ValidateAndResetCombiner(ResponseCombiner combiner) -{ - bool valid = validate_combiner(combiner); - - combiner->command_complete_count = 0; - combiner->row_count = 0; - combiner->request_type = REQUEST_TYPE_NOT_DEFINED; - combiner->description_count = 0; - combiner->copy_in_count = 0; - combiner->copy_out_count = 0; - combiner->inErrorState = false; - combiner->simple_aggregates = NULL; - combiner->copy_file = NULL; - - return valid; -} - -/* - * Close combiner and free allocated memory, if it is not needed - */ -void -CloseCombiner(ResponseCombiner combiner) -{ - if (combiner) - pfree(combiner); -} - -/* - * Assign combiner aggregates - */ -void -AssignCombinerAggregates(ResponseCombiner combiner, List *simple_aggregates) -{ - combiner->simple_aggregates = simple_aggregates; -} diff --git a/src/backend/pgxc/pool/datanode.c b/src/backend/pgxc/pool/datanode.c index 6a1aba8..517b1e4 100644 --- a/src/backend/pgxc/pool/datanode.c +++ b/src/backend/pgxc/pool/datanode.c @@ -15,6 +15,7 @@ *------------------------------------------------------------------------- */ +#include "postgres.h" #include <sys/select.h> #include <sys/time.h> #include <sys/types.h> @@ -22,166 +23,33 @@ #include <string.h> #include <unistd.h> #include <errno.h> -#include "pgxc/poolmgr.h" #include "access/gtm.h" #include "access/transam.h" #include "access/xact.h" -#include "postgres.h" -#include "utils/snapmgr.h" -#include "pgxc/pgxc.h" #include "gtm/gtm_c.h" #include "pgxc/datanode.h" #include 
"pgxc/locator.h" -#include "../interfaces/libpq/libpq-fe.h" +#include "pgxc/pgxc.h" +#include "pgxc/poolmgr.h" +#include "tcop/dest.h" #include "utils/elog.h" #include "utils/memutils.h" - +#include "utils/snapmgr.h" +#include "../interfaces/libpq/libpq-fe.h" #define NO_SOCKET -1 -/* - * Buffer size does not affect performance significantly, just do not allow - * connection buffer grows infinitely - */ -#define COPY_BUFFER_SIZE 8192 -#define PRIMARY_NODE_WRITEAHEAD 1024 * 1024 - static int node_count = 0; static DataNodeHandle *handles = NULL; -static bool autocommit = true; -static DataNodeHandle **write_node_list = NULL; -static int write_node_count = 0; -static DataNodeHandle **get_handles(List *nodelist); -static int get_transaction_nodes(DataNodeHandle **connections); -static void release_handles(void); - -static void data_node_init(DataNodeHandle *handle, int sock); +static void data_node_init(DataNodeHandle *handle, int sock, int nodenum); static void data_node_free(DataNodeHandle *handle); -static int data_node_begin(int conn_count, DataNodeHandle **connections, CommandDest dest, GlobalTransactionId gxid); -static int data_node_commit(int conn_count, DataNodeHandle **connections, CommandDest dest); -static int data_node_rollback(int conn_count, DataNodeHandle **connections, CommandDest dest); - -static int ensure_in_buffer_capacity(size_t bytes_needed, DataNodeHandle *handle); -static int ensure_out_buffer_capacity(size_t bytes_needed, DataNodeHandle *handle); - -static int data_node_send_query(DataNodeHandle *handle, const char *query); -static int data_node_send_gxid(DataNodeHandle *handle, GlobalTransactionId gxid); -static int data_node_send_snapshot(DataNodeHandle *handle, Snapshot snapshot); - -static void add_error_message(DataNodeHandle *handle, const char *message); - -static int data_node_read_data(DataNodeHandle *conn); -static int handle_response(DataNodeHandle *conn, ResponseCombiner combiner); - -static int get_int(DataNodeHandle *conn, size_t 
len, int *out); -static int get_char(DataNodeHandle *conn, char *out); - -static void clear_write_node_list(); - -#define MAX_STATEMENTS_PER_TRAN 10 - -/* Variables to collect statistics */ -static int total_transactions = 0; -static int total_statements = 0; -static int total_autocommit = 0; -static int nonautocommit_2pc = 0; -static int autocommit_2pc = 0; -static int current_tran_statements = 0; -static int *statements_per_transaction = NULL; -static int *nodes_per_transaction = NULL; - -/* - * statistics collection: count a statement - */ -static void -stat_statement() -{ - total_statements++; - current_tran_statements++; -} - -/* - * To collect statistics: count a transaction - */ -static void -stat_transaction(int node_count) -{ - total_transactions++; - if (autocommit) - total_autocommit++; - - if (!statements_per_transaction) - { - statements_per_transaction = (int *) malloc((MAX_STATEMENTS_PER_TRAN + 1) * sizeof(int)); - memset(statements_per_transaction, 0, (MAX_STATEMENTS_PER_TRAN + 1) * sizeof(int)); - } - - if (current_tran_statements > MAX_STATEMENTS_PER_TRAN) - statements_per_transaction[MAX_STATEMENTS_PER_TRAN]++; - else - statements_per_transaction[current_tran_statements]++; - - current_tran_statements = 0; - if (node_count > 0 && node_count <= NumDataNodes) - { - if (!nodes_per_transaction) - { - nodes_per_transaction = (int *) malloc(NumDataNodes * sizeof(int)); - memset(nodes_per_transaction, 0, NumDataNodes * sizeof(int)); - } - nodes_per_transaction[node_count - 1]++; - } -} - - -/* - * To collect statistics: count a two-phase commit on nodes - */ -static void -stat_2pc(void) -{ - if (autocommit) - autocommit_2pc++; - else - nonautocommit_2pc++; -} +static int get_int(DataNodeHandle * conn, size_t len, int *out); +static int get_char(DataNodeHandle * conn, char *out); /* - * Output collected statistics to the log - */ -static void -stat_log(void) -{ - elog(DEBUG1, "Total Transactions: %d Tota... [truncated message content] |
From: mason_s <ma...@us...> - 2010-06-28 15:18:13
|
Project "Postgres-XC". The branch, master has been updated via 592295640039744c89a1f319d87fb34072a10efa (commit) from a32e437055fe9f9162651fd6edd811b77f443881 (commit) - Log ----------------------------------------------------------------- commit 592295640039744c89a1f319d87fb34072a10efa Author: Mason S <masonsharp@mason-sharps-macbook.local> Date: Mon Jun 28 17:08:20 2010 +0200 Allow rules to be created, provided that they do not use NOTIFY, which is not yet supported. Note that using rules is a bit unsafe. We currently end up passing down the original statement (or something close to it) to the data nodes, but based on the modifications to the rewritten tree from the rules. It is possible to do something that violates the distribution rules of the system. For example, on an update, one could insert into a table that is hash distributed, but populate it with a value that violates this. diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index acd9f97..2608a3f 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -1613,6 +1613,13 @@ transformRuleStmt(RuleStmt *stmt, const char *queryString, bool has_old, has_new; +#ifdef PGXC + if(IsA(action, NotifyStmt)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("Rule may not use NOTIFY, it is not yet supported"))); + +#endif /* * Since outer ParseState isn't parent of inner, have to pass down * the query text by hand. 
diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index 7f45fb7..a4565e7 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -1757,6 +1757,7 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) case T_RemoveOpClassStmt: case T_RemoveOpFamilyStmt: case T_RenameStmt: + case T_RuleStmt: case T_TruncateStmt: case T_VariableSetStmt: case T_ViewStmt: @@ -1841,7 +1842,6 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) case T_LoadStmt: case T_NotifyStmt: case T_PrepareStmt: - case T_RuleStmt: case T_UnlistenStmt: /* fall through */ default: ----------------------------------------------------------------------- Summary of changes: src/backend/parser/parse_utilcmd.c | 7 +++++++ src/backend/pgxc/plan/planner.c | 2 +- 2 files changed, 8 insertions(+), 1 deletions(-) hooks/post-receive -- Postgres-XC |
From: mason_s <ma...@us...> - 2010-06-27 19:10:31
|
Project "Postgres-XC". The branch, master has been updated via a32e437055fe9f9162651fd6edd811b77f443881 (commit) from 75127cbf9ff834aabc4e4f39f2628f7a9646a6ea (commit) - Log ----------------------------------------------------------------- commit a32e437055fe9f9162651fd6edd811b77f443881 Author: Mason S <masonsharp@mason-sharps-macbook.local> Date: Sun Jun 27 21:07:04 2010 +0200 Handle more types of queries to determine whether or not they can be safely executed within the current XC architecture. Checking is more thorough now, including detection of negative cases. Also fixed a bug with OR. In particular these changes handle subqueries in conditions and detect correlated joins that can be done on nodes when the data is colocated. diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index 0fb4a2b..7f45fb7 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -49,6 +49,16 @@ typedef struct long constant; /* assume long PGXCTODO - should be Datum */ } Literal_Comparison; +/* Parent-Child joins for relations being joined on + * their respective hash distribuion columns + */ +typedef struct +{ + RelationLocInfo *rel_loc_info1; + RelationLocInfo *rel_loc_info2; + OpExpr *opexpr; +} Parent_Child_Join; + /* * This struct helps us detect special conditions to determine what nodes * to execute on. @@ -56,7 +66,7 @@ typedef struct typedef struct { List *partitioned_literal_comps; /* List of Literal_Comparison */ - List *partitioned_parent_child; + List *partitioned_parent_child; /* List of Parent_Child_Join */ List *replicated_joins; /* @@ -96,6 +106,26 @@ typedef struct ColumnBase char *colname; } ColumnBase; +/* Used for looking for XC-safe queries + * + * rtables is a pointer to List, each item of which is + * the rtable for the particular query. 
This way we can use + * varlevelsup to resolve Vars in nested queries + */ +typedef struct XCWalkerContext +{ + Query *query; + bool isRead; + Exec_Nodes *exec_nodes; /* resulting execution nodes */ + Special_Conditions *conditions; + bool multilevel_join; + List *rtables; /* a pointer to a list of rtables */ + int varno; + bool within_or; + bool within_not; +} XCWalkerContext; + + /* A list of List*'s, one for each relation. */ List *join_list = NULL; @@ -105,6 +135,12 @@ bool StrictStatementChecking = true; /* Forbid multi-node SELECT statements with an ORDER BY clause */ bool StrictSelectChecking = false; + +static Exec_Nodes *get_plan_nodes(Query *query, bool isRead); +static bool get_plan_nodes_walker(Node *query_node, XCWalkerContext *context); +static bool examine_conditions_walker(Node *expr_node, XCWalkerContext *context); + + /* * True if both lists contain only one node and are the same */ @@ -113,7 +149,7 @@ same_single_node (List *nodelist1, List *nodelist2) { return nodelist1 && list_length(nodelist1) == 1 && nodelist2 && list_length(nodelist2) == 1 - && linitial_int(nodelist1) != linitial_int(nodelist2); + && linitial_int(nodelist1) == linitial_int(nodelist2); } /* @@ -234,6 +270,7 @@ free_special_relations(Special_Conditions *special_conditions) list_free(special_conditions->replicated_joins); pfree(special_conditions); + special_conditions = NULL; } /* @@ -246,6 +283,7 @@ free_join_list(void) return; list_free_deep(join_list); + join_list = NULL; } /* @@ -287,18 +325,28 @@ get_numeric_constant(Expr *expr) * This is required because a RangeTblEntry may actually be another * type, like a join, and we need to then look at the joinaliasvars * to determine what the base table and column really is. + * + * rtables is a List of rtable Lists. 
*/ static ColumnBase* -get_base_var(Var *var, List *rtables) +get_base_var(Var *var, XCWalkerContext *context) { RangeTblEntry *rte; + List *col_rtable; /* Skip system attributes */ if (!AttrNumberIsForUserDefinedAttr(var->varattno)) return NULL; - /* get the RangeTableEntry */ - rte = list_nth(rtables, var->varno - 1); + /* + * Get the RangeTableEntry + * We take nested subqueries into account first, + * we may need to look further up the query tree. + * The most recent rtable is at the end of the list; top most one is first. + */ + Assert (list_length(context->rtables) - var->varlevelsup > 0); + col_rtable = list_nth(context->rtables, (list_length(context->rtables) - var->varlevelsup) - 1); + rte = list_nth(col_rtable, var->varno - 1); if (rte->rtekind == RTE_RELATION) { @@ -316,8 +364,7 @@ get_base_var(Var *var, List *rtables) Var *colvar = list_nth(rte->joinaliasvars, var->varattno - 1); /* continue resolving recursively */ - return get_base_var(colvar, rtables); - //may need to set this, toocolumn_base->relalias = rte->eref->aliasname; + return get_base_var(colvar, context); } else if (rte->rtekind == RTE_SUBQUERY) { @@ -332,10 +379,16 @@ get_base_var(Var *var, List *rtables) return NULL; /* not column based expressoin, return */ else { + ColumnBase *base; Var *colvar = (Var *) tle->expr; /* continue resolving recursively */ - return get_base_var(colvar, rte->subquery->rtable); + /* push onto rtables list */ + context->rtables = lappend(context->rtables, rte->subquery->rtable); + base = get_base_var(colvar, context); + /* pop from rtables list */ + context->rtables = list_delete_ptr(context->rtables, rte->subquery->rtable); + return base; } } @@ -403,7 +456,7 @@ get_plan_nodes_insert(Query *query) if (!IsA(tle->expr, Const)) { - eval_expr = eval_const_expressions(NULL, (Node *) tle->expr); + eval_expr = (Expr *) eval_const_expressions(NULL, (Node *) tle->expr); checkexpr = get_numeric_constant(eval_expr); } @@ -440,7 +493,7 @@ get_plan_nodes_insert(Query 
*query) /* - * examine_conditions + * examine_conditions_walker * * Examine conditions and find special ones to later help us determine * what tables can be joined together. Put findings in Special_Conditions @@ -453,66 +506,96 @@ get_plan_nodes_insert(Query *query) * If we encounter a cross-node join, we stop processing and return false, * otherwise true. * - * PGXCTODO: Recognize subqueries, and give up (long term allow safe ones). - * */ static bool -examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_node) +examine_conditions_walker(Node *expr_node, XCWalkerContext *context) { RelationLocInfo *rel_loc_info1, *rel_loc_info2; Const *constant; Expr *checkexpr; + bool result = false; + bool is_and = false; + Assert(!context); + if (expr_node == NULL) - return true; + return false; - if (rtables == NULL) + if (!context->rtables) return true; - if (conditions == NULL) - conditions = new_special_conditions(); + if (!context->conditions) + context->conditions = new_special_conditions(); - if (IsA(expr_node, BoolExpr)) + if (IsA(expr_node, Var)) + { + /* If we get here, that meant the previous call before recursing down did not + * find the condition safe yet. + * Since we pass down our context, this is the bit of code that will detect + * that we are using more than one relation in a condition which has not + * already been deemed safe. 
+ */ + Var *var_node = (Var *) expr_node; + + if (context->varno) + { + if (var_node->varno != context->varno) + return true; + } + else + { + context->varno = var_node->varno; + return false; + } + } + + else if (IsA(expr_node, BoolExpr)) { BoolExpr *boolexpr = (BoolExpr *) expr_node; - /* Recursively handle ANDed expressions, but don't handle others */ if (boolexpr->boolop == AND_EXPR) + is_and = true; + if (boolexpr->boolop == NOT_EXPR) { - if (!examine_conditions(conditions, rtables, - linitial(boolexpr->args))) - return false; + bool save_within_not = context->within_not; + context->within_not = true; - return examine_conditions( - conditions, rtables, lsecond(boolexpr->args)); + if (examine_conditions_walker(linitial(boolexpr->args), context)) + { + context->within_not = save_within_not; + return true; + } + context->within_not = save_within_not; + return false; } else if (boolexpr->boolop == OR_EXPR) { - /* - * look at OR's as work-around for reported issue. - * NOTE: THIS IS NOT CORRECT, BUT JUST DONE FOR THE PROTOTYPE. - * More rigorous - * checking needs to be done. PGXCTODO: Add careful checking for - * OR'ed conditions... 
- */ - if (!examine_conditions(conditions, rtables, - linitial(boolexpr->args))) - return false; + bool save_within_or = context->within_or; + context->within_or = true; - return examine_conditions( - conditions, rtables, lsecond(boolexpr->args)); - } - else - /* looks complicated, give up */ - return false; + if (examine_conditions_walker(linitial(boolexpr->args), context)) + { + context->within_or = save_within_or; + return true; + } - return true; + if (examine_conditions_walker(lsecond(boolexpr->args), context)) + { + context->within_or = save_within_or; + return true; + } + context->within_or = save_within_or; + return false; + } } - - if (IsA(expr_node, OpExpr)) + /* + * Look for equality conditions on partiioned columns, but only do so + * if we are not in an OR or NOT expression + */ + if (!context->within_or && !context->within_not && IsA(expr_node, OpExpr)) { OpExpr *opexpr = (OpExpr *) expr_node; @@ -528,10 +611,10 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod /* get the RangeTableEntry */ Var *colvar = (Var *) arg1; - ColumnBase *column_base = get_base_var(colvar, rtables); + ColumnBase *column_base = get_base_var(colvar, context); if (!column_base) - return false; + return true; /* Look at other argument */ checkexpr = arg2; @@ -540,7 +623,7 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod if (!IsA(arg2, Const)) { /* this gets freed when the memory context gets freed */ - Expr *eval_expr = eval_const_expressions(NULL, (Node *) arg2); + Expr *eval_expr = (Expr *) eval_const_expressions(NULL, (Node *) arg2); checkexpr = get_numeric_constant(eval_expr); } @@ -555,7 +638,7 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod rel_loc_info1 = GetRelationLocInfo(column_base->relid); if (!rel_loc_info1) - return false; + return true; /* If hash partitioned, check if the part column was used */ if (IsHashColumn(rel_loc_info1, column_base->colname)) @@ -569,18 
+652,17 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod lit_comp->col_name = column_base->colname; lit_comp->constant = constant->constvalue; - conditions->partitioned_literal_comps = lappend( - conditions->partitioned_literal_comps, + context->conditions->partitioned_literal_comps = lappend( + context->conditions->partitioned_literal_comps, lit_comp); - return true; + return false; } else { - /* unimportant comparison, just return */ + /* Continue walking below */ if (rel_loc_info1) FreeRelationLocInfo(rel_loc_info1); - return true; } } @@ -593,59 +675,56 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod rel_loc_info1 = GetRelationLocInfo(column_base->relid); if (!rel_loc_info1) - return false; + return true; - column_base2 = get_base_var(colvar2, rtables); + column_base2 = get_base_var(colvar2, context); if (!column_base2) - return false; + return true; rel_loc_info2 = GetRelationLocInfo(column_base2->relid); /* get data struct about these two relations joining */ pgxc_join = find_or_create_pgxc_join(column_base->relid, column_base->relalias, column_base2->relid, column_base2->relalias); - /* - * pgxc_join->condition_list = - * lappend(pgxc_join->condition_list, opexpr); - */ - if (rel_loc_info1->locatorType == LOCATOR_TYPE_REPLICATED) { /* add to replicated join conditions */ - conditions->replicated_joins = - lappend(conditions->replicated_joins, opexpr); + context->conditions->replicated_joins = + lappend(context->conditions->replicated_joins, opexpr); + + if (colvar->varlevelsup != colvar2->varlevelsup) + context->multilevel_join = true; if (rel_loc_info2->locatorType != LOCATOR_TYPE_REPLICATED) { /* Note other relation, saves us work later. 
*/ - conditions->base_rel_name = column_base2->relname; - conditions->base_rel_loc_info = rel_loc_info2; + context->conditions->base_rel_name = column_base2->relname; + context->conditions->base_rel_loc_info = rel_loc_info2; if (rel_loc_info1) FreeRelationLocInfo(rel_loc_info1); } - if (conditions->base_rel_name == NULL) + if (context->conditions->base_rel_name == NULL) { - conditions->base_rel_name = column_base->relname; - conditions->base_rel_loc_info = rel_loc_info1; + context->conditions->base_rel_name = column_base->relname; + context->conditions->base_rel_loc_info = rel_loc_info1; if (rel_loc_info2) FreeRelationLocInfo(rel_loc_info2); } /* note nature of join between the two relations */ pgxc_join->join_type = JOIN_REPLICATED; - return true; + return false; } - - if (rel_loc_info2->locatorType == LOCATOR_TYPE_REPLICATED) + else if (rel_loc_info2->locatorType == LOCATOR_TYPE_REPLICATED) { /* add to replicated join conditions */ - conditions->replicated_joins = - lappend(conditions->replicated_joins, opexpr); + context->conditions->replicated_joins = + lappend(context->conditions->replicated_joins, opexpr); /* other relation not replicated, note it for later */ - conditions->base_rel_name = column_base->relname; - conditions->base_rel_loc_info = rel_loc_info1; + context->conditions->base_rel_name = column_base->relname; + context->conditions->base_rel_loc_info = rel_loc_info1; /* note nature of join between the two relations */ pgxc_join->join_type = JOIN_REPLICATED; @@ -653,11 +732,9 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod if (rel_loc_info2) FreeRelationLocInfo(rel_loc_info2); - return true; + return false; } - /* Now check for a partitioned join */ - /* * PGXCTODO - for the prototype, we assume all partitioned * tables are on the same nodes. 
@@ -666,36 +743,113 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod && IsHashColumn(rel_loc_info2, column_base2->colname)) { /* We found a partitioned join */ - conditions->partitioned_parent_child = - lappend(conditions->partitioned_parent_child, - opexpr); + Parent_Child_Join *parent_child = (Parent_Child_Join *) + palloc0(sizeof(Parent_Child_Join)); + + parent_child->rel_loc_info1 = rel_loc_info1; + parent_child->rel_loc_info2 = rel_loc_info2; + parent_child->opexpr = opexpr; + + context->conditions->partitioned_parent_child = + lappend(context->conditions->partitioned_parent_child, + parent_child); pgxc_join->join_type = JOIN_COLOCATED_PARTITIONED; - return true; + if (colvar->varlevelsup != colvar2->varlevelsup) + context->multilevel_join = true; + return false; } /* * At this point, there is some other type of join that * can probably not be executed on only a single node. - * Just return. Important: We preserve previous + * Just return, as it may be updated later. + * Important: We preserve previous * pgxc_join->join_type value, there may be multiple * columns joining two tables, and we want to make sure at * least one of them make it colocated partitioned, in * which case it will update it when examining another * condition. 
*/ - return true; + return false; + } + } + } + } + + /* Handle subquery */ + if (IsA(expr_node, SubLink)) + { + List *current_rtable; + bool is_multilevel; + int save_parent_child_count = 0; + SubLink *sublink = (SubLink *) expr_node; + Exec_Nodes *save_exec_nodes = context->exec_nodes; /* Save old exec_nodes */ + + /* save parent-child count */ + if (context->exec_nodes) + save_parent_child_count = list_length(context->conditions->partitioned_parent_child); + + context->exec_nodes = NULL; + context->multilevel_join = false; + current_rtable = ((Query *) sublink->subselect)->rtable; + + /* push onto rtables list before recursing */ + context->rtables = lappend(context->rtables, current_rtable); + + if (get_plan_nodes_walker(sublink->subselect, context)) + return true; + + /* pop off (remove) rtable */ + context->rtables = list_delete_ptr(context->rtables, current_rtable); + + is_multilevel = context->multilevel_join; + context->multilevel_join = false; + + /* Allow for replicated tables */ + if (!context->exec_nodes) + context->exec_nodes = save_exec_nodes; + else + { + if (save_exec_nodes) + { + if (context->exec_nodes->tableusagetype == TABLE_USAGE_TYPE_USER_REPLICATED) + { + context->exec_nodes = save_exec_nodes; } else + { + if (save_exec_nodes->tableusagetype != TABLE_USAGE_TYPE_USER_REPLICATED) + { + /* See if they run on the same node */ + if (same_single_node (context->exec_nodes->nodelist, save_exec_nodes->nodelist)) + return false; + } + else + /* use old value */ + context->exec_nodes = save_exec_nodes; + } + } else + { + if (context->exec_nodes->tableusagetype == TABLE_USAGE_TYPE_USER_REPLICATED) + return false; + /* See if subquery safely joins with parent */ + if (!is_multilevel) return true; - } } - /* PGXCTODO - need to more finely examine other operators */ } - return true; + /* Keep on walking */ + result = expression_tree_walker(expr_node, examine_conditions_walker, (void *) context); + + /* Reset context->varno if is_and to detect cross-node 
operations */ + if (is_and) + context->varno = 0; + + return result; } + /* * examine_conditions_fromlist - Examine FROM clause for joins * @@ -703,46 +857,42 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod * to help us decide which nodes to execute on. */ static bool -examine_conditions_fromlist(Special_Conditions *conditions, List *rtables, - Node *treenode) +examine_conditions_fromlist(Node *treenode, XCWalkerContext *context) { - if (treenode == NULL) - return true; - - if (rtables == NULL) - return true; + return false; - if (conditions == NULL) - conditions = new_special_conditions(); + if (context->rtables == NULL) + return false; if (IsA(treenode, JoinExpr)) { JoinExpr *joinexpr = (JoinExpr *) treenode; /* recursively examine FROM join tree */ - if (!examine_conditions_fromlist(conditions, rtables, joinexpr->larg)) - return false; + if (examine_conditions_fromlist(joinexpr->larg, context)) + return true; - if (!examine_conditions_fromlist(conditions, rtables, joinexpr->rarg)) - return false; + if (examine_conditions_fromlist(joinexpr->rarg, context)) + return true; /* Now look at join condition */ - if (!examine_conditions(conditions, rtables, joinexpr->quals)) - return false; - return true; + if (examine_conditions_walker(joinexpr->quals, context)) + return true; + + return false; } else if (IsA(treenode, RangeTblRef)) - return true; + return false; else if (IsA(treenode, BoolExpr) ||IsA(treenode, OpExpr)) { /* check base condition, if possible */ - if (!examine_conditions(conditions, rtables, treenode)) - return false; + if (examine_conditions_walker(treenode, context)); + return true; } /* Some other more complicated beast */ - return false; + return true; } @@ -779,18 +929,15 @@ contains_only_pg_catalog (List *rtable) * * returns NULL if it appears to be a mutli-step query. 
*/ -static Exec_Nodes * -get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) +static bool +get_plan_nodes_walker(Node *query_node, XCWalkerContext *context) { + Query *query; RangeTblEntry *rte; ListCell *lc, *item; - Special_Conditions *special_conditions; - OpExpr *opexpr; - Var *colvar; RelationLocInfo *rel_loc_info; Exec_Nodes *test_exec_nodes = NULL; - Exec_Nodes *exec_nodes = NULL; Exec_Nodes *current_nodes = NULL; Exec_Nodes *from_query_nodes = NULL; TableUsageType table_usage_type = TABLE_USAGE_TYPE_NO_TABLE; @@ -798,15 +945,14 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) int from_subquery_count = 0; - exec_nodes = NULL; - join_list = NULL; + if (!query_node && !IsA(query_node,Query)) + return true; + + query = (Query *) query_node; /* If no tables, just return */ if (query->rtable == NULL && query->jointree == NULL) - return NULL; - - /* Alloc and init struct */ - special_conditions = new_special_conditions(); + return false; /* Look for special conditions */ @@ -817,22 +963,19 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) if (IsA(treenode, JoinExpr)) { - if (!examine_conditions_fromlist(special_conditions, query->rtable, - treenode)) + if (examine_conditions_fromlist(treenode, context)) { /* May be complicated. 
Before giving up, just check for pg_catalog usage */ if (contains_only_pg_catalog (query->rtable)) { /* just pg_catalog tables */ - exec_nodes = (Exec_Nodes *) palloc0(sizeof(Exec_Nodes)); - exec_nodes->tableusagetype = TABLE_USAGE_TYPE_PGCATALOG; - free_special_relations(special_conditions); - return exec_nodes; + context->exec_nodes = (Exec_Nodes *) palloc0(sizeof(Exec_Nodes)); + context->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_PGCATALOG; + return false; } /* complicated */ - free_special_relations(special_conditions); - return NULL; + return true; } } else if (IsA(treenode, RangeTblRef)) @@ -844,20 +987,34 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) if (rte->rtekind == RTE_SUBQUERY) { + Exec_Nodes *save_exec_nodes = context->exec_nodes; + Special_Conditions *save_conditions = context->conditions; /* Save old conditions */ + List *current_rtable = rte->subquery->rtable; + from_subquery_count++; + /* * Recursively call for subqueries. * Note this also works for views, which are rewritten as subqueries. */ - current_nodes = get_plan_nodes(query_plan, rte->subquery, isRead); + context->rtables = lappend(context->rtables, current_rtable); + context->conditions = (Special_Conditions *) palloc0(sizeof(Special_Conditions)); + + if (get_plan_nodes_walker((Node *) rte->subquery, context)) + return true; + + /* restore rtables and conditions */ + context->rtables = list_delete_ptr(context->rtables, current_rtable); + context->conditions = save_conditions; + + current_nodes = context->exec_nodes; + context->exec_nodes = save_exec_nodes; + if (current_nodes) current_usage_type = current_nodes->tableusagetype; else - { /* could be complicated */ - free_special_relations(special_conditions); - return NULL; - } + return true; /* We compare to make sure that the subquery is safe to execute with previous- * we may have multiple ones in the FROM clause. 
@@ -880,11 +1037,8 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) { /* Allow if they are both using one node, and the same one */ if (!same_single_node (from_query_nodes->nodelist, current_nodes->nodelist)) - { /* Complicated */ - free_special_relations(special_conditions); - return NULL; - } + return true; } } } @@ -904,18 +1058,13 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) current_usage_type = TABLE_USAGE_TYPE_PGCATALOG; else { - //current_usage_type = TABLE_USAGE_TYPE_USER; /* Complicated */ - free_special_relations(special_conditions); - return NULL; + return true; } } else - { /* could be complicated */ - free_special_relations(special_conditions); - return NULL; - } + return true; /* See if we have pg_catalog mixed with other tables */ if (table_usage_type == TABLE_USAGE_TYPE_NO_TABLE) @@ -923,34 +1072,27 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) else if (current_usage_type != table_usage_type) { /* mixed- too complicated for us for now */ - free_special_relations(special_conditions); - return NULL; + return true; } } else { /* could be complicated */ - free_special_relations(special_conditions); - return NULL; + return true; } } /* If we are just dealing with pg_catalog, just return */ if (table_usage_type == TABLE_USAGE_TYPE_PGCATALOG) { - exec_nodes = (Exec_Nodes *) palloc0(sizeof(Exec_Nodes)); - exec_nodes->tableusagetype = TABLE_USAGE_TYPE_PGCATALOG; - return exec_nodes; + context->exec_nodes = (Exec_Nodes *) palloc0(sizeof(Exec_Nodes)); + context->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_PGCATALOG; + return false; } /* Examine the WHERE clause, too */ - if (!examine_conditions(special_conditions, query->rtable, - query->jointree->quals)) - { - /* if cross joins may exist, just return NULL */ - free_special_relations(special_conditions); - return NULL; - } + if (examine_conditions_walker(query->jointree->quals, context)) + return true; /* Examine join conditions, see if each 
join is single-node safe */ if (join_list != NULL) @@ -961,17 +1103,14 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) /* If it is not replicated or parent-child, not single-node safe */ if (pgxcjoin->join_type == JOIN_OTHER) - { - free_special_relations(special_conditions); - return NULL; - } + return true; } } /* check for non-partitioned cases */ - if (special_conditions->partitioned_parent_child == NULL && - special_conditions->partitioned_literal_comps == NULL) + if (context->conditions->partitioned_parent_child == NULL && + context->conditions->partitioned_literal_comps == NULL) { /* * We have either a single table, just replicated tables, or a @@ -980,7 +1119,7 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) */ /* See if we noted a table earlier to use */ - rel_loc_info = special_conditions->base_rel_loc_info; + rel_loc_info = context->conditions->base_rel_loc_info; if (rel_loc_info == NULL) { @@ -994,7 +1133,7 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) * If the query is rewritten (which can be due to rules or views), * ignore extra stuff. 
Also ignore subqueries we have processed */ - if (!rte->inFromCl || rte->rtekind != RTE_RELATION) + if ((!rte->inFromCl && query->commandType == CMD_SELECT) || rte->rtekind != RTE_RELATION) continue; /* PGXCTODO - handle RTEs that are functions */ @@ -1003,7 +1142,7 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) * Too complicated, we have multiple relations that still * cannot be joined safely */ - return NULL; + return true; rtesave = rte; } @@ -1014,35 +1153,35 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) rel_loc_info = GetRelationLocInfo(rtesave->relid); if (!rel_loc_info) - return NULL; + return true; - exec_nodes = GetRelationNodes(rel_loc_info, NULL, isRead); + context->exec_nodes = GetRelationNodes(rel_loc_info, NULL, context->isRead); } } else { - exec_nodes = GetRelationNodes(rel_loc_info, NULL, isRead); + context->exec_nodes = GetRelationNodes(rel_loc_info, NULL, context->isRead); } /* Note replicated table usage for determining safe queries */ - if (exec_nodes) + if (context->exec_nodes) { if (table_usage_type == TABLE_USAGE_TYPE_USER && IsReplicated(rel_loc_info)) table_usage_type = TABLE_USAGE_TYPE_USER_REPLICATED; - else - exec_nodes->tableusagetype = table_usage_type; + + context->exec_nodes->tableusagetype = table_usage_type; } } /* check for partitioned col comparison against a literal */ - else if (list_length(special_conditions->partitioned_literal_comps) > 0) + else if (list_length(context->conditions->partitioned_literal_comps) > 0) { - exec_nodes = NULL; + context->exec_nodes = NULL; /* * Make sure that if there are multiple such comparisons, that they * are all on the same nodes. 
*/ - foreach(lc, special_conditions->partitioned_literal_comps) + foreach(lc, context->conditions->partitioned_literal_comps) { Literal_Comparison *lit_comp = (Literal_Comparison *) lfirst(lc); @@ -1050,14 +1189,13 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) lit_comp->rel_loc_info, &(lit_comp->constant), true); test_exec_nodes->tableusagetype = table_usage_type; - if (exec_nodes == NULL) - exec_nodes = test_exec_nodes; + if (context->exec_nodes == NULL) + context->exec_nodes = test_exec_nodes; else { - if (!same_single_node(exec_nodes->nodelist, test_exec_nodes->nodelist)) + if (!same_single_node(context->exec_nodes->nodelist, test_exec_nodes->nodelist)) { - free_special_relations(special_conditions); - return NULL; + return true; } } } @@ -1069,67 +1207,87 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) * no partitioned column comparison condition with a literal. We just * use one of the tables as a basis for node determination. */ - ColumnBase *column_base; - - opexpr = (OpExpr *) linitial(special_conditions->partitioned_parent_child); + Parent_Child_Join *parent_child; - colvar = (Var *) linitial(opexpr->args); + parent_child = (Parent_Child_Join *) + linitial(context->conditions->partitioned_parent_child); - /* get the RangeTableEntry */ - column_base = get_base_var(colvar, query->rtable); - if (!column_base) - return false; - - rel_loc_info = GetRelationLocInfo(column_base->relid); - if (!rel_loc_info) - return false; - - exec_nodes = GetRelationNodes(rel_loc_info, NULL, isRead); - exec_nodes->tableusagetype = table_usage_type; + context->exec_nodes = GetRelationNodes(parent_child->rel_loc_info1, NULL, context->isRead); + context->exec_nodes->tableusagetype = table_usage_type; } - free_special_relations(special_conditions); if (from_query_nodes) { - if (!exec_nodes) - return from_query_nodes; + if (!context->exec_nodes) + { + context->exec_nodes = from_query_nodes; + return false; + } /* Just use exec_nodes if the 
from subqueries are all replicated or using the exact * same node */ else if (from_query_nodes->tableusagetype == TABLE_USAGE_TYPE_USER_REPLICATED - || (same_single_node(from_query_nodes->nodelist, exec_nodes->nodelist))) - return exec_nodes; + || (same_single_node(from_query_nodes->nodelist, context->exec_nodes->nodelist))) + return false; else { - /* We allow views, where the (rewritten) subquery may be on all nodes, but the parent - * query applies a condition on the from subquery. + /* We allow views, where the (rewritten) subquery may be on all nodes, + * but the parent query applies a condition on the from subquery. */ if (list_length(query->jointree->fromlist) == from_subquery_count - && list_length(exec_nodes->nodelist) == 1) - return exec_nodes; + && list_length(context->exec_nodes->nodelist) == 1) + return false; } /* Too complicated, give up */ - return NULL; + return true; } - return exec_nodes; + return false; +} + + +/* + * Top level entry point before walking query to determine plan nodes + * + */ +static Exec_Nodes * +get_plan_nodes(Query *query, bool isRead) +{ + Exec_Nodes *result_nodes; + XCWalkerContext *context = palloc0(sizeof(XCWalkerContext)); + + context->query = query; + context->isRead = isRead; + + context->conditions = (Special_Conditions *) palloc0(sizeof(Special_Conditions)); + context->rtables = lappend(context->rtables, query->rtable); + + join_list = NULL; + + if (get_plan_nodes_walker((Node *) query, context)) + result_nodes = NULL; + else + result_nodes = context->exec_nodes; + + free_special_relations(context->conditions); + return result_nodes; } /* - * get_plan_nodes - determine the nodes to execute the plan on + * get_plan_nodes_command - determine the nodes to execute the plan on * * return NULL if it is not safe to be done in a single step. 
*/ static Exec_Nodes * -get_plan_nodes_command(Query_Plan *query_plan, Query *query) +get_plan_nodes_command(Query *query) { Exec_Nodes *exec_nodes = NULL; switch (query->commandType) { case CMD_SELECT: - exec_nodes = get_plan_nodes(query_plan, query, true); + exec_nodes = get_plan_nodes(query, true); break; case CMD_INSERT: @@ -1139,7 +1297,7 @@ get_plan_nodes_command(Query_Plan *query_plan, Query *query) case CMD_UPDATE: case CMD_DELETE: /* treat as a select */ - exec_nodes = get_plan_nodes(query_plan, query, false); + exec_nodes = get_plan_nodes(query, false); break; default: @@ -1182,7 +1340,6 @@ get_plan_combine_type(Query *query, char baselocatortype) /* * Get list of simple aggregates used. - * For now we only allow MAX in the first column, and return a list of one. */ static List * get_simple_aggregates(Query * query) @@ -1439,11 +1596,14 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) } query_step->exec_nodes = - get_plan_nodes_command(query_plan, query); + get_plan_nodes_command(query); if (query_step->exec_nodes) query_step->combine_type = get_plan_combine_type( query, query_step->exec_nodes->baselocatortype); - query_step->simple_aggregates = get_simple_aggregates(query); + /* Only set up if running on more than one node */ + if (query_step->exec_nodes && query_step->exec_nodes->nodelist && + list_length(query_step->exec_nodes->nodelist) > 1) + query_step->simple_aggregates = get_simple_aggregates(query); /* * See if it is a SELECT with no relations, like SELECT 1+1 or ----------------------------------------------------------------------- Summary of changes: src/backend/pgxc/plan/planner.c | 584 +++++++++++++++++++++++++-------------- 1 files changed, 372 insertions(+), 212 deletions(-) hooks/post-receive -- Postgres-XC |
From: Pavan D. <pa...@us...> - 2010-06-24 08:16:32
|
Project "Postgres-XC". The branch, master has been updated via 75127cbf9ff834aabc4e4f39f2628f7a9646a6ea (commit) from c0169fa52ff019450c45dd9e50502e12375f33f2 (commit) - Log ----------------------------------------------------------------- commit 75127cbf9ff834aabc4e4f39f2628f7a9646a6ea Author: Pavan Deolasee <pav...@gm...> Date: Thu Jun 24 13:45:29 2010 +0530 Add a missing include file from the previous commit diff --git a/src/include/commands/sequence.h b/src/include/commands/sequence.h index b5bb7d9..f54f74f 100644 --- a/src/include/commands/sequence.h +++ b/src/include/commands/sequence.h @@ -18,6 +18,9 @@ #include "access/xlog.h" #include "fmgr.h" +#ifdef PGXC +#include "utils/relcache.h" +#endif /* * On a machine with no 64-bit-int C datatype, sizeof(int64) will not be 8, ----------------------------------------------------------------------- Summary of changes: src/include/commands/sequence.h | 3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) hooks/post-receive -- Postgres-XC |
From: Pavan D. <pa...@us...> - 2010-06-24 07:33:34
|
Project "Postgres-XC". The branch, master has been updated via c0169fa52ff019450c45dd9e50502e12375f33f2 (commit) from a216b00661e2b76267681bade35a620566fe9345 (commit) - Log ----------------------------------------------------------------- commit c0169fa52ff019450c45dd9e50502e12375f33f2 Author: Pavan Deolasee <pav...@gm...> Date: Thu Jun 24 13:00:09 2010 +0530 Add support for ALTER Sequence. Michael Paquier with some editorialization from Pavan Deolasee diff --git a/src/backend/access/transam/gtm.c b/src/backend/access/transam/gtm.c index c3cb72b..f9499c9 100644 --- a/src/backend/access/transam/gtm.c +++ b/src/backend/access/transam/gtm.c @@ -172,10 +172,8 @@ GetSnapshotGTM(GlobalTransactionId gxid, bool canbe_grouped) } -/** +/* * Create a sequence on the GTM. - * - * */ int CreateSequenceGTM(char *seqname, GTM_Sequence increment, GTM_Sequence minval, @@ -189,7 +187,45 @@ CreateSequenceGTM(char *seqname, GTM_Sequence increment, GTM_Sequence minval, return conn ? open_sequence(conn, &seqkey, increment, minval, maxval, startval, cycle) : 0; } -/** +/* + * Alter a sequence on the GTM + */ +int +AlterSequenceGTM(char *seqname, GTM_Sequence increment, GTM_Sequence minval, + GTM_Sequence maxval, GTM_Sequence startval, GTM_Sequence lastval, bool cycle, bool is_restart) +{ + GTM_SequenceKeyData seqkey; + CheckConnection(); + seqkey.gsk_keylen = strlen(seqname); + seqkey.gsk_key = seqname; + + return conn ? 
alter_sequence(conn, &seqkey, increment, minval, maxval, startval, lastval, cycle, is_restart) : 0; +} + +/* + * get the current sequence value + */ + +GTM_Sequence +GetCurrentValGTM(char *seqname) +{ + GTM_Sequence ret = -1; + GTM_SequenceKeyData seqkey; + CheckConnection(); + seqkey.gsk_keylen = strlen(seqname); + seqkey.gsk_key = seqname; + + if (conn) + ret = get_current(conn, &seqkey); + if (ret < 0) + { + CloseGTM(); + InitGTM(); + } + return ret; +} + +/* * Get the next sequence value */ GTM_Sequence @@ -211,7 +247,21 @@ GetNextValGTM(char *seqname) return ret; } -/** +/* + * Set values for sequence + */ +int +SetValGTM(char *seqname, GTM_Sequence nextval, bool iscalled) +{ + GTM_SequenceKeyData seqkey; + CheckConnection(); + seqkey.gsk_keylen = strlen(seqname); + seqkey.gsk_key = seqname; + + return conn ? set_val(conn, &seqkey, nextval, iscalled) : -1; +} + +/* * Drop the sequence */ int @@ -224,3 +274,19 @@ DropSequenceGTM(char *seqname) return conn ? close_sequence(conn, &seqkey) : -1; } + +/* + * Rename the sequence + */ +int +RenameSequenceGTM(char *seqname, const char *newseqname) +{ + GTM_SequenceKeyData seqkey, newseqkey; + CheckConnection(); + seqkey.gsk_keylen = strlen(seqname); + seqkey.gsk_key = seqname; + newseqkey.gsk_keylen = strlen(newseqname); + newseqkey.gsk_key = (char *)newseqname; + + return conn ? 
rename_sequence(conn, &seqkey, &newseqkey) : -1; +} diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index ba9a932..ba30206 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -35,6 +35,7 @@ #include "utils/lsyscache.h" #include "utils/resowner.h" #include "utils/syscache.h" +#include "commands/dbcommands.h" #ifdef PGXC #include "pgxc/pgxc.h" @@ -97,8 +98,13 @@ static int64 nextval_internal(Oid relid); static Relation open_share_lock(SeqTable seq); static void init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel); static Form_pg_sequence read_info(SeqTable elm, Relation rel, Buffer *buf); +#ifdef PGXC +static void init_params(List *options, bool isInit, + Form_pg_sequence new, List **owned_by, bool *is_restart); +#else static void init_params(List *options, bool isInit, - Form_pg_sequence new, List **owned_by); + Form_pg_sequence new, List **owned_by); +#endif static void do_setval(Oid relid, int64 next, bool iscalled); static void process_owned_by(Relation seqrel, List *owned_by); @@ -130,10 +136,15 @@ DefineSequence(CreateSeqStmt *seq) GTM_Sequence max_value = InvalidSequenceValue; GTM_Sequence increment = 1; bool cycle = false; + bool is_restart; #endif /* Check and set all option values */ +#ifdef PGXC + init_params(seq->options, true, &new, &owned_by, &is_restart); +#else init_params(seq->options, true, &new, &owned_by); +#endif /* * Create relation (and fill value[] and null[] for the tuple) @@ -341,14 +352,20 @@ DefineSequence(CreateSeqStmt *seq) #ifdef PGXC /* PGXC_COORD */ if (IS_PGXC_COORDINATOR) { + char *seqname = GetGlobalSeqName(rel, NULL); + /* We also need to create it on the GTM */ - if (CreateSequenceGTM(name.data, increment, min_value, max_value, + if (CreateSequenceGTM(seqname, + increment, + min_value, + max_value, start_value, cycle) < 0) { ereport(ERROR, (errcode(ERRCODE_CONNECTION_FAILURE), errmsg("GTM error, could not create sequence"))); } + pfree(seqname); } #endif } @@ 
-392,6 +409,15 @@ AlterSequenceInternal(Oid relid, List *options) Form_pg_sequence seq; FormData_pg_sequence new; List *owned_by; +#ifdef PGXC + GTM_Sequence start_value; + GTM_Sequence last_value; + GTM_Sequence min_value; + GTM_Sequence max_value; + GTM_Sequence increment; + bool cycle; + bool is_restart; +#endif /* open and AccessShareLock sequence */ init_sequence(relid, &elm, &seqrel); @@ -404,7 +430,11 @@ AlterSequenceInternal(Oid relid, List *options) memcpy(&new, seq, sizeof(FormData_pg_sequence)); /* Check and set new values */ +#ifdef PGXC + init_params(options, false, &new, &owned_by, &is_restart); +#else init_params(options, false, &new, &owned_by); +#endif /* Clear local cache so that we don't think we have cached numbers */ /* Note that we do not change the currval() state */ @@ -413,6 +443,15 @@ AlterSequenceInternal(Oid relid, List *options) /* Now okay to update the on-disk tuple */ memcpy(seq, &new, sizeof(FormData_pg_sequence)); +#ifdef PGXC + increment = new.increment_by; + min_value = new.min_value; + max_value = new.max_value; + start_value = new.start_value; + last_value = new.last_value; + cycle = new.is_cycled; +#endif + START_CRIT_SECTION(); MarkBufferDirty(buf); @@ -451,6 +490,27 @@ AlterSequenceInternal(Oid relid, List *options) process_owned_by(seqrel, owned_by); relation_close(seqrel, NoLock); + +#ifdef PGXC + if (IS_PGXC_COORDINATOR) + { + char *seqname = GetGlobalSeqName(seqrel, NULL); + + /* We also need to create it on the GTM */ + if (AlterSequenceGTM(seqname, + increment, + min_value, + max_value, + start_value, + last_value, + cycle, + is_restart) < 0) + ereport(ERROR, + (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("GTM error, could not alter sequence"))); + pfree(seqname); + } +#endif } @@ -527,14 +587,22 @@ nextval_internal(Oid relid) #ifdef PGXC /* PGXC_COORD */ if (IS_PGXC_COORDINATOR) { - /* Above, we still use the page as a locking mechanism to handle - * concurrency + char *seqname = GetGlobalSeqName(seqrel, NULL); + + 
/* + * Above, we still use the page as a locking mechanism to handle + * concurrency */ - result = (int64) GetNextValGTM(RelationGetRelationName(seqrel)); + result = (int64) GetNextValGTM(seqname); if (result < 0) ereport(ERROR, - (errcode(ERRCODE_CONNECTION_FAILURE), - errmsg("GTM error, could not obtain sequence value"))); + (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("GTM error, could not obtain sequence value"))); + pfree(seqname); + + /* Update the on-disk data */ + seq->last_value = result; /* last fetched number */ + seq->is_called = true; } else { #endif @@ -714,6 +782,22 @@ currval_oid(PG_FUNCTION_ARGS) /* open and AccessShareLock sequence */ init_sequence(relid, &elm, &seqrel); +#ifdef PGXC + if (IS_PGXC_COORDINATOR) + { + char *seqname = GetGlobalSeqName(seqrel, NULL); + + result = (int64) GetCurrentValGTM(seqname); + if (result < 0) + ereport(ERROR, + (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("GTM error, could not obtain sequence value"))); + pfree(seqname); + } + else + { +#endif + if (pg_class_aclcheck(elm->relid, GetUserId(), ACL_SELECT) != ACLCHECK_OK && pg_class_aclcheck(elm->relid, GetUserId(), ACL_USAGE) != ACLCHECK_OK) ereport(ERROR, @@ -729,6 +813,10 @@ currval_oid(PG_FUNCTION_ARGS) result = elm->last; +#ifdef PGXC + } +#endif + relation_close(seqrel, NoLock); PG_RETURN_INT64(result); @@ -820,6 +908,24 @@ do_setval(Oid relid, int64 next, bool iscalled) bufm, bufx))); } +#ifdef PGXC + if (IS_PGXC_COORDINATOR) + { + char *seqname = GetGlobalSeqName(seqrel, NULL); + + if (SetValGTM(seqname, next, iscalled) < 0) + ereport(ERROR, + (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("GTM error, could not obtain sequence value"))); + pfree(seqname); + /* Update the on-disk data */ + seq->last_value = next; /* last fetched number */ + seq->is_called = iscalled; + } + else + { +#endif + /* Set the currval() state only if iscalled = true */ if (iscalled) { @@ -872,6 +978,10 @@ do_setval(Oid relid, int64 next, bool iscalled) END_CRIT_SECTION(); 
+#ifdef PGXC + } +#endif + UnlockReleaseBuffer(buf); relation_close(seqrel, NoLock); @@ -1050,8 +1160,13 @@ read_info(SeqTable elm, Relation rel, Buffer *buf) * otherwise, do not change existing options that aren't explicitly overridden. */ static void +#ifdef PGXC +init_params(List *options, bool isInit, + Form_pg_sequence new, List **owned_by, bool *is_restart) +#else init_params(List *options, bool isInit, Form_pg_sequence new, List **owned_by) +#endif { DefElem *start_value = NULL; DefElem *restart_value = NULL; @@ -1062,6 +1177,10 @@ init_params(List *options, bool isInit, DefElem *is_cycled = NULL; ListCell *option; +#ifdef PGXC + *is_restart = false; +#endif + *owned_by = NIL; foreach(option, options) @@ -1227,8 +1346,8 @@ init_params(List *options, bool isInit, snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->max_value); ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("START value (%s) cannot be greater than MAXVALUE (%s)", - bufs, bufm))); + errmsg("START value (%s) cannot be greater than MAXVALUE (%s)", + bufs, bufm))); } /* RESTART [WITH] */ @@ -1238,6 +1357,9 @@ init_params(List *options, bool isInit, new->last_value = defGetInt64(restart_value); else new->last_value = new->start_value; +#ifdef PGXC + *is_restart = true; +#endif new->is_called = false; new->log_cnt = 1; } @@ -1258,8 +1380,8 @@ init_params(List *options, bool isInit, snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->min_value); ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("RESTART value (%s) cannot be less than MINVALUE (%s)", - bufs, bufm))); + errmsg("RESTART value (%s) cannot be less than MINVALUE (%s)", + bufs, bufm))); } if (new->last_value > new->max_value) { @@ -1270,8 +1392,8 @@ init_params(List *options, bool isInit, snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->max_value); ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("RESTART value (%s) cannot be greater than MAXVALUE (%s)", - bufs, bufm))); + errmsg("RESTART value 
(%s) cannot be greater than MAXVALUE (%s)", + bufs, bufm))); } /* CACHE */ @@ -1293,6 +1415,49 @@ init_params(List *options, bool isInit, new->cache_value = 1; } +#ifdef PGXC +/* + * Returns a global sequence name adapted to GTM + * Name format is dbname.schemaname.seqname + * so as to identify in a unique way in the whole cluster each sequence + */ + +char * +GetGlobalSeqName(Relation seqrel, const char *new_seqname) +{ + char *seqname, *dbname, *schemaname, *relname; + int charlen; + + /* Get all the necessary relation names */ + dbname = get_database_name(seqrel->rd_node.dbNode); + schemaname = get_namespace_name(RelationGetNamespace(seqrel)); + + if (new_seqname) + relname = new_seqname; + else + relname = RelationGetRelationName(seqrel); + + /* Calculate the global name size including the dots and \0 */ + charlen = strlen(dbname) + strlen(schemaname) + strlen(relname) + 3; + seqname = (char *) palloc(charlen); + + /* Form a unique sequence name with schema and database name for GTM */ + snprintf(seqname, + charlen, + "%s.%s.%s", + dbname, + schemaname, + relname); + + if (dbname) + pfree(dbname); + if (schemaname) + pfree(schemaname); + + return seqname; +} +#endif + /* * Process an OWNED BY option for CREATE/ALTER SEQUENCE * diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index fa6456a..33782c4 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -768,14 +768,27 @@ RemoveRelations(DropStmt *drop) add_exact_object_address(&obj, objects); - #ifdef PGXC /* PGXC_COORD */ /* PGXCTODO: allow the ability to rollback dropping sequences. */ /* Drop the sequence */ if (IS_PGXC_COORDINATOR && classform->relkind == RELKIND_SEQUENCE) { - DropSequenceGTM(rel->relname); + Relation relseq; + char *seqname; + + /* + * A relation is opened to get the schema and database name as + * such data is not available before when dropping a function. 
+ */ + relseq = relation_open(obj.objectId, AccessShareLock); + seqname = GetGlobalSeqName(relseq, NULL); + + DropSequenceGTM(seqname); + pfree(seqname); + + /* Then close the relation opened previously */ + relation_close(relseq, AccessShareLock); } #endif ReleaseSysCache(tuple); @@ -2103,6 +2116,20 @@ RenameRelation(Oid myrelid, const char *newrelname, ObjectType reltype) /* Do the work */ RenameRelationInternal(myrelid, newrelname, namespaceId); +#ifdef PGXC + if (IS_PGXC_COORDINATOR && + (reltype == OBJECT_SEQUENCE || relkind == RELKIND_SEQUENCE)) /* It is possible to rename a sequence with ALTER TABLE */ + { + char *seqname = GetGlobalSeqName(targetrelation, NULL); + char *newseqname = GetGlobalSeqName(targetrelation, newrelname); + + /* We also need to rename it on the GTM */ + if (RenameSequenceGTM(seqname, newseqname) < 0) + ereport(ERROR, + (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("GTM error, could not rename sequence"))); + } +#endif /* * Close rel, but keep exclusive lock! 
diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index 0d73fc9..0fb4a2b 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -1466,14 +1466,9 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) { query_plan->exec_loc_type = EXEC_ON_DATA_NODES; - /* - * If the nodelist is NULL, it is not safe for us to - * execute - */ - if (!query_step->exec_nodes && StrictStatementChecking) - ereport(ERROR, - (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("Cannot safely execute statement in a single step.")))); + /* If node list is NULL, execute on coordinator */ + if (!query_step->exec_nodes) + query_plan->exec_loc_type = EXEC_ON_COORD; } } @@ -1517,19 +1512,10 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) break; /* Statements that we only want to execute on the Coordinator */ - case T_CreateSeqStmt: case T_VariableShowStmt: query_plan->exec_loc_type = EXEC_ON_COORD; break; - /* DROP */ - case T_DropStmt: - if (((DropStmt *) parsetree)->removeType == OBJECT_SEQUENCE) - query_plan->exec_loc_type = EXEC_ON_COORD; - else - query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; - break; - /* * Statements that need to run in autocommit mode, on Coordinator * and Data Nodes with suppressed implicit two phase commit. 
@@ -1542,51 +1528,7 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; query_plan->force_autocommit = true; break; - case T_AlterObjectSchemaStmt: - /* Sequences are just defined on coordinator */ - if (((AlterObjectSchemaStmt *) parsetree)->objectType == OBJECT_SEQUENCE) - query_plan->exec_loc_type = EXEC_ON_COORD; - else - query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; - break; - case T_AlterSeqStmt: - /* Alter sequence is not supported yet, it needs complementary interactions with GTM */ - ereport(ERROR, - (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("This command is not yet supported")))); - break; - case T_AlterTableStmt: - /* - * ALTER SEQUENCE needs some interactions with GTM, - * this query is not supported yet. - */ - if (((AlterTableStmt *) parsetree)->relkind == OBJECT_SEQUENCE) - ereport(ERROR, - (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("Cannot yet alter a sequence")))); - else - query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; - break; - case T_CommentStmt: - /* Sequences are only defined on coordinator */ - if (((CommentStmt *) parsetree)->objtype == OBJECT_SEQUENCE) - query_plan->exec_loc_type = EXEC_ON_COORD; - else - query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; - break; - case T_RenameStmt: - /* Sequences are only defined on coordinator */ - if (((RenameStmt *) parsetree)->renameType == OBJECT_SEQUENCE) - /* - * Renaming a sequence requires interactions with GTM - * what is not supported yet - */ - ereport(ERROR, - (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("Sequence renaming not yet supported, you should drop it and created a new one")))); - else - query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; - break; + case T_DropPropertyStmt: /* * Triggers are not yet supported by PGXC @@ -1619,10 +1561,14 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List 
*querytree_list) case T_AlterFdwStmt: case T_AlterForeignServerStmt: case T_AlterFunctionStmt: + case T_AlterObjectSchemaStmt: case T_AlterOpFamilyStmt: + case T_AlterSeqStmt: + case T_AlterTableStmt: /* Can also be used to rename a sequence */ case T_AlterTSConfigurationStmt: case T_AlterTSDictionaryStmt: - case T_ClosePortalStmt: /* In case CLOSE ALL is issued */ + case T_ClosePortalStmt: /* In case CLOSE ALL is issued */ + case T_CommentStmt: case T_CompositeTypeStmt: case T_ConstraintsSetStmt: case T_CreateCastStmt: @@ -1635,19 +1581,22 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) case T_CreateOpClassStmt: case T_CreateOpFamilyStmt: case T_CreatePLangStmt: + case T_CreateSeqStmt: case T_CreateSchemaStmt: - case T_DeallocateStmt: /* Allow for DEALLOCATE ALL */ + case T_DeallocateStmt: /* Allow for DEALLOCATE ALL */ case T_DiscardStmt: case T_DropCastStmt: case T_DropFdwStmt: case T_DropForeignServerStmt: case T_DropPLangStmt: + case T_DropStmt: case T_IndexStmt: case T_LockStmt: case T_ReindexStmt: case T_RemoveFuncStmt: case T_RemoveOpClassStmt: case T_RemoveOpFamilyStmt: + case T_RenameStmt: case T_TruncateStmt: case T_VariableSetStmt: case T_ViewStmt: @@ -1695,11 +1644,11 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) * data node will do */ case T_ExplainStmt: - if (((ExplainStmt *) parsetree)->analyze) + if (((ExplainStmt *) parsetree)->analyze) ereport(ERROR, (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), (errmsg("ANALYZE with EXPLAIN is currently not supported.")))); - + query_step->exec_nodes = palloc0(sizeof(Exec_Nodes)); query_step->exec_nodes->nodelist = GetAnyDataNode(); query_step->exec_nodes->baselocatortype = LOCATOR_TYPE_RROBIN; diff --git a/src/gtm/client/fe-protocol.c b/src/gtm/client/fe-protocol.c index a102202..051bb1d 100644 --- a/src/gtm/client/fe-protocol.c +++ b/src/gtm/client/fe-protocol.c @@ -493,12 +493,16 @@ gtmpqParseSuccess(GTM_Conn *conn, GTM_Result *result) case 
SEQUENCE_INIT_RESULT: case SEQUENCE_RESET_RESULT: case SEQUENCE_CLOSE_RESULT: + case SEQUENCE_RENAME_RESULT: + case SEQUENCE_ALTER_RESULT: + case SEQUENCE_SET_VAL_RESULT: if (gtmpqReadSeqKey(&result->gr_resdata.grd_seqkey, conn)) result->gr_status = -1; break; case SEQUENCE_GET_CURRENT_RESULT: case SEQUENCE_GET_NEXT_RESULT: + case SEQUENCE_GET_LAST_RESULT: if (gtmpqReadSeqKey(&result->gr_resdata.grd_seq.seqkey, conn)) { result->gr_status = -1; @@ -566,6 +570,9 @@ gtmpqFreeResultData(GTM_Result *result, bool is_proxy) case SEQUENCE_INIT_RESULT: case SEQUENCE_RESET_RESULT: case SEQUENCE_CLOSE_RESULT: + case SEQUENCE_RENAME_RESULT: + case SEQUENCE_ALTER_RESULT: + case SEQUENCE_SET_VAL_RESULT: if (result->gr_resdata.grd_seqkey.gsk_key != NULL) free(result->gr_resdata.grd_seqkey.gsk_key); result->gr_resdata.grd_seqkey.gsk_key = NULL; @@ -573,6 +580,7 @@ gtmpqFreeResultData(GTM_Result *result, bool is_proxy) case SEQUENCE_GET_CURRENT_RESULT: case SEQUENCE_GET_NEXT_RESULT: + case SEQUENCE_GET_LAST_RESULT: if (result->gr_resdata.grd_seq.seqkey.gsk_key != NULL) free(result->gr_resdata.grd_seq.seqkey.gsk_key); result->gr_resdata.grd_seqkey.gsk_key = NULL; diff --git a/src/gtm/client/gtm_client.c b/src/gtm/client/gtm_client.c index 089689e..9df28c7 100644 --- a/src/gtm/client/gtm_client.c +++ b/src/gtm/client/gtm_client.c @@ -356,6 +356,51 @@ send_failed: } int +alter_sequence(GTM_Conn *conn, GTM_SequenceKey key, GTM_Sequence increment, + GTM_Sequence minval, GTM_Sequence maxval, + GTM_Sequence startval, GTM_Sequence lastval, bool cycle, bool is_restart) +{ + GTM_Result *res = NULL; + time_t finish_time; + + /* Start the message. 
*/ + if (gtmpqPutMsgStart('C', true, conn) || + gtmpqPutInt(MSG_SEQUENCE_ALTER, sizeof (GTM_MessageType), conn) || + gtmpqPutInt(key->gsk_keylen, 4, conn) || + gtmpqPutnchar(key->gsk_key, key->gsk_keylen, conn) || + gtmpqPutnchar((char *)&increment, sizeof (GTM_Sequence), conn) || + gtmpqPutnchar((char *)&minval, sizeof (GTM_Sequence), conn) || + gtmpqPutnchar((char *)&maxval, sizeof (GTM_Sequence), conn) || + gtmpqPutnchar((char *)&startval, sizeof (GTM_Sequence), conn) || + gtmpqPutnchar((char *)&lastval, sizeof (GTM_Sequence), conn) || + gtmpqPutc(cycle, conn) || + gtmpqPutc(is_restart, conn)) + goto send_failed; + + /* Finish the message. */ + if (gtmpqPutMsgEnd(conn)) + goto send_failed; + + /* Flush to ensure backend gets it. */ + if (gtmpqFlush(conn)) + goto send_failed; + + finish_time = time(NULL) + CLIENT_GTM_TIMEOUT; + if (gtmpqWaitTimed(true, false, conn, finish_time) || + gtmpqReadData(conn) < 0) + goto receive_failed; + + if ((res = GTMPQgetResult(conn)) == NULL) + goto receive_failed; + + return res->gr_status; + +receive_failed: +send_failed: + return -1; +} + +int close_sequence(GTM_Conn *conn, GTM_SequenceKey key) { GTM_Result *res = NULL; @@ -391,6 +436,44 @@ send_failed: return -1; } +int +rename_sequence(GTM_Conn *conn, GTM_SequenceKey key, GTM_SequenceKey newkey) +{ + GTM_Result *res = NULL; + time_t finish_time; + + /* Start the message. */ + if (gtmpqPutMsgStart('C', true, conn) || + gtmpqPutInt(MSG_SEQUENCE_RENAME, sizeof (GTM_MessageType), conn) || + gtmpqPutInt(key->gsk_keylen, 4, conn) || + gtmpqPutnchar(key->gsk_key, key->gsk_keylen, conn)|| + gtmpqPutInt(newkey->gsk_keylen, 4, conn) || + gtmpqPutnchar(newkey->gsk_key, newkey->gsk_keylen, conn)) + goto send_failed; + + /* Finish the message. */ + if (gtmpqPutMsgEnd(conn)) + goto send_failed; + + /* Flush to ensure backend gets it. 
*/ + if (gtmpqFlush(conn)) + goto send_failed; + + finish_time = time(NULL) + CLIENT_GTM_TIMEOUT; + if (gtmpqWaitTimed(true, false, conn, finish_time) || + gtmpqReadData(conn) < 0) + goto receive_failed; + + if ((res = GTMPQgetResult(conn)) == NULL) + goto receive_failed; + + return res->gr_status; + + receive_failed: + send_failed: + return -1; +} + GTM_Sequence get_current(GTM_Conn *conn, GTM_SequenceKey key) { @@ -430,13 +513,51 @@ send_failed: return -1; } +int +set_val(GTM_Conn *conn, GTM_SequenceKey key, GTM_Sequence nextval, bool iscalled) +{ + GTM_Result *res = NULL; + time_t finish_time; + + /* Start the message. */ + if (gtmpqPutMsgStart('C', true, conn) || + gtmpqPutInt(MSG_SEQUENCE_SET_VAL, sizeof (GTM_MessageType), conn) || + gtmpqPutInt(key->gsk_keylen, 4, conn) || + gtmpqPutnchar(key->gsk_key, key->gsk_keylen, conn) || + gtmpqPutnchar((char *)&nextval, sizeof (GTM_Sequence), conn) || + gtmpqPutc(iscalled, conn)) + goto send_failed; + + /* Finish the message. */ + if (gtmpqPutMsgEnd(conn)) + goto send_failed; + + /* Flush to ensure backend gets it. */ + if (gtmpqFlush(conn)) + goto send_failed; + + finish_time = time(NULL) + CLIENT_GTM_TIMEOUT; + if (gtmpqWaitTimed(true, false, conn, finish_time) || + gtmpqReadData(conn) < 0) + goto receive_failed; + + if ((res = GTMPQgetResult(conn)) == NULL) + goto receive_failed; + + return res->gr_status; + +receive_failed: +send_failed: + return -1; +} + GTM_Sequence get_next(GTM_Conn *conn, GTM_SequenceKey key) { GTM_Result *res = NULL; time_t finish_time; - /* Start the message. */ + /* Start the message. 
*/ if (gtmpqPutMsgStart('C', true, conn) || gtmpqPutInt(MSG_SEQUENCE_GET_NEXT, sizeof (GTM_MessageType), conn) || gtmpqPutInt(key->gsk_keylen, 4, conn) || diff --git a/src/gtm/main/gtm_seq.c b/src/gtm/main/gtm_seq.c index 73af34e..8611f40 100644 --- a/src/gtm/main/gtm_seq.c +++ b/src/gtm/main/gtm_seq.c @@ -326,9 +326,12 @@ GTM_SeqOpen(GTM_SequenceKey seqkey, */ seqinfo->gs_cycle = cycle; + /* Set the last value in case of a future restart */ + seqinfo->gs_last_value = seqinfo->gs_init_value; + if ((errcode = seq_add_seqinfo(seqinfo))) { - GTM_RWLockDestroy(&seqinfo->gs_lock); + GTM_RWLockDestroy(&seqinfo->gs_lock); pfree(seqinfo->gs_key); pfree(seqinfo); } @@ -336,6 +339,62 @@ GTM_SeqOpen(GTM_SequenceKey seqkey, } /* + * Alter a sequence + */ +int GTM_SeqAlter(GTM_SequenceKey seqkey, + GTM_Sequence increment_by, + GTM_Sequence minval, + GTM_Sequence maxval, + GTM_Sequence startval, + GTM_Sequence lastval, + bool cycle, + bool is_restart) +{ + GTM_SeqInfo *seqinfo = seq_find_seqinfo(seqkey); + + if (seqinfo == NULL) + { + ereport(LOG, + (EINVAL, + errmsg("The sequence with the given key does not exist"))); + return EINVAL; + } + + GTM_RWLockAcquire(&seqinfo->gs_lock, GTM_LOCKMODE_WRITE); + + /* Modify the data if necessary */ + + if (seqinfo->gs_cycle != cycle) + seqinfo->gs_cycle = cycle; + if (seqinfo->gs_min_value != minval) + seqinfo->gs_min_value = minval; + if (seqinfo->gs_max_value != maxval) + seqinfo->gs_max_value = maxval; + if (seqinfo->gs_increment_by != increment_by) + seqinfo->gs_increment_by = increment_by; + + /* Here Restart has been used with a value, reinitialize last_value to a new value */ + if (seqinfo->gs_last_value != lastval) + seqinfo->gs_last_value = lastval; + + /* Start has been used, reinitialize init value */ + if (seqinfo->gs_init_value != startval) + seqinfo->gs_last_value = seqinfo->gs_init_value = startval; + + /* Restart command has been used, reset the sequence */ + if (is_restart) + { + seqinfo->gs_called = false; + 
seqinfo->gs_init_value = seqinfo->gs_last_value; + } + + /* Remove the old key with the old name */ + GTM_RWLockRelease(&seqinfo->gs_lock); + seq_release_seqinfo(seqinfo); + return 0; +} + +/* * Restore a sequence. */ static int @@ -367,7 +426,7 @@ GTM_SeqRestore(GTM_SequenceKey seqkey, seqinfo->gs_min_value = minval; seqinfo->gs_max_value = maxval; - seqinfo->gs_init_value = startval; + seqinfo->gs_init_value = seqinfo->gs_last_value = startval; seqinfo->gs_value = curval; /* @@ -402,6 +461,66 @@ GTM_SeqClose(GTM_SequenceKey seqkey) } /* + * Rename an existing sequence with a new name + */ +int +GTM_SeqRename(GTM_SequenceKey seqkey, GTM_SequenceKey newseqkey) +{ + GTM_SeqInfo *seqinfo = seq_find_seqinfo(seqkey); + GTM_SeqInfo *newseqinfo = NULL; + int errcode = 0; + + /* replace old key by new key */ + if (seqinfo == NULL) + { + ereport(LOG, + (EINVAL, + errmsg("The sequence with the given key does not exist"))); + return EINVAL; + } + + /* Now create the new sequence info */ + newseqinfo = (GTM_SeqInfo *) palloc(sizeof (GTM_SeqInfo)); + + GTM_RWLockAcquire(&seqinfo->gs_lock, GTM_LOCKMODE_WRITE); + GTM_RWLockInit(&newseqinfo->gs_lock); + + newseqinfo->gs_ref_count = 0; + newseqinfo->gs_key = seq_copy_key(newseqkey); + newseqinfo->gs_state = seqinfo->gs_state; + newseqinfo->gs_called = seqinfo->gs_called; + + newseqinfo->gs_increment_by = seqinfo->gs_increment_by; + newseqinfo->gs_min_value = seqinfo->gs_min_value; + newseqinfo->gs_max_value = seqinfo->gs_max_value; + + newseqinfo->gs_init_value = seqinfo->gs_init_value; + newseqinfo->gs_value = seqinfo->gs_value; + newseqinfo->gs_cycle = seqinfo->gs_cycle; + + newseqinfo->gs_state = seqinfo->gs_state; + newseqinfo->gs_last_value = seqinfo->gs_last_value; + + /* Add the copy to the list */ + if ((errcode = seq_add_seqinfo(newseqinfo))) /* a lock is taken here for the new sequence */ + { + GTM_RWLockDestroy(&newseqinfo->gs_lock); + pfree(newseqinfo->gs_key); + pfree(newseqinfo); + return errcode; + } + + /* Remove 
the old key with the old name */ + GTM_RWLockRelease(&seqinfo->gs_lock); + /* Release first the structure as it has been taken previously */ + seq_release_seqinfo(seqinfo); + + /* Then close properly the old sequence */ + GTM_SeqClose(seqkey); + return errcode; +} + +/* * Get current value for the sequence without incrementing it */ GTM_Sequence @@ -436,7 +555,37 @@ GTM_SeqGetCurrent(GTM_SequenceKey seqkey) } /* - * Get next vlaue for the sequence + * Set values for the sequence + */ +int +GTM_SeqSetVal(GTM_SequenceKey seqkey, GTM_Sequence nextval, bool iscalled) +{ + GTM_SeqInfo *seqinfo = seq_find_seqinfo(seqkey); + + if (seqinfo == NULL) + { + ereport(LOG, + (EINVAL, + errmsg("The sequence with the given key does not exist"))); + return EINVAL; + } + + GTM_RWLockAcquire(&seqinfo->gs_lock, GTM_LOCKMODE_WRITE); + + if (seqinfo->gs_value != nextval) + seqinfo->gs_value = nextval; + if (seqinfo->gs_called != iscalled) + seqinfo->gs_called = iscalled; + + /* Remove the old key with the old name */ + GTM_RWLockRelease(&seqinfo->gs_lock); + seq_release_seqinfo(seqinfo); + + return 0; +} + +/* + * Get next value for the sequence */ GTM_Sequence GTM_SeqGetNext(GTM_SequenceKey seqkey) @@ -625,6 +774,75 @@ ProcessSequenceInitCommand(Port *myport, StringInfo message) } /* + * Process MSG_SEQUENCE_ALTER message + */ +void +ProcessSequenceAlterCommand(Port *myport, StringInfo message) +{ + GTM_SequenceKeyData seqkey; + GTM_Sequence increment, minval, maxval, startval, lastval; + bool cycle, is_restart; + StringInfoData buf; + int errcode; + MemoryContext oldContext; + + /* + * Get the sequence key + */ + seqkey.gsk_keylen = pq_getmsgint(message, sizeof (seqkey.gsk_keylen)); + seqkey.gsk_key = (char *)pq_getmsgbytes(message, seqkey.gsk_keylen); + + /* + * Read various sequence parameters + */ + memcpy(&increment, pq_getmsgbytes(message, sizeof (GTM_Sequence)), + sizeof (GTM_Sequence)); + memcpy(&minval, pq_getmsgbytes(message, sizeof (GTM_Sequence)), + sizeof (GTM_Sequence)); 
+ memcpy(&maxval, pq_getmsgbytes(message, sizeof (GTM_Sequence)), + sizeof (GTM_Sequence)); + memcpy(&startval, pq_getmsgbytes(message, sizeof (GTM_Sequence)), + sizeof (GTM_Sequence)); + memcpy(&lastval, pq_getmsgbytes(message, sizeof (GTM_Sequence)), + sizeof (GTM_Sequence)); + + cycle = pq_getmsgbyte(message); + is_restart = pq_getmsgbyte(message); + + /* + * We must use the TopMostMemoryContext because the sequence information is + * not bound to a thread and can outlive any of the thread specific + * contextes. + */ + oldContext = MemoryContextSwitchTo(TopMostMemoryContext); + + if (GTM_SeqAlter(&seqkey, increment, minval, maxval, startval, lastval, cycle, is_restart)) + ereport(ERROR, + (errcode, + errmsg("Failed to open a new sequence"))); + + MemoryContextSwitchTo(oldContext); + + pq_getmsgend(message); + + pq_beginmessage(&buf, 'S'); + pq_sendint(&buf, SEQUENCE_ALTER_RESULT, 4); + if (myport->is_proxy) + { + GTM_ProxyMsgHeader proxyhdr; + proxyhdr.ph_conid = myport->conn_id; + pq_sendbytes(&buf, (char *)&proxyhdr, sizeof (GTM_ProxyMsgHeader)); + } + pq_sendint(&buf, seqkey.gsk_keylen, 4); + pq_sendbytes(&buf, seqkey.gsk_key, seqkey.gsk_keylen); + pq_endmessage(myport, &buf); + + if (!myport->is_proxy) + pq_flush(myport); +} + + +/* * Process MSG_SEQUENCE_GET_CURRENT message */ void @@ -697,6 +915,63 @@ ProcessSequenceGetNextCommand(Port *myport, StringInfo message) } /* + * Process MSG_SEQUENCE_SET_VAL message + */ +void +ProcessSequenceSetValCommand(Port *myport, StringInfo message) +{ + GTM_SequenceKeyData seqkey; + GTM_Sequence nextval; + MemoryContext oldContext; + StringInfoData buf; + bool iscalled; + int errcode; + + /* + * Get the sequence key + */ + seqkey.gsk_keylen = pq_getmsgint(message, sizeof (seqkey.gsk_keylen)); + seqkey.gsk_key = (char *)pq_getmsgbytes(message, seqkey.gsk_keylen); + + /* Read parameters to be set */ + memcpy(&nextval, pq_getmsgbytes(message, sizeof (GTM_Sequence)), + sizeof (GTM_Sequence)); + + iscalled = 
pq_getmsgbyte(message); + + /* + * We must use the TopMostMemoryContext because the sequence information is + * not bound to a thread and can outlive any of the thread specific + * contextes. + */ + oldContext = MemoryContextSwitchTo(TopMostMemoryContext); + + if (GTM_SeqSetVal(&seqkey, nextval, iscalled)) + ereport(ERROR, + (errcode, + errmsg("Failed to set values of sequence"))); + + MemoryContextSwitchTo(oldContext); + + pq_getmsgend(message); + + pq_beginmessage(&buf, 'S'); + pq_sendint(&buf, SEQUENCE_SET_VAL_RESULT, 4); + if (myport->is_proxy) + { + GTM_ProxyMsgHeader proxyhdr; + proxyhdr.ph_conid = myport->conn_id; + pq_sendbytes(&buf, (char *)&proxyhdr, sizeof (GTM_ProxyMsgHeader)); + } + pq_sendint(&buf, seqkey.gsk_keylen, 4); + pq_sendbytes(&buf, seqkey.gsk_key, seqkey.gsk_keylen); + pq_endmessage(myport, &buf); + + if (!myport->is_proxy) + pq_flush(myport); +} + +/* * Process MSG_SEQUENCE_RESET message */ void @@ -764,6 +1039,58 @@ ProcessSequenceCloseCommand(Port *myport, StringInfo message) pq_flush(myport); } +/* + * Process MSG_SEQUENCE_RENAME message + */ +void +ProcessSequenceRenameCommand(Port *myport, StringInfo message) +{ + GTM_SequenceKeyData seqkey, newseqkey; + StringInfoData buf; + int errcode; + MemoryContext oldContext; + + /* get the message from backend */ + seqkey.gsk_keylen = pq_getmsgint(message, sizeof (seqkey.gsk_keylen)); + seqkey.gsk_key = (char *)pq_getmsgbytes(message, seqkey.gsk_keylen); + + /* Get the rest of the message, new name length and string with new name */ + newseqkey.gsk_keylen = pq_getmsgint(message, sizeof (newseqkey.gsk_keylen)); + newseqkey.gsk_key = (char *)pq_getmsgbytes(message, newseqkey.gsk_keylen); + + /* + * As when creating a sequence, we must use the TopMostMemoryContext + * because the sequence information is not bound to a thread and + * can outlive any of the thread specific contextes. 
+ */ + oldContext = MemoryContextSwitchTo(TopMostMemoryContext); + + if ((errcode = GTM_SeqRename(&seqkey, &newseqkey))) + ereport(ERROR, + (errcode, + errmsg("Can not rename the sequence"))); + + MemoryContextSwitchTo(oldContext); + + pq_getmsgend(message); + + /* Send a SUCCESS message back to the client */ + pq_beginmessage(&buf, 'S'); + pq_sendint(&buf, SEQUENCE_RENAME_RESULT, 4); + if (myport->is_proxy) + { + GTM_ProxyMsgHeader proxyhdr; + proxyhdr.ph_conid = myport->conn_id; + pq_sendbytes(&buf, (char *)&proxyhdr, sizeof (GTM_ProxyMsgHeader)); + } + pq_sendint(&buf, newseqkey.gsk_keylen, 4); + pq_sendbytes(&buf, newseqkey.gsk_key, newseqkey.gsk_keylen); + pq_endmessage(myport, &buf); + + if (!myport->is_proxy) + pq_flush(myport); +} + void GTM_SaveSeqInfo(int ctlfd) { diff --git a/src/gtm/main/main.c b/src/gtm/main/main.c index 0ef09c4..667967a 100644 --- a/src/gtm/main/main.c +++ b/src/gtm/main/main.c @@ -72,7 +72,7 @@ static void ProcessCommand(Port *myport, StringInfo input_message); static void ProcessCoordinatorCommand(Port *myport, GTM_MessageType mtype, StringInfo message); static void ProcessTransactionCommand(Port *myport, GTM_MessageType mtype, StringInfo message); static void ProcessSnapshotCommand(Port *myport, GTM_MessageType mtype, StringInfo message); -static void ProcessSeqeunceCommand(Port *myport, GTM_MessageType mtype, StringInfo message); +static void ProcessSequenceCommand(Port *myport, GTM_MessageType mtype, StringInfo message); static void ProcessQueryCommand(Port *myport, GTM_MessageType mtype, StringInfo message); static void GTM_RegisterCoordinator(Port *myport, GTM_CoordinatorId coordinator_id); @@ -761,16 +761,16 @@ ProcessCommand(Port *myport, StringInfo input_message) switch (mtype) { - case MSG_UNREGISTER_COORD: + case MSG_UNREGISTER_COORD: ProcessCoordinatorCommand(myport, mtype, input_message); break; - case MSG_TXN_BEGIN: - case MSG_TXN_BEGIN_GETGXID: - case MSG_TXN_BEGIN_GETGXID_AUTOVACUUM: - case MSG_TXN_PREPARE: - case 
MSG_TXN_COMMIT: - case MSG_TXN_ROLLBACK: + case MSG_TXN_BEGIN: + case MSG_TXN_BEGIN_GETGXID: + case MSG_TXN_BEGIN_GETGXID_AUTOVACUUM: + case MSG_TXN_PREPARE: + case MSG_TXN_COMMIT: + case MSG_TXN_ROLLBACK: case MSG_TXN_GET_GXID: case MSG_TXN_BEGIN_GETGXID_MULTI: case MSG_TXN_COMMIT_MULTI: @@ -778,18 +778,22 @@ ProcessCommand(Port *myport, StringInfo input_message) ProcessTransactionCommand(myport, mtype, input_message); break; - case MSG_SNAPSHOT_GET: + case MSG_SNAPSHOT_GET: case MSG_SNAPSHOT_GXID_GET: case MSG_SNAPSHOT_GET_MULTI: ProcessSnapshotCommand(myport, mtype, input_message); break; - case MSG_SEQUENCE_INIT: + case MSG_SEQUENCE_INIT: case MSG_SEQUENCE_GET_CURRENT: case MSG_SEQUENCE_GET_NEXT: + case MSG_SEQUENCE_GET_LAST: + case MSG_SEQUENCE_SET_VAL: case MSG_SEQUENCE_RESET: case MSG_SEQUENCE_CLOSE: - ProcessSeqeunceCommand(myport, mtype, input_message); + case MSG_SEQUENCE_RENAME: + case MSG_SEQUENCE_ALTER: + ProcessSequenceCommand(myport, mtype, input_message); break; case MSG_TXN_GET_STATUS: @@ -1003,14 +1007,18 @@ ProcessSnapshotCommand(Port *myport, GTM_MessageType mtype, StringInfo message) } static void -ProcessSeqeunceCommand(Port *myport, GTM_MessageType mtype, StringInfo message) +ProcessSequenceCommand(Port *myport, GTM_MessageType mtype, StringInfo message) { switch (mtype) { - case MSG_SEQUENCE_INIT: + case MSG_SEQUENCE_INIT: ProcessSequenceInitCommand(myport, message); break; + case MSG_SEQUENCE_ALTER: + ProcessSequenceAlterCommand(myport, message); + break; + case MSG_SEQUENCE_GET_CURRENT: ProcessSequenceGetCurrentCommand(myport, message); break; @@ -1019,6 +1027,10 @@ ProcessSeqeunceCommand(Port *myport, GTM_MessageType mtype, StringInfo message) ProcessSequenceGetNextCommand(myport, message); break; + case MSG_SEQUENCE_SET_VAL: + ProcessSequenceSetValCommand(myport, message); + break; + case MSG_SEQUENCE_RESET: ProcessSequenceResetCommand(myport, message); break; @@ -1027,6 +1039,10 @@ ProcessSeqeunceCommand(Port *myport, GTM_MessageType 
mtype, StringInfo message) ProcessSequenceCloseCommand(myport, message); break; + case MSG_SEQUENCE_RENAME: + ProcessSequenceRenameCommand(myport, message); + break; + default: Assert(0); /* Shouldn't come here.. keep compiler quite */ } diff --git a/src/include/access/gtm.h b/src/include/access/gtm.h index 66ca3f1..3831f09 100644 --- a/src/include/access/gtm.h +++ b/src/include/access/gtm.h @@ -25,9 +25,17 @@ extern GlobalTransactionId BeginTranAutovacuumGTM(void); extern int CommitTranGTM(GlobalTransactionId gxid); extern int RollbackTranGTM(GlobalTransactionId gxid); extern GTM_Snapshot GetSnapshotGTM(GlobalTransactionId gxid, bool canbe_grouped); + +/* Sequence interface APIs with GTM */ +extern GTM_Sequence GetCurrentValGTM(char *seqname); extern GTM_Sequence GetNextValGTM(char *seqname); +extern int SetValGTM(char *seqname, GTM_Sequence nextval, bool iscalled); extern int CreateSequenceGTM(char *seqname, GTM_Sequence increment, GTM_Sequence minval, GTM_Sequence maxval, GTM_Sequence startval, bool cycle); +extern int AlterSequenceGTM(char *seqname, GTM_Sequence increment, + GTM_Sequence minval, GTM_Sequence maxval, GTM_Sequence startval, + GTM_Sequence lastval, bool cycle, bool is_restart); extern int DropSequenceGTM(char *seqname); +extern int RenameSequenceGTM(char *seqname, const char *newseqname); #endif /* ACCESS_GTM_H */ diff --git a/src/include/commands/sequence.h b/src/include/commands/sequence.h index 8a2c506..b5bb7d9 100644 --- a/src/include/commands/sequence.h +++ b/src/include/commands/sequence.h @@ -99,4 +99,8 @@ extern void AlterSequenceInternal(Oid relid, List *options); extern void seq_redo(XLogRecPtr lsn, XLogRecord *rptr); extern void seq_desc(StringInfo buf, uint8 xl_info, char *rec); +#ifdef PGXC +extern char *GetGlobalSeqName(Relation rel, const char *new_seqname); +#endif + #endif /* SEQUENCE_H */ diff --git a/src/include/gtm/gtm_client.h b/src/include/gtm/gtm_client.h index 0ccc232..05e44bf 100644 --- a/src/include/gtm/gtm_client.h +++ 
b/src/include/gtm/gtm_client.h @@ -120,9 +120,14 @@ GTM_SnapshotData *get_snapshot(GTM_Conn *conn, GlobalTransactionId gxid, int open_sequence(GTM_Conn *conn, GTM_SequenceKey key, GTM_Sequence increment, GTM_Sequence minval, GTM_Sequence maxval, GTM_Sequence startval, bool cycle); +int alter_sequence(GTM_Conn *conn, GTM_SequenceKey key, GTM_Sequence increment, + GTM_Sequence minval, GTM_Sequence maxval, + GTM_Sequence startval, GTM_Sequence lastval, bool cycle, bool is_restart); int close_sequence(GTM_Conn *conn, GTM_SequenceKey key); +int rename_sequence(GTM_Conn *conn, GTM_SequenceKey key, GTM_SequenceKey newkey); GTM_Sequence get_current(GTM_Conn *conn, GTM_SequenceKey key); GTM_Sequence get_next(GTM_Conn *conn, GTM_SequenceKey key); +int set_val(GTM_Conn *conn, GTM_SequenceKey key, GTM_Sequence nextval, bool is_called); int reset_sequence(GTM_Conn *conn, GTM_SequenceKey key); diff --git a/src/include/gtm/gtm_msg.h b/src/include/gtm/gtm_msg.h index cae0614..e76e762 100644 --- a/src/include/gtm/gtm_msg.h +++ b/src/include/gtm/gtm_msg.h @@ -34,11 +34,15 @@ typedef enum GTM_MessageType MSG_SEQUENCE_INIT, /* Initialize a new global sequence */ MSG_SEQUENCE_GET_CURRENT,/* Get the current value of sequence */ MSG_SEQUENCE_GET_NEXT, /* Get the next sequence value of sequence */ + MSG_SEQUENCE_GET_LAST, /* Get the last sequence value of sequence */ + MSG_SEQUENCE_SET_VAL, /* Set values for sequence */ MSG_SEQUENCE_RESET, /* Reset the sequence */ MSG_SEQUENCE_CLOSE, /* Close a previously inited sequence */ + MSG_SEQUENCE_RENAME, /* Rename a sequence */ + MSG_SEQUENCE_ALTER, /* Alter a sequence */ MSG_TXN_GET_STATUS, /* Get status of a given transaction */ MSG_TXN_GET_ALL_PREPARED, /* Get information about all outstanding - * prepared transactions */ + * prepared transactions */ MSG_TXN_BEGIN_GETGXID_AUTOVACUUM, /* Start a new transaction and get GXID for autovacuum */ MSG_DATA_FLUSH, /* flush pending data */ MSG_BACKEND_DISCONNECT, /* tell GTM that the backend 
diconnected from the proxy */ @@ -66,8 +70,12 @@ typedef enum GTM_ResultType SEQUENCE_INIT_RESULT, SEQUENCE_GET_CURRENT_RESULT, SEQUENCE_GET_NEXT_RESULT, + SEQUENCE_GET_LAST_RESULT, + SEQUENCE_SET_VAL_RESULT, SEQUENCE_RESET_RESULT, SEQUENCE_CLOSE_RESULT, + SEQUENCE_RENAME_RESULT, + SEQUENCE_ALTER_RESULT, TXN_GET_STATUS_RESULT, TXN_GET_ALL_PREPARED_RESULT, TXN_BEGIN_GETGXID_AUTOVACUUM_RESULT, diff --git a/src/include/gtm/gtm_seq.h b/src/include/gtm/gtm_seq.h index 6cb8cb3..7dc1e3e 100644 --- a/src/include/gtm/gtm_seq.h +++ b/src/include/gtm/gtm_seq.h @@ -23,6 +23,7 @@ typedef struct GTM_SeqInfo GTM_SequenceKey gs_key; GTM_Sequence gs_value; GTM_Sequence gs_init_value; + GTM_Sequence gs_last_value; GTM_Sequence gs_increment_by; GTM_Sequence gs_min_value; GTM_Sequence gs_max_value; @@ -57,17 +58,30 @@ int GTM_SeqOpen(GTM_SequenceKey seqkey, GTM_Sequence maxval, GTM_Sequence startval, bool cycle); -int GTM_SeqClose(GTM_SequenceKey sqkey); +int GTM_SeqAlter(GTM_SequenceKey seqkey, + GTM_Sequence increment_by, + GTM_Sequence minval, + GTM_Sequence maxval, + GTM_Sequence startval, + GTM_Sequence lastval, + bool cycle, + bool is_restart); +int GTM_SeqClose(GTM_SequenceKey seqkey); +int GTM_SeqRename(GTM_SequenceKey seqkey, GTM_SequenceKey newseqkey); GTM_Sequence GTM_SeqGetNext(GTM_SequenceKey seqkey); GTM_Sequence GTM_SeqGetCurrent(GTM_SequenceKey seqkey); +int GTM_SeqSetVal(GTM_SequenceKey seqkey, GTM_Sequence nextval, bool iscalled); int GTM_SeqReset(GTM_SequenceKey seqkey); void ProcessSequenceInitCommand(Port *myport, StringInfo message); void ProcessSequenceGetCurrentCommand(Port *myport, StringInfo message); void ProcessSequenceGetNextCommand(Port *myport, StringInfo message); +void ProcessSequenceSetValCommand(Port *myport, StringInfo message); void ProcessSequenceResetCommand(Port *myport, StringInfo message); void ProcessSequenceCloseCommand(Port *myport, StringInfo message); +void ProcessSequenceRenameCommand(Port *myport, StringInfo message); +void 
ProcessSequenceAlterCommand(Port *myport, StringInfo message); void GTM_SaveSeqInfo(int ctlfd); void GTM_RestoreSeqInfo(int ctlfd); ----------------------------------------------------------------------- Summary of changes: src/backend/access/transam/gtm.c | 76 ++++++++- src/backend/commands/sequence.c | 191 ++++++++++++++++++++-- src/backend/commands/tablecmds.c | 31 ++++- src/backend/pgxc/plan/planner.c | 81 ++-------- src/gtm/client/fe-protocol.c | 8 + src/gtm/client/gtm_client.c | 123 ++++++++++++++- src/gtm/main/gtm_seq.c | 333 +++++++++++++++++++++++++++++++++++++- src/gtm/main/main.c | 42 ++++-- src/include/access/gtm.h | 8 + src/include/commands/sequence.h | 4 + src/include/gtm/gtm_client.h | 5 + src/include/gtm/gtm_msg.h | 10 +- src/include/gtm/gtm_seq.h | 16 ++- 13 files changed, 823 insertions(+), 105 deletions(-) hooks/post-receive -- Postgres-XC |
From: mason_s <ma...@us...> - 2010-06-16 15:25:18
|
Project "Postgres-XC". The branch, master has been updated via a216b00661e2b76267681bade35a620566fe9345 (commit) from a0429d3f49568868602efb8881a79c3716201134 (commit) - Log ----------------------------------------------------------------- commit a216b00661e2b76267681bade35a620566fe9345 Author: Mason S <masonsharp@mason-sharps-macbook.local> Date: Wed Jun 16 11:24:34 2010 -0400 Do not yet allow creation of temp tables until we properly handle them. diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index cc8a664..0d73fc9 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -1601,6 +1601,15 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; break; + case T_CreateStmt: + if (((CreateStmt *)parsetree)->relation->istemp) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("Temp tables are not yet supported.")))); + + query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; + break; + /* * Statements that we execute on both the Coordinator and Data Nodes */ @@ -1626,7 +1635,6 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) case T_CreateOpClassStmt: case T_CreateOpFamilyStmt: case T_CreatePLangStmt: - case T_CreateStmt: case T_CreateSchemaStmt: case T_DeallocateStmt: /* Allow for DEALLOCATE ALL */ case T_DiscardStmt: ----------------------------------------------------------------------- Summary of changes: src/backend/pgxc/plan/planner.c | 10 +++++++++- 1 files changed, 9 insertions(+), 1 deletions(-) hooks/post-receive -- Postgres-XC |
From: mason_s <ma...@us...> - 2010-06-16 15:08:42
|
Project "Postgres-XC". The branch, master has been updated via a0429d3f49568868602efb8881a79c3716201134 (commit) from b65c64d294d9a91583534d951b758c5bccacea48 (commit) - Log ----------------------------------------------------------------- commit a0429d3f49568868602efb8881a79c3716201134 Author: Mason S <masonsharp@mason-sharps-macbook.local> Date: Wed Jun 16 11:07:40 2010 -0400 Do not allow WITH RECURSIVE or windowing functions until we add support for them. diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index 78c13a1..cc8a664 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -403,7 +403,7 @@ get_plan_nodes_insert(Query *query) if (!IsA(tle->expr, Const)) { - eval_expr = eval_const_expressions(NULL, tle->expr); + eval_expr = eval_const_expressions(NULL, (Node *) tle->expr); checkexpr = get_numeric_constant(eval_expr); } @@ -540,7 +540,7 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod if (!IsA(arg2, Const)) { /* this gets freed when the memory context gets freed */ - Expr *eval_expr = eval_const_expressions(NULL, arg2); + Expr *eval_expr = eval_const_expressions(NULL, (Node *) arg2); checkexpr = get_numeric_constant(eval_expr); } @@ -1413,6 +1413,31 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) case T_DeleteStmt: /* just use first one in querytree_list */ query = (Query *) linitial(querytree_list); + + /* Perform some checks to make sure we can support the statement */ + if (nodeTag(parsetree) == T_SelectStmt) + { + if (query->intoClause) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("INTO clause not yet supported")))); + + if (query->setOperations) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("UNION, INTERSECT and EXCEPT are not yet supported")))); + + if (query->hasRecursive) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("WITH RECURSIVE not yet 
supported")))); + + if (query->hasWindowFuncs) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("Window functions not yet supported")))); + } + query_step->exec_nodes = get_plan_nodes_command(query_plan, query); if (query_step->exec_nodes) @@ -1463,16 +1488,6 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) */ if (nodeTag(parsetree) == T_SelectStmt) { - if (query->intoClause) - ereport(ERROR, - (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("INTO clause not yet supported")))); - - if (query->setOperations) - ereport(ERROR, - (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("UNION, INTERSECT and EXCEPT are not yet supported")))); - if (StrictStatementChecking && query_step->exec_nodes && list_length(query_step->exec_nodes->nodelist) > 1) { ----------------------------------------------------------------------- Summary of changes: src/backend/pgxc/plan/planner.c | 39 +++++++++++++++++++++++++++------------ 1 files changed, 27 insertions(+), 12 deletions(-) hooks/post-receive -- Postgres-XC |
From: mason_s <ma...@us...> - 2010-06-16 14:04:53
|
Project "Postgres-XC". The branch, master has been updated via b65c64d294d9a91583534d951b758c5bccacea48 (commit) from 4a16b67e0239abda5f2ca8ec45489b7fc906ec4b (commit) - Log ----------------------------------------------------------------- commit b65c64d294d9a91583534d951b758c5bccacea48 Author: Mason S <masonsharp@mason-sharps-macbook.local> Date: Wed Jun 16 10:02:05 2010 -0400 When using hash distributed tables and a value that corresponds to the distribution column, if it is an expression containing constants, try and evaluate it to determine the destination execution node. This corresponds to bug 3008130. diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index 5e318dc..78c13a1 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -23,6 +23,7 @@ #include "lib/stringinfo.h" #include "nodes/nodeFuncs.h" #include "nodes/parsenodes.h" +#include "optimizer/clauses.h" #include "parser/parse_agg.h" #include "parser/parse_coerce.h" #include "pgxc/locator.h" @@ -257,7 +258,6 @@ free_join_list(void) static Expr * get_numeric_constant(Expr *expr) { - if (expr == NULL) return NULL; @@ -356,6 +356,7 @@ get_plan_nodes_insert(Query *query) ListCell *lc; long part_value; long *part_value_ptr = NULL; + Expr *eval_expr = NULL; /* Looks complex (correlated?) 
- best to skip */ if (query->jointree != NULL && query->jointree->fromlist != NULL) @@ -398,7 +399,13 @@ get_plan_nodes_insert(Query *query) if (strcmp(tle->resname, rel_loc_info->partAttrName) == 0) { /* We may have a cast, try and handle it */ - Expr *checkexpr = get_numeric_constant(tle->expr); + Expr *checkexpr = tle->expr; + + if (!IsA(tle->expr, Const)) + { + eval_expr = eval_const_expressions(NULL, tle->expr); + checkexpr = get_numeric_constant(eval_expr); + } if (checkexpr == NULL) break; /* no constant */ @@ -425,6 +432,9 @@ get_plan_nodes_insert(Query *query) /* single call handles both replicated and partitioned types */ exec_nodes = GetRelationNodes(rel_loc_info, part_value_ptr, false); + if (eval_expr) + pfree(eval_expr); + return exec_nodes; } @@ -524,9 +534,15 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod return false; /* Look at other argument */ + checkexpr = arg2; - /* We may have a cast, try and handle it */ - checkexpr = get_numeric_constant(arg2); + /* We may have a cast or expression, try and handle it */ + if (!IsA(arg2, Const)) + { + /* this gets freed when the memory context gets freed */ + Expr *eval_expr = eval_const_expressions(NULL, arg2); + checkexpr = get_numeric_constant(eval_expr); + } if (checkexpr != NULL) arg2 = checkexpr; ----------------------------------------------------------------------- Summary of changes: src/backend/pgxc/plan/planner.c | 24 ++++++++++++++++++++---- 1 files changed, 20 insertions(+), 4 deletions(-) hooks/post-receive -- Postgres-XC |
From: mason_s <ma...@us...> - 2010-06-15 18:59:22
|
Project "Postgres-XC". The branch, master has been updated via 4a16b67e0239abda5f2ca8ec45489b7fc906ec4b (commit) from ffe244ab59c464283ac1833e13377782bee1c122 (commit) - Log ----------------------------------------------------------------- commit 4a16b67e0239abda5f2ca8ec45489b7fc906ec4b Author: Mason S <masonsharp@mason-sharps-macbook.local> Date: Tue Jun 15 14:54:23 2010 -0400 Add support for views. A view is in effect rewritten into a FROM clause subquery, so we also add support for detecting safe FROM clause subqueries. We check after the query tree is rewritten if the query is safe to execute in a single step, including FROM clause subqueries. If not, we do not allow it. diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index cc4413c..a8f9d30 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -878,6 +878,12 @@ AddRelationDistribution (Oid relid, case DISTTYPE_HASH: /* User specified hash column, validate */ attnum = get_attnum(relid, distributeby->colname); + if (!attnum) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("Invalid distribution column specified"))); + } if (!IsHashDistributable(descriptor->attrs[attnum-1]->atttypid)) { diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index 13a0f8b..5e318dc 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -86,6 +86,15 @@ typedef struct PGXCJoinType join_type; } PGXC_Join; +/* used for base column in an expression */ +typedef struct ColumnBase +{ + int relid; + char *relname; + char *relalias; + char *colname; +} ColumnBase; + /* A list of List*'s, one for each relation. 
*/ List *join_list = NULL; @@ -96,6 +105,17 @@ bool StrictStatementChecking = true; bool StrictSelectChecking = false; /* + * True if both lists contain only one node and are the same + */ +static bool +same_single_node (List *nodelist1, List *nodelist2) +{ + return nodelist1 && list_length(nodelist1) == 1 + && nodelist2 && list_length(nodelist2) == 1 + && linitial_int(nodelist1) != linitial_int(nodelist2); +} + +/* * Create a new join struct for tracking how relations are joined */ static PGXC_Join * @@ -224,7 +244,6 @@ free_join_list(void) if (join_list == NULL) return; - /* free all items in list including PGXC_Join struct */ list_free_deep(join_list); } @@ -269,7 +288,7 @@ get_numeric_constant(Expr *expr) * type, like a join, and we need to then look at the joinaliasvars * to determine what the base table and column really is. */ -static Var * +static ColumnBase* get_base_var(Var *var, List *rtables) { RangeTblEntry *rte; @@ -282,18 +301,45 @@ get_base_var(Var *var, List *rtables) rte = list_nth(rtables, var->varno - 1); if (rte->rtekind == RTE_RELATION) - return var; + { + ColumnBase *column_base = (ColumnBase *) palloc0(sizeof(ColumnBase)); + + column_base->relid = rte->relid; + column_base->relname = get_rel_name(rte->relid); + column_base->colname = strVal(list_nth(rte->eref->colnames, + var->varattno - 1)); + column_base->relalias = rte->eref->aliasname; + return column_base; + } else if (rte->rtekind == RTE_JOIN) { Var *colvar = list_nth(rte->joinaliasvars, var->varattno - 1); /* continue resolving recursively */ return get_base_var(colvar, rtables); + //may need to set this, toocolumn_base->relalias = rte->eref->aliasname; } - else + else if (rte->rtekind == RTE_SUBQUERY) { - return NULL; + /* + * Handle views like select * from v1 where col1 = 1 + * where col1 is partition column of base relation + */ + /* the varattno corresponds with the subquery's target list (projections) */ + TargetEntry *tle = list_nth(rte->subquery->targetList, var->varattno - 
1); /* or varno? */ + + if (!IsA(tle->expr, Var)) + return NULL; /* not column based expressoin, return */ + else + { + Var *colvar = (Var *) tle->expr; + + /* continue resolving recursively */ + return get_base_var(colvar, rte->subquery->rtable); + } } + + return NULL; } @@ -316,10 +362,11 @@ get_plan_nodes_insert(Query *query) return NULL; /* Make sure there is just one table */ - if (query->rtable == NULL || query->rtable->length != 1) + if (query->rtable == NULL) return NULL; - rte = (RangeTblEntry *) lfirst(list_head(query->rtable)); + rte = (RangeTblEntry *) list_nth(query->rtable, query->resultRelation - 1); + if (rte != NULL && rte->rtekind != RTE_RELATION) /* Bad relation type */ @@ -402,10 +449,6 @@ get_plan_nodes_insert(Query *query) static bool examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_node) { - char *rel_name, - *rel_name2; - char *col_name, - *col_name2; RelationLocInfo *rel_loc_info1, *rel_loc_info2; Const *constant; @@ -472,23 +515,14 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod /* Look for a table */ if (IsA(arg1, Var)) { - RangeTblEntry *rte1, - *rte2; - /* get the RangeTableEntry */ Var *colvar = (Var *) arg1; - colvar = get_base_var(colvar, rtables); + ColumnBase *column_base = get_base_var(colvar, rtables); - if (!colvar) + if (!column_base) return false; - rte1 = list_nth(rtables, colvar->varno - 1); - - rel_name = get_rel_name(rte1->relid); - col_name = strVal(list_nth(rte1->eref->colnames, - colvar->varattno - 1)); - /* Look at other argument */ /* We may have a cast, try and handle it */ @@ -502,21 +536,21 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod /* We have column = literal. 
Check if partitioned case */ constant = (Const *) arg2; - rel_loc_info1 = GetRelationLocInfo(rte1->relid); + rel_loc_info1 = GetRelationLocInfo(column_base->relid); if (!rel_loc_info1) return false; /* If hash partitioned, check if the part column was used */ - if (IsHashColumn(rel_loc_info1, col_name)) + if (IsHashColumn(rel_loc_info1, column_base->colname)) { /* add to partitioned literal join conditions */ Literal_Comparison *lit_comp = palloc(sizeof(Literal_Comparison)); - lit_comp->relid = rte1->relid; + lit_comp->relid = column_base->relid; lit_comp->rel_loc_info = rel_loc_info1; - lit_comp->col_name = col_name; + lit_comp->col_name = column_base->colname; lit_comp->constant = constant->constvalue; conditions->partitioned_literal_comps = lappend( @@ -537,23 +571,22 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod else if (IsA(arg2, Var)) { PGXC_Join *pgxc_join; + ColumnBase *column_base2; Var *colvar2 = (Var *) arg2; - rel_loc_info1 = GetRelationLocInfo(rte1->relid); + rel_loc_info1 = GetRelationLocInfo(column_base->relid); if (!rel_loc_info1) return false; - colvar2 = get_base_var(colvar2, rtables); - if (!colvar2) + column_base2 = get_base_var(colvar2, rtables); + if (!column_base2) return false; - rte2 = list_nth(rtables, colvar2->varno - 1); - rel_name2 = get_rel_name(rte2->relid); - rel_loc_info2 = GetRelationLocInfo(rte2->relid); + rel_loc_info2 = GetRelationLocInfo(column_base2->relid); /* get data struct about these two relations joining */ - pgxc_join = find_or_create_pgxc_join(rte1->relid, rte1->eref->aliasname, - rte2->relid, rte2->eref->aliasname); + pgxc_join = find_or_create_pgxc_join(column_base->relid, column_base->relalias, + column_base2->relid, column_base2->relalias); /* * pgxc_join->condition_list = @@ -569,7 +602,7 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod if (rel_loc_info2->locatorType != LOCATOR_TYPE_REPLICATED) { /* Note other relation, saves us work later. 
*/ - conditions->base_rel_name = rel_name2; + conditions->base_rel_name = column_base2->relname; conditions->base_rel_loc_info = rel_loc_info2; if (rel_loc_info1) FreeRelationLocInfo(rel_loc_info1); @@ -577,7 +610,7 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod if (conditions->base_rel_name == NULL) { - conditions->base_rel_name = rel_name; + conditions->base_rel_name = column_base->relname; conditions->base_rel_loc_info = rel_loc_info1; if (rel_loc_info2) FreeRelationLocInfo(rel_loc_info2); @@ -595,7 +628,7 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod lappend(conditions->replicated_joins, opexpr); /* other relation not replicated, note it for later */ - conditions->base_rel_name = rel_name; + conditions->base_rel_name = column_base->relname; conditions->base_rel_loc_info = rel_loc_info1; /* note nature of join between the two relations */ @@ -613,11 +646,8 @@ examine_conditions(Special_Conditions *conditions, List *rtables, Node *expr_nod * PGXCTODO - for the prototype, we assume all partitioned * tables are on the same nodes. 
*/ - col_name2 = strVal(list_nth(rte2->eref->colnames, - colvar2->varattno - 1)); - - if (IsHashColumn(rel_loc_info1, col_name) - && IsHashColumn(rel_loc_info2, col_name2)) + if (IsHashColumn(rel_loc_info1, column_base->colname) + && IsHashColumn(rel_loc_info2, column_base2->colname)) { /* We found a partitioned join */ conditions->partitioned_parent_child = @@ -740,13 +770,16 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) ListCell *lc, *item; Special_Conditions *special_conditions; - OpExpr *opexpr; + OpExpr *opexpr; Var *colvar; RelationLocInfo *rel_loc_info; - Exec_Nodes *exec_nodes; - Exec_Nodes *test_exec_nodes; + Exec_Nodes *test_exec_nodes = NULL; + Exec_Nodes *exec_nodes = NULL; + Exec_Nodes *current_nodes = NULL; + Exec_Nodes *from_query_nodes = NULL; TableUsageType table_usage_type = TABLE_USAGE_TYPE_NO_TABLE; TableUsageType current_usage_type = TABLE_USAGE_TYPE_NO_TABLE; + int from_subquery_count = 0; exec_nodes = NULL; @@ -778,13 +811,11 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) exec_nodes = (Exec_Nodes *) palloc0(sizeof(Exec_Nodes)); exec_nodes->tableusagetype = TABLE_USAGE_TYPE_PGCATALOG; free_special_relations(special_conditions); - free_join_list(); return exec_nodes; } /* complicated */ free_special_relations(special_conditions); - free_join_list(); return NULL; } } @@ -797,20 +828,49 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) if (rte->rtekind == RTE_SUBQUERY) { + from_subquery_count++; /* * Recursively call for subqueries. * Note this also works for views, which are rewritten as subqueries. 
*/ - Exec_Nodes *sub_nodes = get_plan_nodes(query_plan, rte->subquery, isRead); - if (sub_nodes) - current_usage_type = sub_nodes->tableusagetype; + current_nodes = get_plan_nodes(query_plan, rte->subquery, isRead); + if (current_nodes) + current_usage_type = current_nodes->tableusagetype; else { /* could be complicated */ free_special_relations(special_conditions); - free_join_list(); return NULL; } + + /* We compare to make sure that the subquery is safe to execute with previous- + * we may have multiple ones in the FROM clause. + * We handle the simple case of allowing multiple subqueries in the from clause, + * but only allow one of them to not contain replicated tables + */ + if (!from_query_nodes) + from_query_nodes = current_nodes; + else if (current_nodes->tableusagetype == TABLE_USAGE_TYPE_USER_REPLICATED) + { + /* ok, safe */ + if (!from_query_nodes) + from_query_nodes = current_nodes; + } + else + { + if (from_query_nodes->tableusagetype == TABLE_USAGE_TYPE_USER_REPLICATED) + from_query_nodes = current_nodes; + else + { + /* Allow if they are both using one node, and the same one */ + if (!same_single_node (from_query_nodes->nodelist, current_nodes->nodelist)) + { + /* Complicated */ + free_special_relations(special_conditions); + return NULL; + } + } + } } else if (rte->rtekind == RTE_RELATION) { @@ -818,7 +878,7 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) if (get_rel_namespace(rte->relid) == PG_CATALOG_NAMESPACE) current_usage_type = TABLE_USAGE_TYPE_PGCATALOG; else - current_usage_type = TABLE_USAGE_TYPE_USER_TABLE; + current_usage_type = TABLE_USAGE_TYPE_USER; } else if (rte->rtekind == RTE_FUNCTION) { @@ -827,13 +887,17 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) if (get_func_namespace(funcexpr->funcid) == PG_CATALOG_NAMESPACE) current_usage_type = TABLE_USAGE_TYPE_PGCATALOG; else - current_usage_type = TABLE_USAGE_TYPE_USER_TABLE; + { + //current_usage_type = TABLE_USAGE_TYPE_USER; + /* Complicated 
*/ + free_special_relations(special_conditions); + return NULL; + } } else { /* could be complicated */ free_special_relations(special_conditions); - free_join_list(); return NULL; } @@ -844,7 +908,6 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) { /* mixed- too complicated for us for now */ free_special_relations(special_conditions); - free_join_list(); return NULL; } } @@ -852,7 +915,6 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) { /* could be complicated */ free_special_relations(special_conditions); - free_join_list(); return NULL; } } @@ -871,7 +933,6 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) { /* if cross joins may exist, just return NULL */ free_special_relations(special_conditions); - free_join_list(); return NULL; } @@ -886,7 +947,6 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) if (pgxcjoin->join_type == JOIN_OTHER) { free_special_relations(special_conditions); - free_join_list(); return NULL; } } @@ -897,36 +957,64 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) if (special_conditions->partitioned_parent_child == NULL && special_conditions->partitioned_literal_comps == NULL) { - if (special_conditions->replicated_joins == NULL - && (query->rtable == NULL || query->rtable->length > 1)) + /* + * We have either a single table, just replicated tables, or a + * table that just joins with replicated tables, or something + * complicated. + */ - /* - * This is too complicated for a single step, or there is no FROM - * clause - */ - exec_nodes = NULL; - else + /* See if we noted a table earlier to use */ + rel_loc_info = special_conditions->base_rel_loc_info; + + if (rel_loc_info == NULL) { - /* - * We have either a single table, just replicated tables, or a - * table that just joins with replicated tables. 
- */ + RangeTblEntry *rtesave = NULL; - /* See if we noted a table earlier to use */ - rel_loc_info = special_conditions->base_rel_loc_info; + foreach(lc, query->rtable) + { + rte = (RangeTblEntry *) lfirst(lc); + + /* + * If the query is rewritten (which can be due to rules or views), + * ignore extra stuff. Also ignore subqueries we have processed + */ + if (!rte->inFromCl || rte->rtekind != RTE_RELATION) + continue; + + /* PGXCTODO - handle RTEs that are functions */ + if (rtesave) + /* + * Too complicated, we have multiple relations that still + * cannot be joined safely + */ + return NULL; + + rtesave = rte; + } - if (rel_loc_info == NULL) + if (rtesave) { /* a single table, just grab it */ - rte = (RangeTblEntry *) linitial(query->rtable); - rel_loc_info = GetRelationLocInfo(rte->relid); + rel_loc_info = GetRelationLocInfo(rtesave->relid); if (!rel_loc_info) return NULL; - } + exec_nodes = GetRelationNodes(rel_loc_info, NULL, isRead); + } + } + else + { exec_nodes = GetRelationNodes(rel_loc_info, NULL, isRead); - exec_nodes->tableusagetype = table_usage_type; + } + + /* Note replicated table usage for determining safe queries */ + if (exec_nodes) + { + if (table_usage_type == TABLE_USAGE_TYPE_USER && IsReplicated(rel_loc_info)) + table_usage_type = TABLE_USAGE_TYPE_USER_REPLICATED; + else + exec_nodes->tableusagetype = table_usage_type; } } /* check for partitioned col comparison against a literal */ @@ -950,15 +1038,10 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) exec_nodes = test_exec_nodes; else { - if ((exec_nodes && list_length(exec_nodes->nodelist) > 1) - || (test_exec_nodes && list_length(test_exec_nodes->nodelist) > 1)) - /* there should only be one */ - exec_nodes = NULL; - else + if (!same_single_node(exec_nodes->nodelist, test_exec_nodes->nodelist)) { - /* Make sure they use the same nodes */ - if (linitial_int(test_exec_nodes->nodelist) != linitial_int(exec_nodes->nodelist)) - exec_nodes = NULL; + 
free_special_relations(special_conditions); + return NULL; } } } @@ -970,14 +1053,18 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) * no partitioned column comparison condition with a literal. We just * use one of the tables as a basis for node determination. */ + ColumnBase *column_base; + opexpr = (OpExpr *) linitial(special_conditions->partitioned_parent_child); colvar = (Var *) linitial(opexpr->args); /* get the RangeTableEntry */ - rte = list_nth(query->rtable, colvar->varno - 1); - rel_loc_info = GetRelationLocInfo(rte->relid); + column_base = get_base_var(colvar, query->rtable); + if (!column_base) + return false; + rel_loc_info = GetRelationLocInfo(column_base->relid); if (!rel_loc_info) return false; @@ -985,7 +1072,29 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) exec_nodes->tableusagetype = table_usage_type; } free_special_relations(special_conditions); - free_join_list(); + + if (from_query_nodes) + { + if (!exec_nodes) + return from_query_nodes; + /* Just use exec_nodes if the from subqueries are all replicated or using the exact + * same node + */ + else if (from_query_nodes->tableusagetype == TABLE_USAGE_TYPE_USER_REPLICATED + || (same_single_node(from_query_nodes->nodelist, exec_nodes->nodelist))) + return exec_nodes; + else + { + /* We allow views, where the (rewritten) subquery may be on all nodes, but the parent + * query applies a condition on the from subquery. 
+ */ + if (list_length(query->jointree->fromlist) == from_subquery_count + && list_length(exec_nodes->nodelist) == 1) + return exec_nodes; + } + /* Too complicated, give up */ + return NULL; + } return exec_nodes; } @@ -999,26 +1108,30 @@ get_plan_nodes(Query_Plan *query_plan, Query *query, bool isRead) static Exec_Nodes * get_plan_nodes_command(Query_Plan *query_plan, Query *query) { + Exec_Nodes *exec_nodes = NULL; switch (query->commandType) { case CMD_SELECT: - return get_plan_nodes(query_plan, query, true); + exec_nodes = get_plan_nodes(query_plan, query, true); + break; case CMD_INSERT: - return get_plan_nodes_insert(query); + exec_nodes = get_plan_nodes_insert(query); + break; case CMD_UPDATE: - /* treat as a select */ - return get_plan_nodes(query_plan, query, false); - case CMD_DELETE: /* treat as a select */ - return get_plan_nodes(query_plan, query, false); + exec_nodes = get_plan_nodes(query_plan, query, false); + break; default: return NULL; } + + free_join_list(); + return exec_nodes; } @@ -1498,6 +1611,7 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) case T_RemoveOpFamilyStmt: case T_TruncateStmt: case T_VariableSetStmt: + case T_ViewStmt: /* * Also support these, should help later with pg_restore, although @@ -1581,7 +1695,6 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) case T_PrepareStmt: case T_RuleStmt: case T_UnlistenStmt: - case T_ViewStmt: /* fall through */ default: /* Allow for override */ diff --git a/src/include/pgxc/locator.h b/src/include/pgxc/locator.h index 5fea37a..5ead756 100644 --- a/src/include/pgxc/locator.h +++ b/src/include/pgxc/locator.h @@ -25,6 +25,7 @@ #define HASH_SIZE 4096 #define HASH_MASK 0x00000FFF; +#define IsReplicated(x) (x->locatorType == LOCATOR_TYPE_REPLICATED) #include "utils/relcache.h" @@ -47,7 +48,8 @@ typedef enum { TABLE_USAGE_TYPE_NO_TABLE, TABLE_USAGE_TYPE_PGCATALOG, - TABLE_USAGE_TYPE_USER_TABLE, + TABLE_USAGE_TYPE_USER, + 
TABLE_USAGE_TYPE_USER_REPLICATED, /* based on a replicated table */ TABLE_USAGE_TYPE_MIXED } TableUsageType; ----------------------------------------------------------------------- Summary of changes: src/backend/catalog/heap.c | 6 + src/backend/pgxc/plan/planner.c | 303 +++++++++++++++++++++++++++------------ src/include/pgxc/locator.h | 4 +- 3 files changed, 217 insertions(+), 96 deletions(-) hooks/post-receive -- Postgres-XC |
From: mason_s <ma...@us...> - 2010-06-01 21:07:02
|
Project "Postgres-XC". The branch, master has been updated via ffe244ab59c464283ac1833e13377782bee1c122 (commit) from 63b0858e76a740e6b0a5e30fa27d7b1d761ac6af (commit) - Log ----------------------------------------------------------------- commit ffe244ab59c464283ac1833e13377782bee1c122 Author: Mason S <mas...@ma...> Date: Tue Jun 1 17:05:56 2010 -0400 Support for pg_dump and pg_restore. The CREATE TABLE command generation now includes distribution information. Written by Michael Paquier diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index fdb9564..ec86c18 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -3187,6 +3187,10 @@ getTables(int *numTables) int i_relfrozenxid; int i_owning_tab; int i_owning_col; +#ifdef PGXC + int i_pgxclocatortype; + int i_pgxcattnum; +#endif int i_reltablespace; int i_reloptions; int i_toastreloptions; @@ -3219,6 +3223,8 @@ getTables(int *numTables) /* * Left join to pick up dependency info linking sequences to their * owning column, if any (note this dependency is AUTO as of 8.2) + * PGXC is based on PostgreSQL version 8.4, it is not necessary to + * to modify the other SQL queries. */ appendPQExpBuffer(query, "SELECT c.tableoid, c.oid, c.relname, " @@ -3230,7 +3236,11 @@ getTables(int *numTables) "d.refobjid AS owning_tab, " "d.refobjsubid AS owning_col, " "(SELECT spcname FROM pg_tablespace t WHERE t.oid = c.reltablespace) AS reltablespace, " - "array_to_string(c.reloptions, ', ') AS reloptions, " +#ifdef PGXC + "(SELECT pclocatortype from pgxc_class v where v.pcrelid = c.oid) AS pgxclocatortype," + "(SELECT pcattnum from pgxc_class v where v.pcrelid = c.oid) AS pgxcattnum," +#endif + "array_to_string(c.reloptions, ', ') AS reloptions, " "array_to_string(array(SELECT 'toast.' 
|| x FROM unnest(tc.reloptions) x), ', ') AS toast_reloptions " "FROM pg_class c " "LEFT JOIN pg_depend d ON " @@ -3448,6 +3458,10 @@ getTables(int *numTables) i_relfrozenxid = PQfnumber(res, "relfrozenxid"); i_owning_tab = PQfnumber(res, "owning_tab"); i_owning_col = PQfnumber(res, "owning_col"); +#ifdef PGXC + i_pgxclocatortype = PQfnumber(res, "pgxclocatortype"); + i_pgxcattnum = PQfnumber(res, "pgxcattnum"); +#endif i_reltablespace = PQfnumber(res, "reltablespace"); i_reloptions = PQfnumber(res, "reloptions"); i_toastreloptions = PQfnumber(res, "toast_reloptions"); @@ -3495,6 +3509,19 @@ getTables(int *numTables) tblinfo[i].owning_tab = atooid(PQgetvalue(res, i, i_owning_tab)); tblinfo[i].owning_col = atoi(PQgetvalue(res, i, i_owning_col)); } +#ifdef PGXC + /* Not all the tables have pgxc locator Data */ + if (PQgetisnull(res, i, i_pgxclocatortype)) + { + tblinfo[i].pgxclocatortype = 'E'; + tblinfo[i].pgxcattnum = 0; + } + else + { + tblinfo[i].pgxclocatortype = *(PQgetvalue(res, i, i_pgxclocatortype)); + tblinfo[i].pgxcattnum = atoi(PQgetvalue(res, i, i_pgxcattnum)); + } +#endif tblinfo[i].reltablespace = strdup(PQgetvalue(res, i, i_reltablespace)); tblinfo[i].reloptions = strdup(PQgetvalue(res, i, i_reloptions)); tblinfo[i].toast_reloptions = strdup(PQgetvalue(res, i, i_toastreloptions)); @@ -9939,6 +9966,30 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo) appendPQExpBuffer(q, ")"); } +#ifdef PGXC + /* Add the grammar extension linked to PGXC depending on data got from pgxc_class */ + if (tbinfo->pgxclocatortype != 'E') + { + /* N: DISTRIBUTE BY ROUND ROBIN */ + if (tbinfo->pgxclocatortype == 'N') + { + appendPQExpBuffer(q, "\nDISTRIBUTE BY ROUND ROBIN"); + } + /* R: DISTRIBUTE BY REPLICATED */ + else if (tbinfo->pgxclocatortype == 'R') + { + appendPQExpBuffer(q, "\nDISTRIBUTE BY REPLICATION"); + } + /* H: DISTRIBUTE BY HASH */ + else if (tbinfo->pgxclocatortype == 'H') + { + int hashkey = tbinfo->pgxcattnum; + appendPQExpBuffer(q, "\nDISTRIBUTE BY HASH 
(%s)", + fmtId(tbinfo->attnames[hashkey - 1])); + } + } +#endif + appendPQExpBuffer(q, ";\n"); /* diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index a9b3dae..276a3d6 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -16,6 +16,10 @@ #include "postgres_fe.h" +#ifdef PGXC +#include "pgxc/pgxc.h" +#endif + /* * pg_dump uses two different mechanisms for identifying database objects: * @@ -234,6 +238,11 @@ typedef struct _tableInfo bool interesting; /* true if need to collect more data */ +#ifdef PGXC + /* PGXC table locator Data */ + char pgxclocatortype; /* Type of PGXC table locator */ + int pgxcattnum; /* Number of the attribute the table is partitioned with */ +#endif /* * These fields are computed only if we decide the table is interesting * (it's either a table to dump, or a direct parent of a dumpable table). ----------------------------------------------------------------------- Summary of changes: src/bin/pg_dump/pg_dump.c | 53 ++++++++++++++++++++++++++++++++++++++++++++- src/bin/pg_dump/pg_dump.h | 9 +++++++ 2 files changed, 61 insertions(+), 1 deletions(-) hooks/post-receive -- Postgres-XC |
From: mason_s <ma...@us...> - 2010-06-01 20:14:49
|
Project "Postgres-XC". The branch, master has been updated via 63b0858e76a740e6b0a5e30fa27d7b1d761ac6af (commit) from 7b0d97791bdd0483e4ec9fe4079f494b76523b25 (commit) - Log ----------------------------------------------------------------- commit 63b0858e76a740e6b0a5e30fa27d7b1d761ac6af Author: Mason S <mas...@ma...> Date: Tue Jun 1 16:04:43 2010 -0400 Add support for immutable stored functions and enable support for many other DDL commands (some of which also depend on stored functions), like CREATE OPERATOR. Written by Michael Paquier diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c index f0989bf..b1349de 100644 --- a/src/backend/commands/functioncmds.c +++ b/src/backend/commands/functioncmds.c @@ -875,6 +875,17 @@ CreateFunction(CreateFunctionStmt *stmt, const char *queryString) interpret_AS_clause(languageOid, languageName, funcname, as_clause, &prosrc_str, &probin_str); +#ifdef PGXC + /* + * For the time being, only immutable functions are allowed to be created + * for a user. A superuser can create volatile and stable functions freely. + */ + if (volatility != PROVOLATILE_IMMUTABLE && !superuser()) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("stable and volatile not yet supported, function volatility has to be immutable"))); +#endif + /* * Set default values for COST and ROWS depending on other parameters; * reject ROWS if it's not returnsSet. 
NB: pg_dump knows these default diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index ffb2631..13a0f8b 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -1373,8 +1373,6 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) break; /* Statements that we only want to execute on the Coordinator */ - case T_AlterSeqStmt: - case T_CommentStmt: case T_CreateSeqStmt: case T_VariableShowStmt: query_plan->exec_loc_type = EXEC_ON_COORD; @@ -1400,26 +1398,104 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; query_plan->force_autocommit = true; break; + case T_AlterObjectSchemaStmt: + /* Sequences are just defined on coordinator */ + if (((AlterObjectSchemaStmt *) parsetree)->objectType == OBJECT_SEQUENCE) + query_plan->exec_loc_type = EXEC_ON_COORD; + else + query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; + break; + case T_AlterSeqStmt: + /* Alter sequence is not supported yet, it needs complementary interactions with GTM */ + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("This command is not yet supported")))); + break; + case T_AlterTableStmt: + /* + * ALTER SEQUENCE needs some interactions with GTM, + * this query is not supported yet. 
+ */ + if (((AlterTableStmt *) parsetree)->relkind == OBJECT_SEQUENCE) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("Cannot yet alter a sequence")))); + else + query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; + break; + case T_CommentStmt: + /* Sequences are only defined on coordinator */ + if (((CommentStmt *) parsetree)->objtype == OBJECT_SEQUENCE) + query_plan->exec_loc_type = EXEC_ON_COORD; + else + query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; + break; + case T_RenameStmt: + /* Sequences are only defined on coordinator */ + if (((RenameStmt *) parsetree)->renameType == OBJECT_SEQUENCE) + /* + * Renaming a sequence requires interactions with GTM + * what is not supported yet + */ + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("Sequence renaming not yet supported, you should drop it and created a new one")))); + else + query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; + break; + case T_DropPropertyStmt: + /* + * Triggers are not yet supported by PGXC + * all other queries are executed on both Coordinator and Datanode + * On the same point, assert also is not supported + */ + if (((DropPropertyStmt *)parsetree)->removeType == OBJECT_TRIGGER) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("This command is not yet supported.")))); + else + query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; + break; /* * Statements that we execute on both the Coordinator and Data Nodes */ - case T_AlterTableStmt: case T_AlterDatabaseStmt: case T_AlterDatabaseSetStmt: case T_AlterDomainStmt: - case T_AlterObjectSchemaStmt: + case T_AlterFdwStmt: + case T_AlterForeignServerStmt: + case T_AlterFunctionStmt: + case T_AlterOpFamilyStmt: + case T_AlterTSConfigurationStmt: + case T_AlterTSDictionaryStmt: + case T_ClosePortalStmt: /* In case CLOSE ALL is issued */ + case T_CompositeTypeStmt: case T_ConstraintsSetStmt: + case T_CreateCastStmt: + case 
T_CreateConversionStmt: case T_CreateDomainStmt: case T_CreateEnumStmt: + case T_CreateFdwStmt: + case T_CreateForeignServerStmt: + case T_CreateFunctionStmt: /* Only global functions are supported */ + case T_CreateOpClassStmt: + case T_CreateOpFamilyStmt: + case T_CreatePLangStmt: case T_CreateStmt: case T_CreateSchemaStmt: case T_DeallocateStmt: /* Allow for DEALLOCATE ALL */ case T_DiscardStmt: + case T_DropCastStmt: + case T_DropFdwStmt: + case T_DropForeignServerStmt: + case T_DropPLangStmt: case T_IndexStmt: case T_LockStmt: case T_ReindexStmt: - case T_RenameStmt: + case T_RemoveFuncStmt: + case T_RemoveOpClassStmt: + case T_RemoveOpFamilyStmt: case T_TruncateStmt: case T_VariableSetStmt: @@ -1431,15 +1507,18 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) case T_GrantRoleStmt: case T_CreateRoleStmt: case T_AlterRoleStmt: + case T_AlterRoleSetStmt: + case T_AlterUserMappingStmt: + case T_CreateUserMappingStmt: case T_DropRoleStmt: case T_AlterOwnerStmt: case T_DropOwnedStmt: + case T_DropUserMappingStmt: case T_ReassignOwnedStmt: case T_DefineStmt: /* used for aggregates, some types */ query_plan->exec_loc_type = EXEC_ON_COORD | EXEC_ON_DATA_NODES; break; - case T_TransactionStmt: switch (((TransactionStmt *) parsetree)->kind) { @@ -1463,52 +1542,43 @@ GetQueryPlan(Node *parsetree, const char *sql_statement, List *querytree_list) * data node will do */ case T_ExplainStmt: + if (((ExplainStmt *) parsetree)->analyze) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("ANALYZE with EXPLAIN is currently not supported.")))); + + query_step->exec_nodes = palloc0(sizeof(Exec_Nodes)); query_step->exec_nodes->nodelist = GetAnyDataNode(); + query_step->exec_nodes->baselocatortype = LOCATOR_TYPE_RROBIN; query_plan->exec_loc_type = EXEC_ON_DATA_NODES; break; /* - * Statements we do not yet want to handle. + * Trigger queries are not yet supported by PGXC. + * Tablespace queries are also not yet supported. 
+ * Two nodes on the same servers cannot use the same tablespace. + */ + case T_CreateTableSpaceStmt: + case T_CreateTrigStmt: + case T_DropTableSpaceStmt: + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("This command is not yet supported.")))); + break; + + /* + * Other statements we do not yet want to handle. * By default they would be fobidden, but we list these for reference. * Note that there is not a 1-1 correspndence between * SQL command and the T_*Stmt structures. */ - case T_AlterFdwStmt: - case T_AlterForeignServerStmt: - case T_AlterFunctionStmt: - case T_AlterOpFamilyStmt: - case T_AlterTSConfigurationStmt: - case T_AlterTSDictionaryStmt: - case T_AlterUserMappingStmt: - case T_ClosePortalStmt: - case T_CompositeTypeStmt: - case T_CreateCastStmt: - case T_CreateConversionStmt: - case T_CreateFdwStmt: - case T_CreateFunctionStmt: - case T_CreateForeignServerStmt: - case T_CreateOpClassStmt: - case T_CreateOpFamilyStmt: - case T_CreatePLangStmt: - case T_CreateTableSpaceStmt: - case T_CreateTrigStmt: - case T_CreateUserMappingStmt: case T_DeclareCursorStmt: - case T_DropCastStmt: - case T_DropFdwStmt: - case T_DropForeignServerStmt: - case T_DropPLangStmt: - case T_DropPropertyStmt: - case T_DropTableSpaceStmt: case T_ExecuteStmt: case T_FetchStmt: case T_ListenStmt: case T_LoadStmt: case T_NotifyStmt: case T_PrepareStmt: - case T_RemoveFuncStmt: - case T_RemoveOpClassStmt: - case T_RemoveOpFamilyStmt: case T_RuleStmt: case T_UnlistenStmt: case T_ViewStmt: ----------------------------------------------------------------------- Summary of changes: src/backend/commands/functioncmds.c | 11 +++ src/backend/pgxc/plan/planner.c | 142 ++++++++++++++++++++++++++--------- 2 files changed, 117 insertions(+), 36 deletions(-) hooks/post-receive -- Postgres-XC |