author | Robert Haas | 2011-09-23 21:52:43 +0000
---|---|---
committer | Robert Haas | 2011-09-23 21:52:43 +0000
commit | 0c8eda6258805223fa412ab55a1f130fbc51afa0 (patch)
tree | 9e20f64f3fb04df287b35e1f934032d7671a7316 /src/include/storage/barrier.h
parent | 291873c1554ceecc71a81c25aef4f1260c15c222 (diff)
Memory barrier support for PostgreSQL.
This is not actually used anywhere yet, but it gets the basic
infrastructure in place. It is fairly likely that there are bugs, and
support for some important platforms may be missing, so we'll need to
refine this as we go along.
Diffstat (limited to 'src/include/storage/barrier.h')
-rw-r--r-- | src/include/storage/barrier.h | 171 |
1 files changed, 171 insertions, 0 deletions
diff --git a/src/include/storage/barrier.h b/src/include/storage/barrier.h
new file mode 100644
index 00000000000..0286817a38f
--- /dev/null
+++ b/src/include/storage/barrier.h
@@ -0,0 +1,171 @@
+/*-------------------------------------------------------------------------
+ *
+ * barrier.h
+ *	  Memory barrier operations.
+ *
+ * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/storage/barrier.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef BARRIER_H
+#define BARRIER_H
+
+#include "storage/s_lock.h"
+
+extern slock_t dummy_spinlock;
+
+/*
+ * A compiler barrier need not (and preferably should not) emit any actual
+ * machine code, but must act as an optimization fence: the compiler must not
+ * reorder loads or stores to main memory around the barrier.  However, the
+ * CPU may still reorder loads or stores at runtime, if the architecture's
+ * memory model permits this.
+ *
+ * A memory barrier must act as a compiler barrier, and in addition must
+ * guarantee that all loads and stores issued prior to the barrier are
+ * completed before any loads or stores issued after the barrier.  Unless
+ * loads and stores are totally ordered (which is not the case on most
+ * architectures) this requires issuing some sort of memory fencing
+ * instruction.
+ *
+ * A read barrier must act as a compiler barrier, and in addition must
+ * guarantee that any loads issued prior to the barrier are completed before
+ * any loads issued after the barrier.  Similarly, a write barrier acts
+ * as a compiler barrier, and also orders stores.  Read and write barriers
+ * are thus weaker than a full memory barrier, but stronger than a compiler
+ * barrier.  In practice, on machines with strong memory ordering, read and
+ * write barriers may require nothing more than a compiler barrier.
+ *
+ * For an introduction to using memory barriers within the PostgreSQL backend,
+ * see src/backend/storage/lmgr/README.barrier
+ */
+
+#if defined(DISABLE_BARRIERS)
+
+/*
+ * Fall through to the spinlock-based implementation.
+ */
+
+#elif defined(__INTEL_COMPILER)
+
+/*
+ * icc defines __GNUC__, but doesn't support gcc's inline asm syntax
+ */
+#define pg_memory_barrier()		_mm_mfence()
+#define pg_compiler_barrier()	__memory_barrier()
+
+#elif defined(__GNUC__)
+
+/* This works on any architecture, since it's only talking to GCC itself. */
+#define pg_compiler_barrier()	__asm__ __volatile__("" : : : "memory")
+
+#if defined(__i386__) || defined(__x86_64__)	/* 32 or 64 bit x86 */
+
+/*
+ * x86 and x86_64 do not allow loads to be reordered with other loads, or
+ * stores to be reordered with other stores, but a load can be performed
+ * before a subsequent store.
+ *
+ * "lock; addl" has worked for longer than "mfence".
+ *
+ * Technically, some x86-ish chips support uncached memory access and/or
+ * special instructions that are weakly ordered.  In those cases we'd need
+ * the read and write barriers to be lfence and sfence.  But since we don't
+ * do those things, a compiler barrier should be enough.
+ */
+#define pg_memory_barrier() \
+	__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory")
+#define pg_read_barrier()	pg_compiler_barrier()
+#define pg_write_barrier()	pg_compiler_barrier()
+
+#elif defined(__ia64__) || defined(__ia64)
+
+/*
+ * Itanium is weakly ordered, so read and write barriers require a full
+ * fence.
+ */
+#define pg_memory_barrier()	__asm__ __volatile__ ("mf" : : : "memory")
+
+#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
+
+/*
+ * lwsync orders loads with respect to each other, and similarly with stores.
+ * But a load can be performed before a subsequent store, so sync must be used
+ * for a full memory barrier.
+ */
+#define pg_memory_barrier()	__asm__ __volatile__ ("sync" : : : "memory")
+#define pg_read_barrier()	__asm__ __volatile__ ("lwsync" : : : "memory")
+#define pg_write_barrier()	__asm__ __volatile__ ("lwsync" : : : "memory")
+
+#elif defined(__alpha) || defined(__alpha__)	/* Alpha */
+
+/*
+ * Unlike all other known architectures, Alpha allows dependent reads to be
+ * reordered, but we don't currently find it necessary to provide a conditional
+ * read barrier to cover that case.  We might need to add that later.
+ */
+#define pg_memory_barrier()	__asm__ __volatile__ ("mb" : : : "memory")
+#define pg_read_barrier()	__asm__ __volatile__ ("rmb" : : : "memory")
+#define pg_write_barrier()	__asm__ __volatile__ ("wmb" : : : "memory")
+
+#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+
+/*
+ * If we're on GCC 4.1.0 or higher, we should be able to get a memory
+ * barrier out of this compiler built-in.  But we prefer to rely on our
+ * own definitions where possible, and use this only as a fallback.
+ */
+#define pg_memory_barrier()	__sync_synchronize()
+
+#endif
+
+#elif defined(__ia64__) || defined(__ia64)
+
+#define pg_compiler_barrier()	_Asm_sched_fence()
+#define pg_memory_barrier()		_Asm_mf()
+
+#elif defined(WIN32_ONLY_COMPILER)
+
+/* Should work on both MSVC and Borland. */
+#include <intrin.h>
+#pragma intrinsic(_ReadWriteBarrier)
+#define pg_compiler_barrier()	_ReadWriteBarrier()
+#define pg_memory_barrier()		MemoryBarrier()
+
+#endif
+
+/*
+ * If we have no memory barrier implementation for this architecture, we
+ * fall back to acquiring and releasing a spinlock.  This might, in turn,
+ * fall back to the semaphore-based spinlock implementation, which will be
+ * amazingly slow.
+ *
+ * It's not self-evident that every possible legal implementation of a
+ * spinlock acquire-and-release would be equivalent to a full memory barrier.
+ * For example, I'm not sure that Itanium's acq and rel add up to a full
+ * fence.  But all of our actual implementations seem OK in this regard.
+ */
+#if !defined(pg_memory_barrier)
+#define pg_memory_barrier() \
+	do { S_LOCK(&dummy_spinlock); S_UNLOCK(&dummy_spinlock); } while (0)
+#endif
+
+/*
+ * If read or write barriers are undefined, we upgrade them to full memory
+ * barriers.
+ *
+ * If a compiler barrier is unavailable, you probably don't want a full
+ * memory barrier instead, so if you have a use case for a compiler barrier,
+ * you'd better use #ifdef.
+ */
+#if !defined(pg_read_barrier)
+#define pg_read_barrier()	pg_memory_barrier()
+#endif
+#if !defined(pg_write_barrier)
+#define pg_write_barrier()	pg_memory_barrier()
+#endif
+
+#endif   /* BARRIER_H */
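Nothing in the tree uses these primitives yet, but the pairing discipline the header comment describes (and that src/backend/storage/lmgr/README.barrier covers in more depth) is easy to sketch. The fragment below is not part of this commit: SharedSlot, slot_publish, and slot_consume are hypothetical names, shown only to illustrate how pg_write_barrier() and pg_read_barrier() are meant to pair across a producer and a consumer.

	#include "storage/barrier.h"

	typedef struct
	{
		int		payload;	/* data being handed off */
		int		ready;		/* flag: payload is valid */
	} SharedSlot;

	/* Producer: fill in the payload, then advertise it. */
	static void
	slot_publish(volatile SharedSlot *slot, int value)
	{
		slot->payload = value;

		/*
		 * Keep the store to payload from being reordered after the store
		 * to ready; otherwise a reader could see ready == 1 but a stale
		 * payload.
		 */
		pg_write_barrier();
		slot->ready = 1;
	}

	/* Consumer: wait for the flag, then read the payload. */
	static int
	slot_consume(volatile SharedSlot *slot)
	{
		while (!slot->ready)
			;				/* spin; illustration only */

		/*
		 * Pair with the producer's write barrier: the load of payload
		 * must not be satisfied before the load of ready.
		 */
		pg_read_barrier();
		return slot->payload;
	}

On x86, per the definitions above, both barriers cost nothing at runtime (each expands to a compiler barrier); on PowerPC each becomes an lwsync.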
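A second sketch, also hypothetical and not part of this commit, shows why the full pg_memory_barrier() remains a real instruction on x86 even though the read and write barriers do not: a store followed by a load of a different location is the one reordering x86 permits. In this Dekker-style handshake, dropping the barrier could let both sides observe the other's flag as 0 and enter together.

	#include "storage/barrier.h"

	static volatile int flag_a = 0;
	static volatile int flag_b = 0;

	/* Side A's half of the handshake; side B mirrors it with the flags swapped. */
	static int
	side_a_try_enter(void)
	{
		flag_a = 1;				/* announce intent */
		pg_memory_barrier();	/* order the store above before the load below */
		return flag_b == 0;		/* safe to enter only if B has not announced */
	}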