From e7bbf73220c96303e2d24186fbfc1aa71ed3cb25 Mon Sep 17 00:00:00 2001
From: Shinya Kato
Date: Thu, 31 Jul 2025 20:10:14 +0900
Subject: [PATCH 01/15] Add support for PostgreSQL 18

---
 lib/nbtree/nbtsort-18.c | 1963 +++++++++++++++++++++++++++++++++++++++
 lib/pg_btree.c | 4 +-
 2 files changed, 1966 insertions(+), 1 deletion(-)
 create mode 100644 lib/nbtree/nbtsort-18.c

diff --git a/lib/nbtree/nbtsort-18.c b/lib/nbtree/nbtsort-18.c
new file mode 100644
index 0000000..f5d7b3b
--- /dev/null
+++ b/lib/nbtree/nbtsort-18.c
@@ -0,0 +1,1963 @@
+/*-------------------------------------------------------------------------
+ *
+ * nbtsort.c
+ *		Build a btree from sorted input by loading leaf pages sequentially.
+ *
+ * NOTES
+ *
+ * We use tuplesort.c to sort the given index tuples into order.
+ * Then we scan the index tuples in order and build the btree pages
+ * for each level. We load source tuples into leaf-level pages.
+ * Whenever we fill a page at one level, we add a link to it to its
+ * parent level (starting a new parent level if necessary). When
+ * done, we write out each final page on each level, adding it to
+ * its parent level. When we have only one page on a level, it must be
+ * the root -- it can be attached to the btree metapage and we are done.
+ *
+ * It is not wise to pack the pages entirely full, since then *any*
+ * insertion would cause a split (and not only of the leaf page; the need
+ * for a split would cascade right up the tree). The steady-state load
+ * factor for btrees is usually estimated at 70%. We choose to pack leaf
+ * pages to the user-controllable fill factor (default 90%) while upper pages
+ * are always packed to 70%. This gives us reasonable density (there aren't
+ * many upper pages if the keys are reasonable-size) without risking a lot of
+ * cascading splits during early insertions.
+ *
+ * We use the bulk smgr loading facility to bypass the buffer cache and
+ * WAL-log the pages efficiently.
+ *
+ * This code isn't concerned about the FSM at all. The caller is responsible
+ * for initializing that.
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *		src/backend/access/nbtree/nbtsort.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/nbtree.h"
+#include "access/parallel.h"
+#include "access/relscan.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "access/xloginsert.h"
+#include "catalog/index.h"
+#include "commands/progress.h"
+#include "executor/instrument.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "storage/bulk_write.h"
+#include "tcop/tcopprot.h"		/* pgrminclude ignore */
+#include "utils/rel.h"
+#include "utils/sortsupport.h"
+#include "utils/tuplesort.h"
+
+
+/* Magic numbers for parallel state sharing */
+#define PARALLEL_KEY_BTREE_SHARED		UINT64CONST(0xA000000000000001)
+#define PARALLEL_KEY_TUPLESORT			UINT64CONST(0xA000000000000002)
+#define PARALLEL_KEY_TUPLESORT_SPOOL2	UINT64CONST(0xA000000000000003)
+#define PARALLEL_KEY_QUERY_TEXT			UINT64CONST(0xA000000000000004)
+#define PARALLEL_KEY_WAL_USAGE			UINT64CONST(0xA000000000000005)
+#define PARALLEL_KEY_BUFFER_USAGE		UINT64CONST(0xA000000000000006)
+
+/*
+ * DISABLE_LEADER_PARTICIPATION disables the leader's participation in
+ * parallel index builds. This may be useful as a debugging aid. 
+#undef DISABLE_LEADER_PARTICIPATION + */ + +/* + * Status record for spooling/sorting phase. (Note we may have two of + * these due to the special requirements for uniqueness-checking with + * dead tuples.) + */ +typedef struct BTSpool +{ + Tuplesortstate *sortstate; /* state data for tuplesort.c */ + Relation heap; + Relation index; + bool isunique; + bool nulls_not_distinct; +} BTSpool; + +/* + * Status for index builds performed in parallel. This is allocated in a + * dynamic shared memory segment. Note that there is a separate tuplesort TOC + * entry, private to tuplesort.c but allocated by this module on its behalf. + */ +typedef struct BTShared +{ + /* + * These fields are not modified during the sort. They primarily exist + * for the benefit of worker processes that need to create BTSpool state + * corresponding to that used by the leader. + */ + Oid heaprelid; + Oid indexrelid; + bool isunique; + bool nulls_not_distinct; + bool isconcurrent; + int scantuplesortstates; + + /* + * workersdonecv is used to monitor the progress of workers. All parallel + * participants must indicate that they are done before leader can use + * mutable state that workers maintain during scan (and before leader can + * proceed to tuplesort_performsort()). + */ + ConditionVariable workersdonecv; + + /* + * mutex protects all fields before heapdesc. + * + * These fields contain status information of interest to B-Tree index + * builds that must work just the same when an index is built in parallel. + */ + slock_t mutex; + + /* + * Mutable state that is maintained by workers, and reported back to + * leader at end of parallel scan. + * + * nparticipantsdone is number of worker processes finished. + * + * reltuples is the total number of input heap tuples. + * + * havedead indicates if RECENTLY_DEAD tuples were encountered during + * build. + * + * indtuples is the total number of tuples that made it into the index. + * + * brokenhotchain indicates if any worker detected a broken HOT chain + * during build. + */ + int nparticipantsdone; + double reltuples; + bool havedead; + double indtuples; + bool brokenhotchain; + + /* + * ParallelTableScanDescData data follows. Can't directly embed here, as + * implementations of the parallel table scan desc interface might need + * stronger alignment. + */ +} BTShared; + +/* + * Return pointer to a BTShared's parallel table scan. + * + * c.f. shm_toc_allocate as to why BUFFERALIGN is used, rather than just + * MAXALIGN. + */ +#define ParallelTableScanFromBTShared(shared) \ + (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(BTShared))) + +/* + * Status for leader in parallel index build. + */ +typedef struct BTLeader +{ + /* parallel context itself */ + ParallelContext *pcxt; + + /* + * nparticipanttuplesorts is the exact number of worker processes + * successfully launched, plus one leader process if it participates as a + * worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader + * participating as a worker). + */ + int nparticipanttuplesorts; + + /* + * Leader process convenience pointers to shared state (leader avoids TOC + * lookups). + * + * btshared is the shared state for entire build. sharedsort is the + * shared, tuplesort-managed state passed to each process tuplesort. + * sharedsort2 is the corresponding btspool2 shared state, used only when + * building unique indexes. snapshot is the snapshot used by the scan iff + * an MVCC snapshot is required. 
+ */ + BTShared *btshared; + Sharedsort *sharedsort; + Sharedsort *sharedsort2; + Snapshot snapshot; + WalUsage *walusage; + BufferUsage *bufferusage; +} BTLeader; + +/* + * Working state for btbuild and its callback. + * + * When parallel CREATE INDEX is used, there is a BTBuildState for each + * participant. + */ +typedef struct BTBuildState +{ + bool isunique; + bool nulls_not_distinct; + bool havedead; + Relation heap; + BTSpool *spool; + + /* + * spool2 is needed only when the index is a unique index. Dead tuples are + * put into spool2 instead of spool in order to avoid uniqueness check. + */ + BTSpool *spool2; + double indtuples; + + /* + * btleader is only present when a parallel index build is performed, and + * only in the leader process. (Actually, only the leader has a + * BTBuildState. Workers have their own spool and spool2, though.) + */ + BTLeader *btleader; +} BTBuildState; + +/* + * Status record for a btree page being built. We have one of these + * for each active tree level. + */ +typedef struct BTPageState +{ + BulkWriteBuffer btps_buf; /* workspace for page building */ + BlockNumber btps_blkno; /* block # to write this page at */ + IndexTuple btps_lowkey; /* page's strict lower bound pivot tuple */ + OffsetNumber btps_lastoff; /* last item offset loaded */ + Size btps_lastextra; /* last item's extra posting list space */ + uint32 btps_level; /* tree level (0 = leaf) */ + Size btps_full; /* "full" if less than this much free space */ + struct BTPageState *btps_next; /* link to parent level, if any */ +} BTPageState; + +/* + * Overall status record for index writing phase. + */ +typedef struct BTWriteState +{ + Relation heap; + Relation index; + BulkWriteState *bulkstate; + BTScanInsert inskey; /* generic insertion scankey */ + BlockNumber btws_pages_alloced; /* # pages allocated */ +} BTWriteState; + + +static double _bt_spools_heapscan(Relation heap, Relation index, + BTBuildState *buildstate, IndexInfo *indexInfo); +static void _bt_spooldestroy(BTSpool *btspool); +static void _bt_spool(BTSpool *btspool, ItemPointer self, + Datum *values, bool *isnull); +static void _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2); +static void _bt_build_callback(Relation index, ItemPointer tid, Datum *values, + bool *isnull, bool tupleIsAlive, void *state); +static BulkWriteBuffer _bt_blnewpage(BTWriteState *wstate, uint32 level); +static BTPageState *_bt_pagestate(BTWriteState *wstate, uint32 level); +static void _bt_slideleft(Page rightmostpage); +static void _bt_sortaddtup(Page page, Size itemsize, + IndexTuple itup, OffsetNumber itup_off, + bool newfirstdataitem); +static void _bt_buildadd(BTWriteState *wstate, BTPageState *state, + IndexTuple itup, Size truncextra); +static void _bt_sort_dedup_finish_pending(BTWriteState *wstate, + BTPageState *state, + BTDedupState dstate); +static void _bt_uppershutdown(BTWriteState *wstate, BTPageState *state); +static void _bt_load(BTWriteState *wstate, + BTSpool *btspool, BTSpool *btspool2); +static void _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, + int request); +static void _bt_end_parallel(BTLeader *btleader); +static Size _bt_parallel_estimate_shared(Relation heap, Snapshot snapshot); +static double _bt_parallel_heapscan(BTBuildState *buildstate, + bool *brokenhotchain); +static void _bt_leader_participate_as_worker(BTBuildState *buildstate); +static void _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, + BTShared *btshared, Sharedsort *sharedsort, + Sharedsort *sharedsort2, int sortmem, + bool 
progress); + + +/* + * btbuild() -- build a new btree index. + */ +IndexBuildResult * +btbuild(Relation heap, Relation index, IndexInfo *indexInfo) +{ + IndexBuildResult *result; + BTBuildState buildstate; + double reltuples; + +#ifdef BTREE_BUILD_STATS + if (log_btree_build_stats) + ResetUsage(); +#endif /* BTREE_BUILD_STATS */ + + buildstate.isunique = indexInfo->ii_Unique; + buildstate.nulls_not_distinct = indexInfo->ii_NullsNotDistinct; + buildstate.havedead = false; + buildstate.heap = heap; + buildstate.spool = NULL; + buildstate.spool2 = NULL; + buildstate.indtuples = 0; + buildstate.btleader = NULL; + + /* + * We expect to be called exactly once for any index relation. If that's + * not the case, big trouble's what we have. + */ + if (RelationGetNumberOfBlocks(index) != 0) + elog(ERROR, "index \"%s\" already contains data", + RelationGetRelationName(index)); + + reltuples = _bt_spools_heapscan(heap, index, &buildstate, indexInfo); + + /* + * Finish the build by (1) completing the sort of the spool file, (2) + * inserting the sorted tuples into btree pages and (3) building the upper + * levels. Finally, it may also be necessary to end use of parallelism. + */ + _bt_leafbuild(buildstate.spool, buildstate.spool2); + _bt_spooldestroy(buildstate.spool); + if (buildstate.spool2) + _bt_spooldestroy(buildstate.spool2); + if (buildstate.btleader) + _bt_end_parallel(buildstate.btleader); + + result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); + + result->heap_tuples = reltuples; + result->index_tuples = buildstate.indtuples; + +#ifdef BTREE_BUILD_STATS + if (log_btree_build_stats) + { + ShowUsage("BTREE BUILD STATS"); + ResetUsage(); + } +#endif /* BTREE_BUILD_STATS */ + + return result; +} + +/* + * Create and initialize one or two spool structures, and save them in caller's + * buildstate argument. May also fill-in fields within indexInfo used by index + * builds. + * + * Scans the heap, possibly in parallel, filling spools with IndexTuples. This + * routine encapsulates all aspects of managing parallelism. Caller need only + * call _bt_end_parallel() in parallel case after it is done with spool/spool2. + * + * Returns the total number of heap tuples scanned. + */ +static double +_bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, + IndexInfo *indexInfo) +{ + BTSpool *btspool = (BTSpool *) palloc0(sizeof(BTSpool)); + SortCoordinate coordinate = NULL; + double reltuples = 0; + + /* + * We size the sort area as maintenance_work_mem rather than work_mem to + * speed index creation. This should be OK since a single backend can't + * run multiple index creations in parallel (see also: notes on + * parallelism and maintenance_work_mem below). 
+ */ + btspool->heap = heap; + btspool->index = index; + btspool->isunique = indexInfo->ii_Unique; + btspool->nulls_not_distinct = indexInfo->ii_NullsNotDistinct; + + /* Save as primary spool */ + buildstate->spool = btspool; + + /* Report table scan phase started */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE, + PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN); + + /* Attempt to launch parallel worker scan when required */ + if (indexInfo->ii_ParallelWorkers > 0) + _bt_begin_parallel(buildstate, indexInfo->ii_Concurrent, + indexInfo->ii_ParallelWorkers); + + /* + * If parallel build requested and at least one worker process was + * successfully launched, set up coordination state + */ + if (buildstate->btleader) + { + coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData)); + coordinate->isWorker = false; + coordinate->nParticipants = + buildstate->btleader->nparticipanttuplesorts; + coordinate->sharedsort = buildstate->btleader->sharedsort; + } + + /* + * Begin serial/leader tuplesort. + * + * In cases where parallelism is involved, the leader receives the same + * share of maintenance_work_mem as a serial sort (it is generally treated + * in the same way as a serial sort once we return). Parallel worker + * Tuplesortstates will have received only a fraction of + * maintenance_work_mem, though. + * + * We rely on the lifetime of the Leader Tuplesortstate almost not + * overlapping with any worker Tuplesortstate's lifetime. There may be + * some small overlap, but that's okay because we rely on leader + * Tuplesortstate only allocating a small, fixed amount of memory here. + * When its tuplesort_performsort() is called (by our caller), and + * significant amounts of memory are likely to be used, all workers must + * have already freed almost all memory held by their Tuplesortstates + * (they are about to go away completely, too). The overall effect is + * that maintenance_work_mem always represents an absolute high watermark + * on the amount of memory used by a CREATE INDEX operation, regardless of + * the use of parallelism or any other factor. + */ + buildstate->spool->sortstate = + tuplesort_begin_index_btree(heap, index, buildstate->isunique, + buildstate->nulls_not_distinct, + maintenance_work_mem, coordinate, + TUPLESORT_NONE); + + /* + * If building a unique index, put dead tuples in a second spool to keep + * them out of the uniqueness check. We expect that the second spool (for + * dead tuples) won't get very full, so we give it only work_mem. + */ + if (indexInfo->ii_Unique) + { + BTSpool *btspool2 = (BTSpool *) palloc0(sizeof(BTSpool)); + SortCoordinate coordinate2 = NULL; + + /* Initialize secondary spool */ + btspool2->heap = heap; + btspool2->index = index; + btspool2->isunique = false; + /* Save as secondary spool */ + buildstate->spool2 = btspool2; + + if (buildstate->btleader) + { + /* + * Set up non-private state that is passed to + * tuplesort_begin_index_btree() about the basic high level + * coordination of a parallel sort. 
+ */ + coordinate2 = (SortCoordinate) palloc0(sizeof(SortCoordinateData)); + coordinate2->isWorker = false; + coordinate2->nParticipants = + buildstate->btleader->nparticipanttuplesorts; + coordinate2->sharedsort = buildstate->btleader->sharedsort2; + } + + /* + * We expect that the second one (for dead tuples) won't get very + * full, so we give it only work_mem + */ + buildstate->spool2->sortstate = + tuplesort_begin_index_btree(heap, index, false, false, work_mem, + coordinate2, TUPLESORT_NONE); + } + + /* Fill spool using either serial or parallel heap scan */ + if (!buildstate->btleader) + reltuples = table_index_build_scan(heap, index, indexInfo, true, true, + _bt_build_callback, (void *) buildstate, + NULL); + else + reltuples = _bt_parallel_heapscan(buildstate, + &indexInfo->ii_BrokenHotChain); + + /* + * Set the progress target for the next phase. Reset the block number + * values set by table_index_build_scan + */ + { + const int progress_index[] = { + PROGRESS_CREATEIDX_TUPLES_TOTAL, + PROGRESS_SCAN_BLOCKS_TOTAL, + PROGRESS_SCAN_BLOCKS_DONE + }; + const int64 progress_vals[] = { + buildstate->indtuples, + 0, 0 + }; + + pgstat_progress_update_multi_param(3, progress_index, progress_vals); + } + + /* okay, all heap tuples are spooled */ + if (buildstate->spool2 && !buildstate->havedead) + { + /* spool2 turns out to be unnecessary */ + _bt_spooldestroy(buildstate->spool2); + buildstate->spool2 = NULL; + } + + return reltuples; +} + +/* + * clean up a spool structure and its substructures. + */ +static void +_bt_spooldestroy(BTSpool *btspool) +{ + tuplesort_end(btspool->sortstate); + pfree(btspool); +} + +/* + * spool an index entry into the sort file. + */ +static void +_bt_spool(BTSpool *btspool, ItemPointer self, Datum *values, bool *isnull) +{ + tuplesort_putindextuplevalues(btspool->sortstate, btspool->index, + self, values, isnull); +} + +/* + * given a spool loaded by successive calls to _bt_spool, + * create an entire btree. 
+ */ +static void +_bt_leafbuild(BTSpool *btspool, BTSpool *btspool2) +{ + BTWriteState wstate; + +#ifdef BTREE_BUILD_STATS + if (log_btree_build_stats) + { + ShowUsage("BTREE BUILD (Spool) STATISTICS"); + ResetUsage(); + } +#endif /* BTREE_BUILD_STATS */ + + /* Execute the sort */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE, + PROGRESS_BTREE_PHASE_PERFORMSORT_1); + tuplesort_performsort(btspool->sortstate); + if (btspool2) + { + pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE, + PROGRESS_BTREE_PHASE_PERFORMSORT_2); + tuplesort_performsort(btspool2->sortstate); + } + + wstate.heap = btspool->heap; + wstate.index = btspool->index; + wstate.inskey = _bt_mkscankey(wstate.index, NULL); + /* _bt_mkscankey() won't set allequalimage without metapage */ + wstate.inskey->allequalimage = _bt_allequalimage(wstate.index, true); + + /* reserve the metapage */ + wstate.btws_pages_alloced = BTREE_METAPAGE + 1; + + pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE, + PROGRESS_BTREE_PHASE_LEAF_LOAD); + _bt_load(&wstate, btspool, btspool2); +} + +/* + * Per-tuple callback for table_index_build_scan + */ +static void +_bt_build_callback(Relation index, + ItemPointer tid, + Datum *values, + bool *isnull, + bool tupleIsAlive, + void *state) +{ + BTBuildState *buildstate = (BTBuildState *) state; + + /* + * insert the index tuple into the appropriate spool file for subsequent + * processing + */ + if (tupleIsAlive || buildstate->spool2 == NULL) + _bt_spool(buildstate->spool, tid, values, isnull); + else + { + /* dead tuples are put into spool2 */ + buildstate->havedead = true; + _bt_spool(buildstate->spool2, tid, values, isnull); + } + + buildstate->indtuples += 1; +} + +/* + * allocate workspace for a new, clean btree page, not linked to any siblings. + */ +static BulkWriteBuffer +_bt_blnewpage(BTWriteState *wstate, uint32 level) +{ + BulkWriteBuffer buf; + Page page; + BTPageOpaque opaque; + + buf = smgr_bulk_get_buf(wstate->bulkstate); + page = (Page) buf; + + /* Zero the page and set up standard page header info */ + _bt_pageinit(page, BLCKSZ); + + /* Initialize BT opaque state */ + opaque = BTPageGetOpaque(page); + opaque->btpo_prev = opaque->btpo_next = P_NONE; + opaque->btpo_level = level; + opaque->btpo_flags = (level > 0) ? 0 : BTP_LEAF; + opaque->btpo_cycleid = 0; + + /* Make the P_HIKEY line pointer appear allocated */ + ((PageHeader) page)->pd_lower += sizeof(ItemIdData); + + return buf; +} + +/* + * emit a completed btree page, and release the working storage. + */ +static void +_bt_blwritepage(BTWriteState *wstate, BulkWriteBuffer buf, BlockNumber blkno) +{ + smgr_bulk_write(wstate->bulkstate, blkno, buf, true); + /* smgr_bulk_write took ownership of 'buf' */ +} + +/* + * allocate and initialize a new BTPageState. the returned structure + * is suitable for immediate use by _bt_buildadd. + */ +static BTPageState * +_bt_pagestate(BTWriteState *wstate, uint32 level) +{ + BTPageState *state = (BTPageState *) palloc0(sizeof(BTPageState)); + + /* create initial page for level */ + state->btps_buf = _bt_blnewpage(wstate, level); + + /* and assign it a page position */ + state->btps_blkno = wstate->btws_pages_alloced++; + + state->btps_lowkey = NULL; + /* initialize lastoff so first item goes into P_FIRSTKEY */ + state->btps_lastoff = P_HIKEY; + state->btps_lastextra = 0; + state->btps_level = level; + /* set "full" threshold based on level. See notes at head of file. 
*/ + if (level > 0) + state->btps_full = (BLCKSZ * (100 - BTREE_NONLEAF_FILLFACTOR) / 100); + else + state->btps_full = BTGetTargetPageFreeSpace(wstate->index); + + /* no parent level, yet */ + state->btps_next = NULL; + + return state; +} + +/* + * Slide the array of ItemIds from the page back one slot (from P_FIRSTKEY to + * P_HIKEY, overwriting P_HIKEY). + * + * _bt_blnewpage() makes the P_HIKEY line pointer appear allocated, but the + * rightmost page on its level is not supposed to get a high key. Now that + * it's clear that this page is a rightmost page, remove the unneeded empty + * P_HIKEY line pointer space. + */ +static void +_bt_slideleft(Page rightmostpage) +{ + OffsetNumber off; + OffsetNumber maxoff; + ItemId previi; + + maxoff = PageGetMaxOffsetNumber(rightmostpage); + Assert(maxoff >= P_FIRSTKEY); + previi = PageGetItemId(rightmostpage, P_HIKEY); + for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off)) + { + ItemId thisii = PageGetItemId(rightmostpage, off); + + *previi = *thisii; + previi = thisii; + } + ((PageHeader) rightmostpage)->pd_lower -= sizeof(ItemIdData); +} + +/* + * Add an item to a page being built. + * + * This is very similar to nbtinsert.c's _bt_pgaddtup(), but this variant + * raises an error directly. + * + * Note that our nbtsort.c caller does not know yet if the page will be + * rightmost. Offset P_FIRSTKEY is always assumed to be the first data key by + * caller. Page that turns out to be the rightmost on its level is fixed by + * calling _bt_slideleft(). + */ +static void +_bt_sortaddtup(Page page, + Size itemsize, + IndexTuple itup, + OffsetNumber itup_off, + bool newfirstdataitem) +{ + IndexTupleData trunctuple; + + if (newfirstdataitem) + { + trunctuple = *itup; + trunctuple.t_info = sizeof(IndexTupleData); + BTreeTupleSetNAtts(&trunctuple, 0, false); + itup = &trunctuple; + itemsize = sizeof(IndexTupleData); + } + + if (PageAddItem(page, (Item) itup, itemsize, itup_off, + false, false) == InvalidOffsetNumber) + elog(ERROR, "failed to add item to the index page"); +} + +/*---------- + * Add an item to a disk page from the sort output (or add a posting list + * item formed from the sort output). + * + * We must be careful to observe the page layout conventions of nbtsearch.c: + * - rightmost pages start data items at P_HIKEY instead of at P_FIRSTKEY. + * - on non-leaf pages, the key portion of the first item need not be + * stored, we should store only the link. + * + * A leaf page being built looks like: + * + * +----------------+---------------------------------+ + * | PageHeaderData | linp0 linp1 linp2 ... | + * +-----------+----+---------------------------------+ + * | ... linpN | | + * +-----------+--------------------------------------+ + * | ^ last | + * | | + * +-------------+------------------------------------+ + * | | itemN ... | + * +-------------+------------------+-----------------+ + * | ... item3 item2 item1 | "special space" | + * +--------------------------------+-----------------+ + * + * Contrast this with the diagram in bufpage.h; note the mismatch + * between linps and items. This is because we reserve linp0 as a + * placeholder for the pointer to the "high key" item; when we have + * filled up the page, we will set linp0 to point to itemN and clear + * linpN. On the other hand, if we find this is the last (rightmost) + * page, we leave the items alone and slide the linp array over. If + * the high key is to be truncated, offset 1 is deleted, and we insert + * the truncated high key at offset 1. 
+ * + * 'last' pointer indicates the last offset added to the page. + * + * 'truncextra' is the size of the posting list in itup, if any. This + * information is stashed for the next call here, when we may benefit + * from considering the impact of truncating away the posting list on + * the page before deciding to finish the page off. Posting lists are + * often relatively large, so it is worth going to the trouble of + * accounting for the saving from truncating away the posting list of + * the tuple that becomes the high key (that may be the only way to + * get close to target free space on the page). Note that this is + * only used for the soft fillfactor-wise limit, not the critical hard + * limit. + *---------- + */ +static void +_bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup, + Size truncextra) +{ + BulkWriteBuffer nbuf; + Page npage; + BlockNumber nblkno; + OffsetNumber last_off; + Size last_truncextra; + Size pgspc; + Size itupsz; + bool isleaf; + + /* + * This is a handy place to check for cancel interrupts during the btree + * load phase of index creation. + */ + CHECK_FOR_INTERRUPTS(); + + nbuf = state->btps_buf; + npage = (Page) nbuf; + nblkno = state->btps_blkno; + last_off = state->btps_lastoff; + last_truncextra = state->btps_lastextra; + state->btps_lastextra = truncextra; + + pgspc = PageGetFreeSpace(npage); + itupsz = IndexTupleSize(itup); + itupsz = MAXALIGN(itupsz); + /* Leaf case has slightly different rules due to suffix truncation */ + isleaf = (state->btps_level == 0); + + /* + * Check whether the new item can fit on a btree page on current level at + * all. + * + * Every newly built index will treat heap TID as part of the keyspace, + * which imposes the requirement that new high keys must occasionally have + * a heap TID appended within _bt_truncate(). That may leave a new pivot + * tuple one or two MAXALIGN() quantums larger than the original + * firstright tuple it's derived from. v4 deals with the problem by + * decreasing the limit on the size of tuples inserted on the leaf level + * by the same small amount. Enforce the new v4+ limit on the leaf level, + * and the old limit on internal levels, since pivot tuples may need to + * make use of the reserved space. This should never fail on internal + * pages. + */ + if (unlikely(itupsz > BTMaxItemSize(npage))) + _bt_check_third_page(wstate->index, wstate->heap, isleaf, npage, + itup); + + /* + * Check to see if current page will fit new item, with space left over to + * append a heap TID during suffix truncation when page is a leaf page. + * + * It is guaranteed that we can fit at least 2 non-pivot tuples plus a + * high key with heap TID when finishing off a leaf page, since we rely on + * _bt_check_third_page() rejecting oversized non-pivot tuples. On + * internal pages we can always fit 3 pivot tuples with larger internal + * page tuple limit (includes page high key). + * + * Most of the time, a page is only "full" in the sense that the soft + * fillfactor-wise limit has been exceeded. However, we must always leave + * at least two items plus a high key on each page before starting a new + * page. Disregard fillfactor and insert on "full" current page if we + * don't have the minimum number of items yet. (Note that we deliberately + * assume that suffix truncation neither enlarges nor shrinks new high key + * when applying soft limit, except when last tuple has a posting list.) + */ + Assert(last_truncextra == 0 || isleaf); + if (pgspc < itupsz + (isleaf ? 
MAXALIGN(sizeof(ItemPointerData)) : 0) || + (pgspc + last_truncextra < state->btps_full && last_off > P_FIRSTKEY)) + { + /* + * Finish off the page and write it out. + */ + BulkWriteBuffer obuf = nbuf; + Page opage = npage; + BlockNumber oblkno = nblkno; + ItemId ii; + ItemId hii; + IndexTuple oitup; + + /* Create new page of same level */ + nbuf = _bt_blnewpage(wstate, state->btps_level); + npage = (Page) nbuf; + + /* and assign it a page position */ + nblkno = wstate->btws_pages_alloced++; + + /* + * We copy the last item on the page into the new page, and then + * rearrange the old page so that the 'last item' becomes its high key + * rather than a true data item. There had better be at least two + * items on the page already, else the page would be empty of useful + * data. + */ + Assert(last_off > P_FIRSTKEY); + ii = PageGetItemId(opage, last_off); + oitup = (IndexTuple) PageGetItem(opage, ii); + _bt_sortaddtup(npage, ItemIdGetLength(ii), oitup, P_FIRSTKEY, + !isleaf); + + /* + * Move 'last' into the high key position on opage. _bt_blnewpage() + * allocated empty space for a line pointer when opage was first + * created, so this is a matter of rearranging already-allocated space + * on page, and initializing high key line pointer. (Actually, leaf + * pages must also swap oitup with a truncated version of oitup, which + * is sometimes larger than oitup, though never by more than the space + * needed to append a heap TID.) + */ + hii = PageGetItemId(opage, P_HIKEY); + *hii = *ii; + ItemIdSetUnused(ii); /* redundant */ + ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData); + + if (isleaf) + { + IndexTuple lastleft; + IndexTuple truncated; + + /* + * Truncate away any unneeded attributes from high key on leaf + * level. This is only done at the leaf level because downlinks + * in internal pages are either negative infinity items, or get + * their contents from copying from one level down. See also: + * _bt_split(). + * + * We don't try to bias our choice of split point to make it more + * likely that _bt_truncate() can truncate away more attributes, + * whereas the split point used within _bt_split() is chosen much + * more delicately. Even still, the lastleft and firstright + * tuples passed to _bt_truncate() here are at least not fully + * equal to each other when deduplication is used, unless there is + * a large group of duplicates (also, unique index builds usually + * have few or no spool2 duplicates). When the split point is + * between two unequal tuples, _bt_truncate() will avoid including + * a heap TID in the new high key, which is the most important + * benefit of suffix truncation. + * + * Overwrite the old item with new truncated high key directly. + * oitup is already located at the physical beginning of tuple + * space, so this should directly reuse the existing tuple space. + */ + ii = PageGetItemId(opage, OffsetNumberPrev(last_off)); + lastleft = (IndexTuple) PageGetItem(opage, ii); + + Assert(IndexTupleSize(oitup) > last_truncextra); + truncated = _bt_truncate(wstate->index, lastleft, oitup, + wstate->inskey); + if (!PageIndexTupleOverwrite(opage, P_HIKEY, (Item) truncated, + IndexTupleSize(truncated))) + elog(ERROR, "failed to add high key to the index page"); + pfree(truncated); + + /* oitup should continue to point to the page's high key */ + hii = PageGetItemId(opage, P_HIKEY); + oitup = (IndexTuple) PageGetItem(opage, hii); + } + + /* + * Link the old page into its parent, using its low key. 
If we don't + * have a parent, we have to create one; this adds a new btree level. + */ + if (state->btps_next == NULL) + state->btps_next = _bt_pagestate(wstate, state->btps_level + 1); + + Assert((BTreeTupleGetNAtts(state->btps_lowkey, wstate->index) <= + IndexRelationGetNumberOfKeyAttributes(wstate->index) && + BTreeTupleGetNAtts(state->btps_lowkey, wstate->index) > 0) || + P_LEFTMOST(BTPageGetOpaque(opage))); + Assert(BTreeTupleGetNAtts(state->btps_lowkey, wstate->index) == 0 || + !P_LEFTMOST(BTPageGetOpaque(opage))); + BTreeTupleSetDownLink(state->btps_lowkey, oblkno); + _bt_buildadd(wstate, state->btps_next, state->btps_lowkey, 0); + pfree(state->btps_lowkey); + + /* + * Save a copy of the high key from the old page. It is also the low + * key for the new page. + */ + state->btps_lowkey = CopyIndexTuple(oitup); + + /* + * Set the sibling links for both pages. + */ + { + BTPageOpaque oopaque = BTPageGetOpaque(opage); + BTPageOpaque nopaque = BTPageGetOpaque(npage); + + oopaque->btpo_next = nblkno; + nopaque->btpo_prev = oblkno; + nopaque->btpo_next = P_NONE; /* redundant */ + } + + /* + * Write out the old page. _bt_blwritepage takes ownership of the + * 'opage' buffer. + */ + _bt_blwritepage(wstate, obuf, oblkno); + + /* + * Reset last_off to point to new page + */ + last_off = P_FIRSTKEY; + } + + /* + * By here, either original page is still the current page, or a new page + * was created that became the current page. Either way, the current page + * definitely has space for new item. + * + * If the new item is the first for its page, it must also be the first + * item on its entire level. On later same-level pages, a low key for a + * page will be copied from the prior page in the code above. Generate a + * minus infinity low key here instead. + */ + if (last_off == P_HIKEY) + { + Assert(state->btps_lowkey == NULL); + state->btps_lowkey = palloc0(sizeof(IndexTupleData)); + state->btps_lowkey->t_info = sizeof(IndexTupleData); + BTreeTupleSetNAtts(state->btps_lowkey, 0, false); + } + + /* + * Add the new item into the current page. + */ + last_off = OffsetNumberNext(last_off); + _bt_sortaddtup(npage, itupsz, itup, last_off, + !isleaf && last_off == P_FIRSTKEY); + + state->btps_buf = nbuf; + state->btps_blkno = nblkno; + state->btps_lastoff = last_off; +} + +/* + * Finalize pending posting list tuple, and add it to the index. Final tuple + * is based on saved base tuple, and saved list of heap TIDs. + * + * This is almost like _bt_dedup_finish_pending(), but it adds a new tuple + * using _bt_buildadd(). + */ +static void +_bt_sort_dedup_finish_pending(BTWriteState *wstate, BTPageState *state, + BTDedupState dstate) +{ + Assert(dstate->nitems > 0); + + if (dstate->nitems == 1) + _bt_buildadd(wstate, state, dstate->base, 0); + else + { + IndexTuple postingtuple; + Size truncextra; + + /* form a tuple with a posting list */ + postingtuple = _bt_form_posting(dstate->base, + dstate->htids, + dstate->nhtids); + /* Calculate posting list overhead */ + truncextra = IndexTupleSize(postingtuple) - + BTreeTupleGetPostingOffset(postingtuple); + + _bt_buildadd(wstate, state, postingtuple, truncextra); + pfree(postingtuple); + } + + dstate->nmaxitems = 0; + dstate->nhtids = 0; + dstate->nitems = 0; + dstate->phystupsize = 0; +} + +/* + * Finish writing out the completed btree. 
+ */ +static void +_bt_uppershutdown(BTWriteState *wstate, BTPageState *state) +{ + BTPageState *s; + BlockNumber rootblkno = P_NONE; + uint32 rootlevel = 0; + BulkWriteBuffer metabuf; + + /* + * Each iteration of this loop completes one more level of the tree. + */ + for (s = state; s != NULL; s = s->btps_next) + { + BlockNumber blkno; + BTPageOpaque opaque; + + blkno = s->btps_blkno; + opaque = BTPageGetOpaque((Page) s->btps_buf); + + /* + * We have to link the last page on this level to somewhere. + * + * If we're at the top, it's the root, so attach it to the metapage. + * Otherwise, add an entry for it to its parent using its low key. + * This may cause the last page of the parent level to split, but + * that's not a problem -- we haven't gotten to it yet. + */ + if (s->btps_next == NULL) + { + opaque->btpo_flags |= BTP_ROOT; + rootblkno = blkno; + rootlevel = s->btps_level; + } + else + { + Assert((BTreeTupleGetNAtts(s->btps_lowkey, wstate->index) <= + IndexRelationGetNumberOfKeyAttributes(wstate->index) && + BTreeTupleGetNAtts(s->btps_lowkey, wstate->index) > 0) || + P_LEFTMOST(opaque)); + Assert(BTreeTupleGetNAtts(s->btps_lowkey, wstate->index) == 0 || + !P_LEFTMOST(opaque)); + BTreeTupleSetDownLink(s->btps_lowkey, blkno); + _bt_buildadd(wstate, s->btps_next, s->btps_lowkey, 0); + pfree(s->btps_lowkey); + s->btps_lowkey = NULL; + } + + /* + * This is the rightmost page, so the ItemId array needs to be slid + * back one slot. Then we can dump out the page. + */ + _bt_slideleft((Page) s->btps_buf); + _bt_blwritepage(wstate, s->btps_buf, s->btps_blkno); + s->btps_buf = NULL; /* writepage took ownership of the buffer */ + } + + /* + * As the last step in the process, construct the metapage and make it + * point to the new root (unless we had no data at all, in which case it's + * set to point to "P_NONE"). This changes the index to the "valid" state + * by filling in a valid magic number in the metapage. + */ + metabuf = smgr_bulk_get_buf(wstate->bulkstate); + _bt_initmetapage((Page) metabuf, rootblkno, rootlevel, + wstate->inskey->allequalimage); + _bt_blwritepage(wstate, metabuf, BTREE_METAPAGE); +} + +/* + * Read tuples in correct sort order from tuplesort, and load them into + * btree leaves. + */ +static void +_bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) +{ + BTPageState *state = NULL; + bool merge = (btspool2 != NULL); + IndexTuple itup, + itup2 = NULL; + bool load1; + TupleDesc tupdes = RelationGetDescr(wstate->index); + int i, + keysz = IndexRelationGetNumberOfKeyAttributes(wstate->index); + SortSupport sortKeys; + int64 tuples_done = 0; + bool deduplicate; + + wstate->bulkstate = smgr_bulk_start_rel(wstate->index, MAIN_FORKNUM); + + deduplicate = wstate->inskey->allequalimage && !btspool->isunique && + BTGetDeduplicateItems(wstate->index); + + if (merge) + { + /* + * Another BTSpool for dead tuples exists. Now we have to merge + * btspool and btspool2. 
+ */ + + /* the preparation of merge */ + itup = tuplesort_getindextuple(btspool->sortstate, true); + itup2 = tuplesort_getindextuple(btspool2->sortstate, true); + + /* Prepare SortSupport data for each column */ + sortKeys = (SortSupport) palloc0(keysz * sizeof(SortSupportData)); + + for (i = 0; i < keysz; i++) + { + SortSupport sortKey = sortKeys + i; + ScanKey scanKey = wstate->inskey->scankeys + i; + int16 strategy; + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = scanKey->sk_collation; + sortKey->ssup_nulls_first = + (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; + sortKey->ssup_attno = scanKey->sk_attno; + /* Abbreviation is not supported here */ + sortKey->abbreviate = false; + + Assert(sortKey->ssup_attno != 0); + + strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? + BTGreaterStrategyNumber : BTLessStrategyNumber; + + PrepareSortSupportFromIndexRel(wstate->index, strategy, sortKey); + } + + for (;;) + { + load1 = true; /* load BTSpool next ? */ + if (itup2 == NULL) + { + if (itup == NULL) + break; + } + else if (itup != NULL) + { + int32 compare = 0; + + for (i = 1; i <= keysz; i++) + { + SortSupport entry; + Datum attrDatum1, + attrDatum2; + bool isNull1, + isNull2; + + entry = sortKeys + i - 1; + attrDatum1 = index_getattr(itup, i, tupdes, &isNull1); + attrDatum2 = index_getattr(itup2, i, tupdes, &isNull2); + + compare = ApplySortComparator(attrDatum1, isNull1, + attrDatum2, isNull2, + entry); + if (compare > 0) + { + load1 = false; + break; + } + else if (compare < 0) + break; + } + + /* + * If key values are equal, we sort on ItemPointer. This is + * required for btree indexes, since heap TID is treated as an + * implicit last key attribute in order to ensure that all + * keys in the index are physically unique. + */ + if (compare == 0) + { + compare = ItemPointerCompare(&itup->t_tid, &itup2->t_tid); + Assert(compare != 0); + if (compare > 0) + load1 = false; + } + } + else + load1 = false; + + /* When we see first tuple, create first index page */ + if (state == NULL) + state = _bt_pagestate(wstate, 0); + + if (load1) + { + _bt_buildadd(wstate, state, itup, 0); + itup = tuplesort_getindextuple(btspool->sortstate, true); + } + else + { + _bt_buildadd(wstate, state, itup2, 0); + itup2 = tuplesort_getindextuple(btspool2->sortstate, true); + } + + /* Report progress */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE, + ++tuples_done); + } + pfree(sortKeys); + } + else if (deduplicate) + { + /* merge is unnecessary, deduplicate into posting lists */ + BTDedupState dstate; + + dstate = (BTDedupState) palloc(sizeof(BTDedupStateData)); + dstate->deduplicate = true; /* unused */ + dstate->nmaxitems = 0; /* unused */ + dstate->maxpostingsize = 0; /* set later */ + /* Metadata about base tuple of current pending posting list */ + dstate->base = NULL; + dstate->baseoff = InvalidOffsetNumber; /* unused */ + dstate->basetupsize = 0; + /* Metadata about current pending posting list TIDs */ + dstate->htids = NULL; + dstate->nhtids = 0; + dstate->nitems = 0; + dstate->phystupsize = 0; /* unused */ + dstate->nintervals = 0; /* unused */ + + while ((itup = tuplesort_getindextuple(btspool->sortstate, + true)) != NULL) + { + /* When we see first tuple, create first index page */ + if (state == NULL) + { + state = _bt_pagestate(wstate, 0); + + /* + * Limit size of posting list tuples to 1/10 space we want to + * leave behind on the page, plus space for final item's line + * pointer. 
This is equal to the space that we'd like to + * leave behind on each leaf page when fillfactor is 90, + * allowing us to get close to fillfactor% space utilization + * when there happen to be a great many duplicates. (This + * makes higher leaf fillfactor settings ineffective when + * building indexes that have many duplicates, but packing + * leaf pages full with few very large tuples doesn't seem + * like a useful goal.) + */ + dstate->maxpostingsize = MAXALIGN_DOWN((BLCKSZ * 10 / 100)) - + sizeof(ItemIdData); + Assert(dstate->maxpostingsize <= BTMaxItemSize((Page) state->btps_buf) && + dstate->maxpostingsize <= INDEX_SIZE_MASK); + dstate->htids = palloc(dstate->maxpostingsize); + + /* start new pending posting list with itup copy */ + _bt_dedup_start_pending(dstate, CopyIndexTuple(itup), + InvalidOffsetNumber); + } + else if (_bt_keep_natts_fast(wstate->index, dstate->base, + itup) > keysz && + _bt_dedup_save_htid(dstate, itup)) + { + /* + * Tuple is equal to base tuple of pending posting list. Heap + * TID from itup has been saved in state. + */ + } + else + { + /* + * Tuple is not equal to pending posting list tuple, or + * _bt_dedup_save_htid() opted to not merge current item into + * pending posting list. + */ + _bt_sort_dedup_finish_pending(wstate, state, dstate); + pfree(dstate->base); + + /* start new pending posting list with itup copy */ + _bt_dedup_start_pending(dstate, CopyIndexTuple(itup), + InvalidOffsetNumber); + } + + /* Report progress */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE, + ++tuples_done); + } + + if (state) + { + /* + * Handle the last item (there must be a last item when the + * tuplesort returned one or more tuples) + */ + _bt_sort_dedup_finish_pending(wstate, state, dstate); + pfree(dstate->base); + pfree(dstate->htids); + } + + pfree(dstate); + } + else + { + /* merging and deduplication are both unnecessary */ + while ((itup = tuplesort_getindextuple(btspool->sortstate, + true)) != NULL) + { + /* When we see first tuple, create first index page */ + if (state == NULL) + state = _bt_pagestate(wstate, 0); + + _bt_buildadd(wstate, state, itup, 0); + + /* Report progress */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE, + ++tuples_done); + } + } + + /* Close down final pages and write the metapage */ + _bt_uppershutdown(wstate, state); + smgr_bulk_finish(wstate->bulkstate); +} + +/* + * Create parallel context, and launch workers for leader. + * + * buildstate argument should be initialized (with the exception of the + * tuplesort state in spools, which may later be created based on shared + * state initially set up here). + * + * isconcurrent indicates if operation is CREATE INDEX CONCURRENTLY. + * + * request is the target number of parallel worker processes to launch. + * + * Sets buildstate's BTLeader, which caller must use to shut down parallel + * mode by passing it to _bt_end_parallel() at the very end of its index + * build. If not even a single worker process can be launched, this is + * never set, and caller should proceed with a serial index build. 
+ */ +static void +_bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) +{ + ParallelContext *pcxt; + int scantuplesortstates; + Snapshot snapshot; + Size estbtshared; + Size estsort; + BTShared *btshared; + Sharedsort *sharedsort; + Sharedsort *sharedsort2; + BTSpool *btspool = buildstate->spool; + BTLeader *btleader = (BTLeader *) palloc0(sizeof(BTLeader)); + WalUsage *walusage; + BufferUsage *bufferusage; + bool leaderparticipates = true; + int querylen; + +#ifdef DISABLE_LEADER_PARTICIPATION + leaderparticipates = false; +#endif + + /* + * Enter parallel mode, and create context for parallel build of btree + * index + */ + EnterParallelMode(); + Assert(request > 0); + pcxt = CreateParallelContext("postgres", "_bt_parallel_build_main", + request); + + scantuplesortstates = leaderparticipates ? request + 1 : request; + + /* + * Prepare for scan of the base relation. In a normal index build, we use + * SnapshotAny because we must retrieve all tuples and do our own time + * qual checks (because we have to index RECENTLY_DEAD tuples). In a + * concurrent build, we take a regular MVCC snapshot and index whatever's + * live according to that. + */ + if (!isconcurrent) + snapshot = SnapshotAny; + else + snapshot = RegisterSnapshot(GetTransactionSnapshot()); + + /* + * Estimate size for our own PARALLEL_KEY_BTREE_SHARED workspace, and + * PARALLEL_KEY_TUPLESORT tuplesort workspace + */ + estbtshared = _bt_parallel_estimate_shared(btspool->heap, snapshot); + shm_toc_estimate_chunk(&pcxt->estimator, estbtshared); + estsort = tuplesort_estimate_shared(scantuplesortstates); + shm_toc_estimate_chunk(&pcxt->estimator, estsort); + + /* + * Unique case requires a second spool, and so we may have to account for + * another shared workspace for that -- PARALLEL_KEY_TUPLESORT_SPOOL2 + */ + if (!btspool->isunique) + shm_toc_estimate_keys(&pcxt->estimator, 2); + else + { + shm_toc_estimate_chunk(&pcxt->estimator, estsort); + shm_toc_estimate_keys(&pcxt->estimator, 3); + } + + /* + * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE + * and PARALLEL_KEY_BUFFER_USAGE. + * + * If there are no extensions loaded that care, we could skip this. We + * have no way of knowing whether anyone's looking at pgWalUsage or + * pgBufferUsage, so do it unconditionally. 
+ */ + shm_toc_estimate_chunk(&pcxt->estimator, + mul_size(sizeof(WalUsage), pcxt->nworkers)); + shm_toc_estimate_keys(&pcxt->estimator, 1); + shm_toc_estimate_chunk(&pcxt->estimator, + mul_size(sizeof(BufferUsage), pcxt->nworkers)); + shm_toc_estimate_keys(&pcxt->estimator, 1); + + /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */ + if (debug_query_string) + { + querylen = strlen(debug_query_string); + shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1); + shm_toc_estimate_keys(&pcxt->estimator, 1); + } + else + querylen = 0; /* keep compiler quiet */ + + /* Everyone's had a chance to ask for space, so now create the DSM */ + InitializeParallelDSM(pcxt); + + /* If no DSM segment was available, back out (do serial build) */ + if (pcxt->seg == NULL) + { + if (IsMVCCSnapshot(snapshot)) + UnregisterSnapshot(snapshot); + DestroyParallelContext(pcxt); + ExitParallelMode(); + return; + } + + /* Store shared build state, for which we reserved space */ + btshared = (BTShared *) shm_toc_allocate(pcxt->toc, estbtshared); + /* Initialize immutable state */ + btshared->heaprelid = RelationGetRelid(btspool->heap); + btshared->indexrelid = RelationGetRelid(btspool->index); + btshared->isunique = btspool->isunique; + btshared->nulls_not_distinct = btspool->nulls_not_distinct; + btshared->isconcurrent = isconcurrent; + btshared->scantuplesortstates = scantuplesortstates; + ConditionVariableInit(&btshared->workersdonecv); + SpinLockInit(&btshared->mutex); + /* Initialize mutable state */ + btshared->nparticipantsdone = 0; + btshared->reltuples = 0.0; + btshared->havedead = false; + btshared->indtuples = 0.0; + btshared->brokenhotchain = false; + table_parallelscan_initialize(btspool->heap, + ParallelTableScanFromBTShared(btshared), + snapshot); + + /* + * Store shared tuplesort-private state, for which we reserved space. + * Then, initialize opaque state using tuplesort routine. + */ + sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort); + tuplesort_initialize_shared(sharedsort, scantuplesortstates, + pcxt->seg); + + shm_toc_insert(pcxt->toc, PARALLEL_KEY_BTREE_SHARED, btshared); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort); + + /* Unique case requires a second spool, and associated shared state */ + if (!btspool->isunique) + sharedsort2 = NULL; + else + { + /* + * Store additional shared tuplesort-private state, for which we + * reserved space. Then, initialize opaque state using tuplesort + * routine. + */ + sharedsort2 = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort); + tuplesort_initialize_shared(sharedsort2, scantuplesortstates, + pcxt->seg); + + shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT_SPOOL2, sharedsort2); + } + + /* Store query string for workers */ + if (debug_query_string) + { + char *sharedquery; + + sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1); + memcpy(sharedquery, debug_query_string, querylen + 1); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery); + } + + /* + * Allocate space for each worker's WalUsage and BufferUsage; no need to + * initialize. 
+ */ + walusage = shm_toc_allocate(pcxt->toc, + mul_size(sizeof(WalUsage), pcxt->nworkers)); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage); + bufferusage = shm_toc_allocate(pcxt->toc, + mul_size(sizeof(BufferUsage), pcxt->nworkers)); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage); + + /* Launch workers, saving status for leader/caller */ + LaunchParallelWorkers(pcxt); + btleader->pcxt = pcxt; + btleader->nparticipanttuplesorts = pcxt->nworkers_launched; + if (leaderparticipates) + btleader->nparticipanttuplesorts++; + btleader->btshared = btshared; + btleader->sharedsort = sharedsort; + btleader->sharedsort2 = sharedsort2; + btleader->snapshot = snapshot; + btleader->walusage = walusage; + btleader->bufferusage = bufferusage; + + /* If no workers were successfully launched, back out (do serial build) */ + if (pcxt->nworkers_launched == 0) + { + _bt_end_parallel(btleader); + return; + } + + /* Save leader state now that it's clear build will be parallel */ + buildstate->btleader = btleader; + + /* Join heap scan ourselves */ + if (leaderparticipates) + _bt_leader_participate_as_worker(buildstate); + + /* + * Caller needs to wait for all launched workers when we return. Make + * sure that the failure-to-start case will not hang forever. + */ + WaitForParallelWorkersToAttach(pcxt); +} + +/* + * Shut down workers, destroy parallel context, and end parallel mode. + */ +static void +_bt_end_parallel(BTLeader *btleader) +{ + int i; + + /* Shutdown worker processes */ + WaitForParallelWorkersToFinish(btleader->pcxt); + + /* + * Next, accumulate WAL usage. (This must wait for the workers to finish, + * or we might get incomplete data.) + */ + for (i = 0; i < btleader->pcxt->nworkers_launched; i++) + InstrAccumParallelQuery(&btleader->bufferusage[i], &btleader->walusage[i]); + + /* Free last reference to MVCC snapshot, if one was used */ + if (IsMVCCSnapshot(btleader->snapshot)) + UnregisterSnapshot(btleader->snapshot); + DestroyParallelContext(btleader->pcxt); + ExitParallelMode(); +} + +/* + * Returns size of shared memory required to store state for a parallel + * btree index build based on the snapshot its parallel scan will use. + */ +static Size +_bt_parallel_estimate_shared(Relation heap, Snapshot snapshot) +{ + /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */ + return add_size(BUFFERALIGN(sizeof(BTShared)), + table_parallelscan_estimate(heap, snapshot)); +} + +/* + * Within leader, wait for end of heap scan. + * + * When called, parallel heap scan started by _bt_begin_parallel() will + * already be underway within worker processes (when leader participates + * as a worker, we should end up here just as workers are finishing). + * + * Fills in fields needed for ambuild statistics, and lets caller set + * field indicating that some worker encountered a broken HOT chain. + * + * Returns the total number of heap tuples scanned. 
+ */ +static double +_bt_parallel_heapscan(BTBuildState *buildstate, bool *brokenhotchain) +{ + BTShared *btshared = buildstate->btleader->btshared; + int nparticipanttuplesorts; + double reltuples; + + nparticipanttuplesorts = buildstate->btleader->nparticipanttuplesorts; + for (;;) + { + SpinLockAcquire(&btshared->mutex); + if (btshared->nparticipantsdone == nparticipanttuplesorts) + { + buildstate->havedead = btshared->havedead; + buildstate->indtuples = btshared->indtuples; + *brokenhotchain = btshared->brokenhotchain; + reltuples = btshared->reltuples; + SpinLockRelease(&btshared->mutex); + break; + } + SpinLockRelease(&btshared->mutex); + + ConditionVariableSleep(&btshared->workersdonecv, + WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN); + } + + ConditionVariableCancelSleep(); + + return reltuples; +} + +/* + * Within leader, participate as a parallel worker. + */ +static void +_bt_leader_participate_as_worker(BTBuildState *buildstate) +{ + BTLeader *btleader = buildstate->btleader; + BTSpool *leaderworker; + BTSpool *leaderworker2; + int sortmem; + + /* Allocate memory and initialize private spool */ + leaderworker = (BTSpool *) palloc0(sizeof(BTSpool)); + leaderworker->heap = buildstate->spool->heap; + leaderworker->index = buildstate->spool->index; + leaderworker->isunique = buildstate->spool->isunique; + leaderworker->nulls_not_distinct = buildstate->spool->nulls_not_distinct; + + /* Initialize second spool, if required */ + if (!btleader->btshared->isunique) + leaderworker2 = NULL; + else + { + /* Allocate memory for worker's own private secondary spool */ + leaderworker2 = (BTSpool *) palloc0(sizeof(BTSpool)); + + /* Initialize worker's own secondary spool */ + leaderworker2->heap = leaderworker->heap; + leaderworker2->index = leaderworker->index; + leaderworker2->isunique = false; + } + + /* + * Might as well use reliable figure when doling out maintenance_work_mem + * (when requested number of workers were not launched, this will be + * somewhat higher than it is for other workers). + */ + sortmem = maintenance_work_mem / btleader->nparticipanttuplesorts; + + /* Perform work common to all participants */ + _bt_parallel_scan_and_sort(leaderworker, leaderworker2, btleader->btshared, + btleader->sharedsort, btleader->sharedsort2, + sortmem, true); + +#ifdef BTREE_BUILD_STATS + if (log_btree_build_stats) + { + ShowUsage("BTREE BUILD (Leader Partial Spool) STATISTICS"); + ResetUsage(); + } +#endif /* BTREE_BUILD_STATS */ +} + +/* + * Perform work within a launched parallel process. + */ +void +_bt_parallel_build_main(dsm_segment *seg, shm_toc *toc) +{ + char *sharedquery; + BTSpool *btspool; + BTSpool *btspool2; + BTShared *btshared; + Sharedsort *sharedsort; + Sharedsort *sharedsort2; + Relation heapRel; + Relation indexRel; + LOCKMODE heapLockmode; + LOCKMODE indexLockmode; + WalUsage *walusage; + BufferUsage *bufferusage; + int sortmem; + +#ifdef BTREE_BUILD_STATS + if (log_btree_build_stats) + ResetUsage(); +#endif /* BTREE_BUILD_STATS */ + + /* + * The only possible status flag that can be set to the parallel worker is + * PROC_IN_SAFE_IC. 
+ */ + Assert((MyProc->statusFlags == 0) || + (MyProc->statusFlags == PROC_IN_SAFE_IC)); + + /* Set debug_query_string for individual workers first */ + sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true); + debug_query_string = sharedquery; + + /* Report the query string from leader */ + pgstat_report_activity(STATE_RUNNING, debug_query_string); + + /* Look up nbtree shared state */ + btshared = shm_toc_lookup(toc, PARALLEL_KEY_BTREE_SHARED, false); + + /* Open relations using lock modes known to be obtained by index.c */ + if (!btshared->isconcurrent) + { + heapLockmode = ShareLock; + indexLockmode = AccessExclusiveLock; + } + else + { + heapLockmode = ShareUpdateExclusiveLock; + indexLockmode = RowExclusiveLock; + } + + /* Open relations within worker */ + heapRel = table_open(btshared->heaprelid, heapLockmode); + indexRel = index_open(btshared->indexrelid, indexLockmode); + + /* Initialize worker's own spool */ + btspool = (BTSpool *) palloc0(sizeof(BTSpool)); + btspool->heap = heapRel; + btspool->index = indexRel; + btspool->isunique = btshared->isunique; + btspool->nulls_not_distinct = btshared->nulls_not_distinct; + + /* Look up shared state private to tuplesort.c */ + sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false); + tuplesort_attach_shared(sharedsort, seg); + if (!btshared->isunique) + { + btspool2 = NULL; + sharedsort2 = NULL; + } + else + { + /* Allocate memory for worker's own private secondary spool */ + btspool2 = (BTSpool *) palloc0(sizeof(BTSpool)); + + /* Initialize worker's own secondary spool */ + btspool2->heap = btspool->heap; + btspool2->index = btspool->index; + btspool2->isunique = false; + /* Look up shared state private to tuplesort.c */ + sharedsort2 = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT_SPOOL2, false); + tuplesort_attach_shared(sharedsort2, seg); + } + + /* Prepare to track buffer usage during parallel execution */ + InstrStartParallelQuery(); + + /* Perform sorting of spool, and possibly a spool2 */ + sortmem = maintenance_work_mem / btshared->scantuplesortstates; + _bt_parallel_scan_and_sort(btspool, btspool2, btshared, sharedsort, + sharedsort2, sortmem, false); + + /* Report WAL/buffer usage during parallel execution */ + bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false); + walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false); + InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber], + &walusage[ParallelWorkerNumber]); + +#ifdef BTREE_BUILD_STATS + if (log_btree_build_stats) + { + ShowUsage("BTREE BUILD (Worker Partial Spool) STATISTICS"); + ResetUsage(); + } +#endif /* BTREE_BUILD_STATS */ + + index_close(indexRel, indexLockmode); + table_close(heapRel, heapLockmode); +} + +/* + * Perform a worker's portion of a parallel sort. + * + * This generates a tuplesort for passed btspool, and a second tuplesort + * state if a second btspool is need (i.e. for unique index builds). All + * other spool fields should already be set when this is called. + * + * sortmem is the amount of working memory to use within each worker, + * expressed in KBs. + * + * When this returns, workers are done, and need only release resources. 
+ */ +static void +_bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, + BTShared *btshared, Sharedsort *sharedsort, + Sharedsort *sharedsort2, int sortmem, bool progress) +{ + SortCoordinate coordinate; + BTBuildState buildstate; + TableScanDesc scan; + double reltuples; + IndexInfo *indexInfo; + + /* Initialize local tuplesort coordination state */ + coordinate = palloc0(sizeof(SortCoordinateData)); + coordinate->isWorker = true; + coordinate->nParticipants = -1; + coordinate->sharedsort = sharedsort; + + /* Begin "partial" tuplesort */ + btspool->sortstate = tuplesort_begin_index_btree(btspool->heap, + btspool->index, + btspool->isunique, + btspool->nulls_not_distinct, + sortmem, coordinate, + TUPLESORT_NONE); + + /* + * Just as with serial case, there may be a second spool. If so, a + * second, dedicated spool2 partial tuplesort is required. + */ + if (btspool2) + { + SortCoordinate coordinate2; + + /* + * We expect that the second one (for dead tuples) won't get very + * full, so we give it only work_mem (unless sortmem is less for + * worker). Worker processes are generally permitted to allocate + * work_mem independently. + */ + coordinate2 = palloc0(sizeof(SortCoordinateData)); + coordinate2->isWorker = true; + coordinate2->nParticipants = -1; + coordinate2->sharedsort = sharedsort2; + btspool2->sortstate = + tuplesort_begin_index_btree(btspool->heap, btspool->index, false, false, + Min(sortmem, work_mem), coordinate2, + false); + } + + /* Fill in buildstate for _bt_build_callback() */ + buildstate.isunique = btshared->isunique; + buildstate.nulls_not_distinct = btshared->nulls_not_distinct; + buildstate.havedead = false; + buildstate.heap = btspool->heap; + buildstate.spool = btspool; + buildstate.spool2 = btspool2; + buildstate.indtuples = 0; + buildstate.btleader = NULL; + + /* Join parallel scan */ + indexInfo = BuildIndexInfo(btspool->index); + indexInfo->ii_Concurrent = btshared->isconcurrent; + scan = table_beginscan_parallel(btspool->heap, + ParallelTableScanFromBTShared(btshared)); + reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo, + true, progress, _bt_build_callback, + (void *) &buildstate, scan); + + /* Execute this worker's part of the sort */ + if (progress) + pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE, + PROGRESS_BTREE_PHASE_PERFORMSORT_1); + tuplesort_performsort(btspool->sortstate); + if (btspool2) + { + if (progress) + pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE, + PROGRESS_BTREE_PHASE_PERFORMSORT_2); + tuplesort_performsort(btspool2->sortstate); + } + + /* + * Done. Record ambuild statistics, and whether we encountered a broken + * HOT chain. 
+ */ + SpinLockAcquire(&btshared->mutex); + btshared->nparticipantsdone++; + btshared->reltuples += reltuples; + if (buildstate.havedead) + btshared->havedead = true; + btshared->indtuples += buildstate.indtuples; + if (indexInfo->ii_BrokenHotChain) + btshared->brokenhotchain = true; + SpinLockRelease(&btshared->mutex); + + /* Notify leader */ + ConditionVariableSignal(&btshared->workersdonecv); + + /* We can end tuplesorts immediately */ + tuplesort_end(btspool->sortstate); + if (btspool2) + tuplesort_end(btspool2->sortstate); +} diff --git a/lib/pg_btree.c b/lib/pg_btree.c index 68e3687..3b4bd93 100644 --- a/lib/pg_btree.c +++ b/lib/pg_btree.c @@ -45,8 +45,10 @@ #include "logger.h" -#if PG_VERSION_NUM >= 180000 +#if PG_VERSION_NUM >= 190000 #error unsupported PostgreSQL version +#elif PG_VERSION_NUM >= 180000 +#include "nbtree/nbtsort-18.c" #elif PG_VERSION_NUM >= 170000 #include "nbtree/nbtsort-17.c" #elif PG_VERSION_NUM >= 160000 From 353d1aaea7c2c399667a11ae85785101ec57054f Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Thu, 31 Jul 2025 20:30:35 +0900 Subject: [PATCH 02/15] Fix 962da900a See https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=962da900ac8f0927f1af2fd811ca67fa163c873a --- include/pg_bulkload.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pg_bulkload.h b/include/pg_bulkload.h index ade5c91..d30ea8c 100644 --- a/include/pg_bulkload.h +++ b/include/pg_bulkload.h @@ -74,7 +74,7 @@ typedef Parser *(*ParserCreate)(void); #endif #endif -#ifdef HAVE_LONG_INT_64 +#if SIZEOF_LONG == 8 #define int64_FMT "%ld" #else #define int64_FMT "%lld" From d10a79d2a0648a102c50983cf5fc3f1b98b60992 Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Fri, 1 Aug 2025 09:21:50 +0900 Subject: [PATCH 03/15] Fix 5983a4cff See https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=5983a4cffc31640fda6643f10146a5b72b203eaa --- lib/parser_csv.c | 8 +++- lib/parser_function.c | 7 +++- lib/pg_btree.c | 8 +++- lib/reader.c | 93 +++++++++++++++++++++++++++++++++++++++---- lib/source.c | 4 +- 5 files changed, 106 insertions(+), 14 deletions(-) diff --git a/lib/parser_csv.c b/lib/parser_csv.c index 1a9ae1d..0f73d5e 100644 --- a/lib/parser_csv.c +++ b/lib/parser_csv.c @@ -233,7 +233,9 @@ CSVParserInit(CSVParser *self, Checker *checker, const char *infile, TupleDesc d { for (i = 0; i < desc->natts; i++) { -#if PG_VERSION_NUM >= 110000 +#if PG_VERSION_NUM >= 180000 + if (strcmp(lfirst(name), NameStr(TupleDescAttr(desc, i)->attname)) == 0) +#elif PG_VERSION_NUM >= 110000 if (strcmp(lfirst(name), desc->attrs[i].attname.data) == 0) #else if (strcmp(lfirst(name), desc->attrs[i]->attname.data) == 0) @@ -694,7 +696,9 @@ CSVParserRead(CSVParser *self, Checker *checker) else ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("missing data for column \"%s\"", -#if PG_VERSION_NUM >= 110000 +#if PG_VERSION_NUM >= 180000 + NameStr(TupleDescAttr(self->former.desc, self->former.attnum[self->base.parsing_field])->attname)), +#elif PG_VERSION_NUM >= 110000 NameStr(self->former.desc->attrs[self->former.attnum[self->base.parsing_field]].attname)), #else NameStr(self->former.desc->attrs[self->former.attnum[self->base.parsing_field]]->attname)), diff --git a/lib/parser_function.c b/lib/parser_function.c index 1958ccb..5e0e5d8 100644 --- a/lib/parser_function.c +++ b/lib/parser_function.c @@ -333,11 +333,16 @@ FunctionParserInit(FunctionParser *self, Checker *checker, const char *infile, T self->desc = CreateTupleDescCopy(desc); for (i = 0; i < desc->natts; i++) -#if PG_VERSION_NUM 
>= 110000 + { +#if PG_VERSION_NUM >= 180000 + TupleDescAttr(self->desc, i)->attnotnull = TupleDescAttr(desc, i)->attnotnull; + populate_compact_attribute(self->desc, i); +#elif PG_VERSION_NUM >= 110000 self->desc->attrs[i].attnotnull = desc->attrs[i].attnotnull; #else self->desc->attrs[i]->attnotnull = desc->attrs[i]->attnotnull; #endif + } self->estate = CreateExecutorState(); self->econtext = GetPerTupleExprContext(self->estate); diff --git a/lib/pg_btree.c b/lib/pg_btree.c index 3b4bd93..018ab52 100644 --- a/lib/pg_btree.c +++ b/lib/pg_btree.c @@ -1193,7 +1193,9 @@ tuple_to_cstring(TupleDesc tupdesc, HeapTuple tuple) bool nq; /* Ignore dropped columns in datatype */ -#if PG_VERSION_NUM >= 110000 +#if PG_VERSION_NUM >= 180000 + if (TupleDescAttr(tupdesc, i)->attisdropped) +#elif PG_VERSION_NUM >= 110000 if (tupdesc->attrs[i].attisdropped) #else if (tupdesc->attrs[i]->attisdropped) @@ -1214,7 +1216,9 @@ tuple_to_cstring(TupleDesc tupdesc, HeapTuple tuple) Oid foutoid; bool typisvarlena; -#if PG_VERSION_NUM >= 110000 +#if PG_VERSION_NUM >= 180000 + getTypeOutputInfo(TupleDescAttr(tupdesc, i)->atttypid, &foutoid, &typisvarlena); +#elif PG_VERSION_NUM >= 110000 getTypeOutputInfo(tupdesc->attrs[i].atttypid, &foutoid, &typisvarlena); #else getTypeOutputInfo(tupdesc->attrs[i]->atttypid, &foutoid, &typisvarlena); diff --git a/lib/reader.c b/lib/reader.c index 5ad347e..ae5bcf7 100644 --- a/lib/reader.c +++ b/lib/reader.c @@ -493,11 +493,16 @@ CheckerInit(Checker *checker, Relation rel, TupleChecker *tchecker) checker->desc = CreateTupleDescCopy(desc); for (n = 0; n < desc->natts; n++) -#if PG_VERSION_NUM >= 110000 + { +#if PG_VERSION_NUM >= 180000 + TupleDescAttr(checker->desc, n)->attnotnull = TupleDescAttr(desc, n)->attnotnull; + populate_compact_attribute(checker->desc, n); +#elif PG_VERSION_NUM >= 110000 checker->desc->attrs[n].attnotnull = desc->attrs[n].attnotnull; #else checker->desc->attrs[n]->attnotnull = desc->attrs[n]->attnotnull; #endif + } } } @@ -595,7 +600,10 @@ CheckerConstraints(Checker *checker, HeapTuple tuple, int *parsing_field) for (i = 0; i < desc->natts; i++) { -#if PG_VERSION_NUM >= 110000 +#if PG_VERSION_NUM >= 180000 + if (TupleDescAttr(desc, i)->attnotnull && + att_isnull(i, tuple->t_data->t_bits)) +#elif PG_VERSION_NUM >= 110000 if (desc->attrs[i].attnotnull && att_isnull(i, tuple->t_data->t_bits)) #else @@ -608,7 +616,9 @@ CheckerConstraints(Checker *checker, HeapTuple tuple, int *parsing_field) ereport(ERROR, (errcode(ERRCODE_NOT_NULL_VIOLATION), errmsg("null value in column \"%s\" violates not-null constraint", -#if PG_VERSION_NUM >= 110000 +#if PG_VERSION_NUM >= 180000 + NameStr(TupleDescAttr(desc, i)->attname)))); +#elif PG_VERSION_NUM >= 110000 NameStr(desc->attrs[i].attname)))); #else NameStr(desc->attrs[i]->attname)))); @@ -630,11 +640,16 @@ TupleFormerInit(TupleFormer *former, Filter *filter, TupleDesc desc) former->desc = CreateTupleDescCopy(desc); for (i = 0; i < desc->natts; i++) -#if PG_VERSION_NUM >= 110000 + { +#if PG_VERSION_NUM >= 180000 + TupleDescAttr(former->desc, i)->attnotnull = TupleDescAttr(desc, i)->attnotnull; + populate_compact_attribute(former->desc, i); +#elif PG_VERSION_NUM >= 110000 former->desc->attrs[i].attnotnull = desc->attrs[i].attnotnull; #else former->desc->attrs[i]->attnotnull = desc->attrs[i]->attnotnull; #endif + } /* * allocate buffer to store columns or function arguments @@ -689,7 +704,19 @@ TupleFormerInit(TupleFormer *former, Filter *filter, TupleDesc desc) former->maxfields = 0; for (i = 0; i < natts; i++) { -#if 
PG_VERSION_NUM >= 110000 +#if PG_VERSION_NUM >= 180000 + /* ignore dropped columns */ + if (TupleDescAttr(desc, i)->attisdropped) + continue; + + /* get type information and input function */ + getTypeInputInfo(TupleDescAttr(desc, i)->atttypid, + &in_func_oid, &former->typIOParam[i]); + fmgr_info(in_func_oid, &former->typInput[i]); + + former->typMod[i] = TupleDescAttr(desc, i)->atttypmod; + former->typId[i] = TupleDescAttr(desc, i)->atttypid; +#elif PG_VERSION_NUM >= 110000 /* ignore dropped columns */ if (attrs[i].attisdropped) continue; @@ -807,7 +834,19 @@ tupledesc_match(TupleDesc dst_tupdesc, TupleDesc src_tupdesc) for (i = 0; i < dst_tupdesc->natts; i++) { -#if PG_VERSION_NUM >= 110000 +#if PG_VERSION_NUM >= 180000 + FormData_pg_attribute *dattr = TupleDescAttr(dst_tupdesc, i); + FormData_pg_attribute *sattr = TupleDescAttr(src_tupdesc, i); + + if (dattr->atttypid == sattr->atttypid) + continue; /* no worries */ + if (!dattr->attisdropped) + return false; + + if (dattr->attlen != sattr->attlen || + dattr->attalign != sattr->attalign) + return false; +#elif PG_VERSION_NUM >= 110000 FormData_pg_attribute dattr = dst_tupdesc->attrs[i]; FormData_pg_attribute sattr = src_tupdesc->attrs[i]; @@ -1262,7 +1301,19 @@ CoercionDeformTuple(TupleChecker *self, HeapTuple tuple, int *parsing_field) for (i = 0; i < natts; i++) { -#if PG_VERSION_NUM >= 110000 +#if PG_VERSION_NUM >= 180000 + if (TupleDescAttr(self->sourceDesc, i)->atttypid == + TupleDescAttr(self->targetDesc, i)->atttypid) + continue; + + getTypeOutputInfo(TupleDescAttr(self->sourceDesc, i)->atttypid, + &iofunc, &self->typIsVarlena[i]); + fmgr_info(iofunc, &self->typOutput[i]); + + getTypeInputInfo(TupleDescAttr(self->targetDesc, i)->atttypid, &iofunc, + &self->typIOParam[i]); + fmgr_info(iofunc, &self->typInput[i]); +#elif PG_VERSION_NUM >= 110000 if (self->sourceDesc->attrs[i].atttypid == self->targetDesc->attrs[i].atttypid) continue; @@ -1298,7 +1349,33 @@ CoercionDeformTuple(TupleChecker *self, HeapTuple tuple, int *parsing_field) { *parsing_field = i + 1; -#if PG_VERSION_NUM >= 110000 +#if PG_VERSION_NUM >= 180000 + /* Ignore dropped columns in datatype */ + if (TupleDescAttr(self->targetDesc, i)->attisdropped) + continue; + + if (self->nulls[i]) + { + /* emit nothing... 
*/ + continue; + } + else if (TupleDescAttr(self->sourceDesc, i)->atttypid == + TupleDescAttr(self->targetDesc, i)->atttypid) + { + continue; + } + else + { + char *value; + + value = OutputFunctionCall(&self->typOutput[i], self->values[i]); + self->values[i] = InputFunctionCall(&self->typInput[i], value, + self->typIOParam[i], + TupleDescAttr(self->targetDesc, i)->atttypmod); + pfree(value); + } + } +#elif PG_VERSION_NUM >= 110000 /* Ignore dropped columns in datatype */ if (self->targetDesc->attrs[i].attisdropped) continue; diff --git a/lib/source.c b/lib/source.c index 5de7a7f..6c1a00c 100644 --- a/lib/source.c +++ b/lib/source.c @@ -481,7 +481,9 @@ CreateRemoteSource(const char *path, TupleDesc desc) /* count valid fields */ for (nattrs = 0, i = 0; i < desc->natts; i++) { -#if PG_VERSION_NUM >= 110000 +#if PG_VERSION_NUM >= 180000 + if (TupleDescAttr(desc, i)->attisdropped) +#elif PG_VERSION_NUM >= 110000 if (desc->attrs[i].attisdropped) #else if (desc->attrs[i]->attisdropped) From 8a1bb0b90795428597d6dbbd77da7cacace34fb6 Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Fri, 1 Aug 2025 09:34:29 +0900 Subject: [PATCH 04/15] Fix cbc127917 See https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=cbc127917e04a978a788b8bc9d35a70244396d5b --- lib/reader.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/reader.c b/lib/reader.c index ae5bcf7..af05982 100644 --- a/lib/reader.c +++ b/lib/reader.c @@ -470,8 +470,10 @@ CheckerInit(Checker *checker, Relation rel, TupleChecker *tchecker) * is not essential. */ ExecCheckRTPerms(range_table, true); #endif - -#if PG_VERSION_NUM >= 160000 + +#if PG_VERSION_NUM >= 180000 + ExecInitRangeTable(checker->estate, range_table, perminfos, bms_make_singleton(1)); +#elif PG_VERSION_NUM >= 160000 ExecInitRangeTable(checker->estate, range_table, perminfos); #elif PG_VERSION_NUM >= 120000 /* Some APIs have changed significantly as of v12. */ From 7bc4d45f2737d1863b47f57f3fccedd096a13e1c Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Mon, 4 Aug 2025 16:29:04 +0900 Subject: [PATCH 05/15] Fix e596e077b See https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=e596e077bbb3b512bbc80610d64dc007a5761ce6 --- lib/pg_btree.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/lib/pg_btree.c b/lib/pg_btree.c index 018ab52..95cdaa6 100644 --- a/lib/pg_btree.c +++ b/lib/pg_btree.c @@ -188,7 +188,40 @@ SpoolerClose(Spooler *self) /* Terminate spooler. */ ExecDropSingleTupleTableSlot(self->slot); -#if PG_VERSION_NUM >= 140000 +#if PG_VERSION_NUM >= 180000 + /* + * In PostgreSQL 18 and later, ExecCloseIndices() asserts that it is not + * called on a ResultRelInfo with already-closed indexes. However, + * IndexSpoolEnd() may have already closed some indexes (e.g. non-btree + * indexes that get reindexed), which causes the assertion to fail. + * + * To work around this, we replicate the logic of ExecCloseIndices() + * here, but with a check to skip already-closed indexes, similar to how + * older PostgreSQL versions behaved. We don't call + * ExecCloseResultRelations() to avoid the problematic assertion. 
+ */ + if (self->relinfo) + { + ResultRelInfo *resultRelInfo = self->relinfo; + int i; + + if (resultRelInfo->ri_NumIndices > 0) + { + for (i = 0; i < resultRelInfo->ri_NumIndices; i++) + { + if (resultRelInfo->ri_IndexRelationDescs[i] == NULL) + continue; + + /* Give the index a chance to do some post-insert cleanup */ + index_insert_cleanup(resultRelInfo->ri_IndexRelationDescs[i], + resultRelInfo->ri_IndexRelationInfo[i]); + + /* Drop lock acquired by ExecOpenIndices */ + index_close(resultRelInfo->ri_IndexRelationDescs[i], RowExclusiveLock); + } + } + } +#elif PG_VERSION_NUM >= 140000 if (self->relinfo) ExecCloseResultRelations(self->estate); #else From 8c64622332b9d9f27d871140abce38457b9a7760 Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Mon, 4 Aug 2025 20:14:07 +0900 Subject: [PATCH 06/15] Fix 525392d57 See https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=525392d5727f469e9a5882e1d728917a4be56147 --- lib/reader.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/reader.c b/lib/reader.c index af05982..86f10fd 100644 --- a/lib/reader.c +++ b/lib/reader.c @@ -22,6 +22,7 @@ #include "nodes/parsenodes.h" #include "parser/parse_coerce.h" #include "pgstat.h" +#include "storage/lmgr.h" #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/memutils.h" @@ -461,7 +462,15 @@ CheckerInit(Checker *checker, Relation rel, TupleChecker *tchecker) } #endif -#if PG_VERSION_NUM >= 160000 +#if PG_VERSION_NUM >= 180000 + /* + * In PostgreSQL 18, ExecCheckPermissions() requires the relation to be locked. + * Acquire an AccessShareLock before calling it and release the lock afterward. + */ + LockRelationOid(RelationGetRelid(rel), AccessShareLock); + ExecCheckPermissions(range_table, perminfos, true); + UnlockRelationOid(RelationGetRelid(rel), AccessShareLock); +#elif PG_VERSION_NUM >= 160000 ExecCheckPermissions(range_table, perminfos, true); #elif PG_VERSION_NUM >= 90100 /* This API is published only from 9.1. From 15bf8bf6efc08eff6f0bd0d1e5706cd176eeed95 Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Mon, 4 Aug 2025 20:29:52 +0900 Subject: [PATCH 07/15] Fix 37c87e63f See https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=37c87e63f9e1a2d76db54fedcdf91d3895d200a6 --- lib/writer_direct.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/writer_direct.c b/lib/writer_direct.c index 324729d..84dda32 100644 --- a/lib/writer_direct.c +++ b/lib/writer_direct.c @@ -672,16 +672,20 @@ open_data_file( #else RelFileNodeBackend bknode; bknode.node = rnode; -#endif +#endif /* PG_VERSION_NUM >= 160000 */ #if PG_VERSION_NUM >= 170000 bknode.backend = istemp ? MyBackendType : InvalidCommandId; #else bknode.backend = istemp ? 
MyBackendId : InvalidBackendId; -#endif +#endif /* PG_VERSION_NUM >= 170000 */ +#if PG_VERSION_NUM >= 180000 + fname = pstrdup(relpath(bknode, MAIN_FORKNUM).str); +#else fname = relpath(bknode, MAIN_FORKNUM); +#endif /* PG_VERSION_NUM >= 180000 */ #else fname = relpath(rnode, MAIN_FORKNUM); -#endif +#endif /* PG_VERSION_NUM >= 90100 */ segno = blknum / RELSEG_SIZE; if (segno > 0) { From a7b05c069d933ed67fd38abe00f96a6165718722 Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Mon, 4 Aug 2025 20:45:02 +0900 Subject: [PATCH 08/15] Fix 426ea6111 See https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=426ea611171da4e60ab4f3863fa3cc3683ae9547 --- lib/nbtree/nbtsort-18.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/lib/nbtree/nbtsort-18.c b/lib/nbtree/nbtsort-18.c index f5d7b3b..fa336ba 100644 --- a/lib/nbtree/nbtsort-18.c +++ b/lib/nbtree/nbtsort-18.c @@ -29,7 +29,7 @@ * This code isn't concerned about the FSM at all. The caller is responsible * for initializing that. * - * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -45,14 +45,13 @@ #include "access/relscan.h" #include "access/table.h" #include "access/xact.h" -#include "access/xloginsert.h" #include "catalog/index.h" #include "commands/progress.h" #include "executor/instrument.h" #include "miscadmin.h" #include "pgstat.h" #include "storage/bulk_write.h" -#include "tcop/tcopprot.h" /* pgrminclude ignore */ +#include "tcop/tcopprot.h" #include "utils/rel.h" #include "utils/sortsupport.h" #include "utils/tuplesort.h" @@ -105,6 +104,9 @@ typedef struct BTShared bool isconcurrent; int scantuplesortstates; + /* Query ID, for report in worker processes */ + uint64 queryid; + /* * workersdonecv is used to monitor the progress of workers. All parallel * participants must indicate that they are done before leader can use @@ -473,7 +475,7 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, /* Fill spool using either serial or parallel heap scan */ if (!buildstate->btleader) reltuples = table_index_build_scan(heap, index, indexInfo, true, true, - _bt_build_callback, (void *) buildstate, + _bt_build_callback, buildstate, NULL); else reltuples = _bt_parallel_heapscan(buildstate, @@ -827,7 +829,7 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup, * make use of the reserved space. This should never fail on internal * pages. 
*/ - if (unlikely(itupsz > BTMaxItemSize(npage))) + if (unlikely(itupsz > BTMaxItemSize)) _bt_check_third_page(wstate->index, wstate->heap, isleaf, npage, itup); @@ -1303,7 +1305,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) */ dstate->maxpostingsize = MAXALIGN_DOWN((BLCKSZ * 10 / 100)) - sizeof(ItemIdData); - Assert(dstate->maxpostingsize <= BTMaxItemSize((Page) state->btps_buf) && + Assert(dstate->maxpostingsize <= BTMaxItemSize && dstate->maxpostingsize <= INDEX_SIZE_MASK); dstate->htids = palloc(dstate->maxpostingsize); @@ -1505,6 +1507,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) btshared->nulls_not_distinct = btspool->nulls_not_distinct; btshared->isconcurrent = isconcurrent; btshared->scantuplesortstates = scantuplesortstates; + btshared->queryid = pgstat_get_my_query_id(); ConditionVariableInit(&btshared->workersdonecv); SpinLockInit(&btshared->mutex); /* Initialize mutable state */ @@ -1787,6 +1790,9 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc) indexLockmode = RowExclusiveLock; } + /* Track query ID */ + pgstat_report_query_id(btshared->queryid, false); + /* Open relations within worker */ heapRel = table_open(btshared->heaprelid, heapLockmode); indexRel = index_open(btshared->indexrelid, indexLockmode); @@ -1924,7 +1930,7 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, ParallelTableScanFromBTShared(btshared)); reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo, true, progress, _bt_build_callback, - (void *) &buildstate, scan); + &buildstate, scan); /* Execute this worker's part of the sort */ if (progress) From 32c6f4c965ac58cbded950111a7f79c9063a8e4c Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Tue, 5 Aug 2025 19:40:42 +0900 Subject: [PATCH 09/15] Fix e83a8ae44 See https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=e83a8ae44729bfdd15d31101bd2c99be98cfc691 --- lib/reader.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/reader.c b/lib/reader.c index 86f10fd..92e96cd 100644 --- a/lib/reader.c +++ b/lib/reader.c @@ -1097,6 +1097,7 @@ FilterTuple(Filter *filter, TupleFormer *former, int *parsing_field) MemoryContextSwitchTo(oldcontext); CurrentResourceOwner = oldowner; +# if PG_VERSION_NUM < 180000 /* set fn_extra except the first time call */ if ( filter->is_first_time_call == false && MemoryContextIsValid(filter->fn_extra.fcontext) && @@ -1104,11 +1105,10 @@ FilterTuple(Filter *filter, TupleFormer *former, int *parsing_field) flinfo.fn_extra = (SQLFunctionCache *) palloc0(sizeof(SQLFunctionCache)); memmove((SQLFunctionCache *)flinfo.fn_extra, &(filter->fn_extra), sizeof(SQLFunctionCache)); - } else { - - filter->is_first_time_call = true; - } -#endif + } else +#endif /* PG_VERSION_NUM < 180000 */ + filter->is_first_time_call = true; +#endif /* PG_VERSION_NUM >= 90204 */ #if PG_VERSION_NUM >= 120000 InitFunctionCallInfoData(*fcinfo, &flinfo, filter->nargs, From 07953bdc8fbac52e7f314769b9cbec96544b3c36 Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Tue, 5 Aug 2025 19:54:43 +0900 Subject: [PATCH 10/15] Update nbtsort-18.c to HEAD of REL_18_STABLE --- lib/nbtree/nbtsort-18.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/nbtree/nbtsort-18.c b/lib/nbtree/nbtsort-18.c index fa336ba..9d70e89 100644 --- a/lib/nbtree/nbtsort-18.c +++ b/lib/nbtree/nbtsort-18.c @@ -105,7 +105,7 @@ typedef struct BTShared int scantuplesortstates; /* Query ID, for report in worker processes */ - uint64 
queryid; + int64 queryid; /* * workersdonecv is used to monitor the progress of workers. All parallel @@ -1171,7 +1171,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) { SortSupport sortKey = sortKeys + i; ScanKey scanKey = wstate->inskey->scankeys + i; - int16 strategy; + bool reverse; sortKey->ssup_cxt = CurrentMemoryContext; sortKey->ssup_collation = scanKey->sk_collation; @@ -1183,10 +1183,9 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) Assert(sortKey->ssup_attno != 0); - strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? - BTGreaterStrategyNumber : BTLessStrategyNumber; + reverse = (scanKey->sk_flags & SK_BT_DESC) != 0; - PrepareSortSupportFromIndexRel(wstate->index, strategy, sortKey); + PrepareSortSupportFromIndexRel(wstate->index, reverse, sortKey); } for (;;) From d532226194abd540a0164e9faaa2f1a9351481eb Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Tue, 5 Aug 2025 20:09:36 +0900 Subject: [PATCH 11/15] Fix test.yml and Makefile --- .github/workflows/test.yml | 10 +++++----- bin/Makefile | 25 ++++++++++++++++++------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9019f4f..5fb5319 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,11 +15,11 @@ jobs: strategy: matrix: PGVERSION: # TODO: build with master branch - - "17.2" - - "16.6" - - "15.10" - - "14.15" - - "13.18" + - "18.0" + - "17.5" + - "16.9" + - "15.13" + - "14.18" env: CACHE_VERSION: 20221222 # to identify cache version diff --git a/bin/Makefile b/bin/Makefile index ddb49d7..2c4557b 100644 --- a/bin/Makefile +++ b/bin/Makefile @@ -70,6 +70,8 @@ sql/init-16.sql: cp sql/init-extension-v3.sql sql/init-16.sql sql/init-17.sql: cp sql/init-extension-v3.sql sql/init-17.sql +sql/init-18.sql: + cp sql/init-extension-v3.sql sql/init-18.sql sql/load_bin.sql: sql/load_bin-$(MAJORVERSION).sql cp sql/load_bin-$(MAJORVERSION).sql sql/load_bin.sql @@ -97,6 +99,8 @@ sql/load_bin-16.sql: cp sql/load_bin-v2.sql sql/load_bin-16.sql sql/load_bin-17.sql: cp sql/load_bin-v2.sql sql/load_bin-17.sql +sql/load_bin-18.sql: + cp sql/load_bin-v2.sql sql/load_bin-18.sql sql/load_csv.sql: sql/load_csv-$(MAJORVERSION).sql cp sql/load_csv-$(MAJORVERSION).sql sql/load_csv.sql @@ -124,6 +128,8 @@ sql/load_csv-16.sql: cp sql/load_csv-v3.sql sql/load_csv-16.sql sql/load_csv-17.sql: cp sql/load_csv-v3.sql sql/load_csv-17.sql +sql/load_csv-18.sql: + cp sql/load_csv-v3.sql sql/load_csv-18.sql sql/load_filter.sql: sql/load_filter-$(MAJORVERSION).sql cp sql/load_filter-$(MAJORVERSION).sql sql/load_filter.sql @@ -151,7 +157,8 @@ sql/load_filter-16.sql: cp sql/load_filter-v3.sql sql/load_filter-16.sql sql/load_filter-17.sql: cp sql/load_filter-v3.sql sql/load_filter-17.sql - +sql/load_filter-18.sql: + cp sql/load_filter-v3.sql sql/load_filter-18.sql sql/load_function.sql: sql/load_function-$(MAJORVERSION).sql cp sql/load_function-$(MAJORVERSION).sql sql/load_function.sql @@ -179,6 +186,8 @@ sql/load_function-16.sql: cp sql/load_function-v3.sql sql/load_function-16.sql sql/load_function-17.sql: cp sql/load_function-v3.sql sql/load_function-17.sql +sql/load_function-18.sql: + cp sql/load_function-v3.sql sql/load_function-18.sql sql/write_bin.sql: sql/write_bin-$(MAJORVERSION).sql cp sql/write_bin-$(MAJORVERSION).sql sql/write_bin.sql @@ -206,17 +215,19 @@ sql/write_bin-16.sql: cp sql/write_bin-v2.sql sql/write_bin-16.sql sql/write_bin-17.sql: cp sql/write_bin-v2.sql sql/write_bin-17.sql +sql/write_bin-18.sql: + 
cp sql/write_bin-v2.sql sql/write_bin-18.sql .PHONY: subclean clean: subclean subclean: - rm -f sql/init.sql sql/init-{9.3,9.4,9.5,9.6,10,11,12,13,14,15,16,17}.sql - rm -f sql/load_bin.sql sql/load_bin-{9.3,9.4,9.5,9.6,10,11,12,13,14,15,16,17}.sql - rm -f sql/load_csv.sql sql/load_csv-{9.3,9.4,9.5,9.6,10,11,12,13,14,15,16,17}.sql - rm -f sql/load_filter.sql sql/load_filter-{9.3,9.4,9.5,9.6,10,11,12,13,14,15,16,17}.sql - rm -f sql/load_function.sql sql/load_function-{9.3,9.4,9.5,9.6,10,11,12,13,14,15,16,17}.sql - rm -f sql/write_bin.sql sql/write_bin-{9.3,9.4,9.5,9.6,10,11,12,13,14,15,16,17}.sql + rm -f sql/init.sql sql/init-{9.3,9.4,9.5,9.6,10,11,12,13,14,15,16,17,18}.sql + rm -f sql/load_bin.sql sql/load_bin-{9.3,9.4,9.5,9.6,10,11,12,13,14,15,16,17,18}.sql + rm -f sql/load_csv.sql sql/load_csv-{9.3,9.4,9.5,9.6,10,11,12,13,14,15,16,17,18}.sql + rm -f sql/load_filter.sql sql/load_filter-{9.3,9.4,9.5,9.6,10,11,12,13,14,15,16,17,18}.sql + rm -f sql/load_function.sql sql/load_function-{9.3,9.4,9.5,9.6,10,11,12,13,14,15,16,17,18}.sql + rm -f sql/write_bin.sql sql/write_bin-{9.3,9.4,9.5,9.6,10,11,12,13,14,15,16,17,18}.sql installcheck: sql/init.sql sql/load_bin.sql sql/load_csv.sql sql/load_function.sql sql/load_filter.sql sql/write_bin.sql From 65f9f48394786b893f4b443689d41e5255917b5e Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Wed, 27 Aug 2025 11:55:08 +0900 Subject: [PATCH 12/15] Fix trivial miss --- lib/reader.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/reader.c b/lib/reader.c index 92e96cd..f831d12 100644 --- a/lib/reader.c +++ b/lib/reader.c @@ -705,13 +705,15 @@ TupleFormerInit(TupleFormer *former, Filter *filter, TupleDesc desc) } else { -#if PG_VERSION_NUM >= 110000 - FormData_pg_attribute *attrs; -#else +#if PG_VERSION_NUM < 110000 Form_pg_attribute *attrs; +#elif PG_VERSION_NUM < 180000 + FormData_pg_attribute *attrs; #endif +#if PG_VERSION_NUM < 180000 attrs = desc->attrs; +#endif former->maxfields = 0; for (i = 0; i < natts; i++) { From 51969ab4209fbb0e2274ea678fdeffd819a1d697 Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Thu, 4 Dec 2025 09:51:02 +0900 Subject: [PATCH 13/15] Add v18 to test target --- .github/workflows/build.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/test.yml | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 12c2853..74c9052 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,11 +15,11 @@ jobs: strategy: matrix: PGVERSION: # TODO: build with master branch + - "18" - "17" - "16" - "15" - "14" - - "13" env: CACHE_VERSION: 20221222 # to identify cache version diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ada0cd7..95d94cd 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: RHEL_VERSION: ["8", "9", "10"] - PG_VERSION: ["13", "14", "15", "16", "17"] + PG_VERSION: ["14", "15", "16", "17", "18"] steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5fb5319..a21c2e6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,11 +15,11 @@ jobs: strategy: matrix: PGVERSION: # TODO: build with master branch - - "18.0" - - "17.5" - - "16.9" - - "15.13" - - "14.18" + - "18.1" + - "17.7" + - "16.11" + - "15.15" + - "14.20" env: CACHE_VERSION: 20221222 # to identify cache version From 
7c58aa989cc5d191cac11b2f505f0350bc3576d3 Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Thu, 4 Dec 2025 10:35:13 +0900 Subject: [PATCH 14/15] Update cache versions --- .github/workflows/build.yml | 2 +- .github/workflows/test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 74c9052..3444081 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -22,7 +22,7 @@ jobs: - "14" env: - CACHE_VERSION: 20221222 # to identify cache version + CACHE_VERSION: 20251204 # to identify cache version steps: - name: cat version diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a21c2e6..5a98597 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -22,7 +22,7 @@ jobs: - "14.20" env: - CACHE_VERSION: 20221222 # to identify cache version + CACHE_VERSION: 20251204 # to identify cache version steps: - name: cat version From 845b4b7ade9150d62da49dd6d8770ebe8d19b822 Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Thu, 4 Dec 2025 10:41:54 +0900 Subject: [PATCH 15/15] Add libnuma-dev to build.yaml --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3444081..91ec121 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -65,6 +65,7 @@ jobs: libselinux-dev \ libedit-dev \ liblz4-dev \ + libnuma-dev \ zlib1g-dev \ postgresql-${{ env.PGVERSION }} \ postgresql-server-dev-${{ env.PGVERSION }}
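
For reference, a minimal sketch of the TupleDesc version-dispatch pattern that the lib/reader.c and lib/parser_function.c hunks above apply when propagating attnotnull into a copied descriptor. The helper name copy_attnotnull and the header choice are illustrative assumptions, not part of pg_bulkload; only the #if branches mirror what the patches do (on v18, copy the flag via TupleDescAttr() and then call populate_compact_attribute() so the compact-attribute cache reflects the change).

#include "postgres.h"
#include "access/tupdesc.h"

/* Hypothetical helper; a sketch of the pattern used in the hunks above. */
static void
copy_attnotnull(TupleDesc dst, TupleDesc src, int attno)
{
#if PG_VERSION_NUM >= 180000
	/* v18: copy the flag, then refresh the CompactAttribute cache. */
	TupleDescAttr(dst, attno)->attnotnull = TupleDescAttr(src, attno)->attnotnull;
	populate_compact_attribute(dst, attno);
#elif PG_VERSION_NUM >= 110000
	/* v11-v17: attrs[] holds FormData_pg_attribute by value. */
	dst->attrs[attno].attnotnull = src->attrs[attno].attnotnull;
#else
	/* pre-v11: attrs[] is an array of pointers. */
	dst->attrs[attno]->attnotnull = src->attrs[attno]->attnotnull;
#endif
}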