From 7f1f04636d9a8b959f954042a422d9963614a55a Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 2 Apr 2018 20:36:11 +0900 Subject: [PATCH] Refactor pg_control parsing The "data_checksum_version" field is located towards the end of the ControlFileData struct, meaning its position varies between versions. Previously this wasn't a problem as it was only required for operations involving 9.5 and later, and its position within the control file has not changed between the current release and current HEAD. However, in order to support pg_rewind in 9.3 and 9.4, which both have changes in the control file format, we'll need version-specific parsing. This will also make it easier to deal with any future changes to the control file format. --- controldata.c | 138 +++++++++++++++++----- controldata.h | 251 +++++++++++++++++++++++++++++++++++++++- repmgr-action-standby.c | 2 + 3 files changed, 360 insertions(+), 31 deletions(-) diff --git a/controldata.c b/controldata.c index 2931a6d0..6c6f9358 100644 --- a/controldata.c +++ b/controldata.c @@ -37,13 +37,8 @@ get_system_identifier(const char *data_directory) uint64 system_identifier = UNKNOWN_SYSTEM_IDENTIFIER; control_file_info = get_controlfile(data_directory); + system_identifier = control_file_info->system_identifier; - if (control_file_info->control_file_processed == true) - system_identifier = control_file_info->control_file->system_identifier; - else - system_identifier = UNKNOWN_SYSTEM_IDENTIFIER; - - pfree(control_file_info->control_file); pfree(control_file_info); return system_identifier; @@ -57,13 +52,8 @@ get_db_state(const char *data_directory) control_file_info = get_controlfile(data_directory); - if (control_file_info->control_file_processed == true) - state = control_file_info->control_file->state; - else - /* if we were unable to parse the control file, assume DB is shut down */ - state = DB_SHUTDOWNED; + state = control_file_info->state; - pfree(control_file_info->control_file); pfree(control_file_info); return 
state; @@ -78,12 +68,8 @@ get_latest_checkpoint_location(const char *data_directory) control_file_info = get_controlfile(data_directory); - if (control_file_info->control_file_processed == false) - return InvalidXLogRecPtr; + checkPoint = control_file_info->checkPoint; - checkPoint = control_file_info->control_file->checkPoint; - - pfree(control_file_info->control_file); pfree(control_file_info); return checkPoint; @@ -98,16 +84,8 @@ get_data_checksum_version(const char *data_directory) control_file_info = get_controlfile(data_directory); - if (control_file_info->control_file_processed == false) - { - data_checksum_version = -1; - } - else - { - data_checksum_version = (int) control_file_info->control_file->data_checksum_version; - } + data_checksum_version = (int) control_file_info->data_checksum_version; - pfree(control_file_info->control_file); pfree(control_file_info); return data_checksum_version; @@ -139,19 +117,74 @@ describe_db_state(DBState state) /* - * we maintain our own version of get_controlfile() as we need cross-version + * We maintain our own version of get_controlfile() as we need cross-version * compatibility, and also don't care if the file isn't readable. 
*/ static ControlFileInfo * get_controlfile(const char *DataDir) { ControlFileInfo *control_file_info; - int fd; + FILE *fp = NULL; + int fd, ret, version_num; + char PgVersionPath[MAXPGPATH] = ""; char ControlFilePath[MAXPGPATH] = ""; + char file_version_string[64] = ""; + long file_major, file_minor; + char *endptr = NULL; + void *ControlFileDataPtr = NULL; + int expected_size = 0; control_file_info = palloc0(sizeof(ControlFileInfo)); + + /* set default values */ control_file_info->control_file_processed = false; - control_file_info->control_file = palloc0(sizeof(ControlFileData)); + control_file_info->system_identifier = UNKNOWN_SYSTEM_IDENTIFIER; + control_file_info->state = DB_SHUTDOWNED; + control_file_info->checkPoint = InvalidXLogRecPtr; + control_file_info->data_checksum_version = -1; + + /* + * Read PG_VERSION, as we'll need to determine which struct to read + * the control file contents into + */ + snprintf(PgVersionPath, MAXPGPATH, "%s/PG_VERSION", DataDir); + + fp = fopen(PgVersionPath, "r"); + + if (fp == NULL) + { + log_warning(_("could not open file \"%s\" for reading"), + PgVersionPath); + log_detail("%s", strerror(errno)); + return control_file_info; + } + + file_version_string[0] = '\0'; + + ret = fscanf(fp, "%63s", file_version_string); + fclose(fp); + + if (ret != 1 || endptr == file_version_string) + { + log_warning(_("unable to determine major version number from PG_VERSION")); + + return control_file_info; + } + + file_major = strtol(file_version_string, &endptr, 10); + file_minor = 0; + + if (*endptr == '.') + file_minor = strtol(endptr + 1, NULL, 10); + + version_num = ((int) file_major * 10000) + ((int) file_minor * 100); + + if (version_num < 90300) + { + log_warning(_("Data directory appears to be initialised for %s"), file_version_string); + return control_file_info; + } + snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir); @@ -163,7 +196,25 @@ get_controlfile(const char *DataDir) return control_file_info; } - if 
(read(fd, control_file_info->control_file, sizeof(ControlFileData)) != sizeof(ControlFileData)) + + if (version_num >= 90500) + { + expected_size = sizeof(ControlFileData95); + ControlFileDataPtr = palloc0(expected_size); + } + else if (version_num >= 90400) + { + expected_size = sizeof(ControlFileData94); + ControlFileDataPtr = palloc0(expected_size); + } + else if (version_num >= 90300) + { + expected_size = sizeof(ControlFileData93); + ControlFileDataPtr = palloc0(expected_size); + } + + + if (read(fd, ControlFileDataPtr, expected_size) != expected_size) { log_warning(_("could not read file \"%s\""), ControlFilePath); @@ -176,6 +227,33 @@ get_controlfile(const char *DataDir) control_file_info->control_file_processed = true; + if (version_num >= 90500) + { + ControlFileData95 *ptr = (struct ControlFileData95 *)ControlFileDataPtr; + control_file_info->system_identifier = ptr->system_identifier; + control_file_info->state = ptr->state; + control_file_info->checkPoint = ptr->checkPoint; + control_file_info->data_checksum_version = ptr->data_checksum_version; + } + else if (version_num >= 90400) + { + ControlFileData94 *ptr = (struct ControlFileData94 *)ControlFileDataPtr; + control_file_info->system_identifier = ptr->system_identifier; + control_file_info->state = ptr->state; + control_file_info->checkPoint = ptr->checkPoint; + control_file_info->data_checksum_version = ptr->data_checksum_version; + } + else if (version_num >= 90300) + { + ControlFileData93 *ptr = (struct ControlFileData93 *)ControlFileDataPtr; + control_file_info->system_identifier = ptr->system_identifier; + control_file_info->state = ptr->state; + control_file_info->checkPoint = ptr->checkPoint; + control_file_info->data_checksum_version = ptr->data_checksum_version; + } + + pfree(ControlFileDataPtr); + /* * We don't check the CRC here as we're potentially checking a pg_control * file from a different PostgreSQL version to the one repmgr was compiled diff --git a/controldata.h b/controldata.h 
index 53a0e61a..d9feb06d 100644 --- a/controldata.h +++ b/controldata.h @@ -12,12 +12,261 @@ #include "postgres_fe.h" #include "catalog/pg_control.h" +/* + * A simplified representation of pg_control containing only those fields + * required by repmgr. + */ typedef struct { bool control_file_processed; - ControlFileData *control_file; + uint64 system_identifier; + DBState state; + XLogRecPtr checkPoint; + uint32 data_checksum_version; } ControlFileInfo; + + +/* Same for 9.3, 9.4 */ +typedef struct CheckPoint93 +{ + XLogRecPtr redo; /* next RecPtr available when we began to + * create CheckPoint (i.e. REDO start point) */ + TimeLineID ThisTimeLineID; /* current TLI */ + TimeLineID PrevTimeLineID; /* previous TLI, if this record begins a new + * timeline (equals ThisTimeLineID otherwise) */ + bool fullPageWrites; /* current full_page_writes */ + uint32 nextXidEpoch; /* higher-order bits of nextXid */ + TransactionId nextXid; /* next free XID */ + Oid nextOid; /* next free OID */ + MultiXactId nextMulti; /* next free MultiXactId */ + MultiXactOffset nextMultiOffset; /* next free MultiXact offset */ + TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */ + Oid oldestXidDB; /* database with minimum datfrozenxid */ + MultiXactId oldestMulti; /* cluster-wide minimum datminmxid */ + Oid oldestMultiDB; /* database with minimum datminmxid */ + pg_time_t time; /* time stamp of checkpoint */ + + TransactionId oldestActiveXid; +} CheckPoint93; + + +/* Same for 9.5, 9.6, 10, HEAD */ +typedef struct CheckPoint95 +{ + XLogRecPtr redo; /* next RecPtr available when we began to + * create CheckPoint (i.e. 
REDO start point) */ + TimeLineID ThisTimeLineID; /* current TLI */ + TimeLineID PrevTimeLineID; /* previous TLI, if this record begins a new + * timeline (equals ThisTimeLineID otherwise) */ + bool fullPageWrites; /* current full_page_writes */ + uint32 nextXidEpoch; /* higher-order bits of nextXid */ + TransactionId nextXid; /* next free XID */ + Oid nextOid; /* next free OID */ + MultiXactId nextMulti; /* next free MultiXactId */ + MultiXactOffset nextMultiOffset; /* next free MultiXact offset */ + TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */ + Oid oldestXidDB; /* database with minimum datfrozenxid */ + MultiXactId oldestMulti; /* cluster-wide minimum datminmxid */ + Oid oldestMultiDB; /* database with minimum datminmxid */ + pg_time_t time; /* time stamp of checkpoint */ + TransactionId oldestCommitTsXid; /* oldest Xid with valid commit + * timestamp */ + TransactionId newestCommitTsXid; /* newest Xid with valid commit + * timestamp */ + + TransactionId oldestActiveXid; +} CheckPoint95; + + +typedef struct ControlFileData93 +{ + uint64 system_identifier; + + uint32 pg_control_version; /* PG_CONTROL_VERSION */ + uint32 catalog_version_no; /* see catversion.h */ + + DBState state; /* see enum above */ + pg_time_t time; /* time stamp of last pg_control update */ + XLogRecPtr checkPoint; /* last check point record ptr */ + XLogRecPtr prevCheckPoint; /* previous check point record ptr */ + + CheckPoint93 checkPointCopy; /* copy of last check point record */ + + XLogRecPtr unloggedLSN; /* current fake LSN value, for unlogged rels */ + + XLogRecPtr minRecoveryPoint; + TimeLineID minRecoveryPointTLI; + XLogRecPtr backupStartPoint; + XLogRecPtr backupEndPoint; + bool backupEndRequired; + + int wal_level; + int MaxConnections; + int max_prepared_xacts; + int max_locks_per_xact; + + uint32 maxAlign; /* alignment requirement for tuples */ + double floatFormat; /* constant 1234567.0 */ + + uint32 blcksz; /* data block size for this DB */ + uint32 
relseg_size; /* blocks per segment of large relation */ + + uint32 xlog_blcksz; /* block size within WAL files */ + uint32 xlog_seg_size; /* size of each WAL segment */ + + uint32 nameDataLen; /* catalog name field width */ + uint32 indexMaxKeys; /* max number of columns in an index */ + + uint32 toast_max_chunk_size; /* chunk size in TOAST tables */ + + /* flag indicating internal format of timestamp, interval, time */ + bool enableIntTimes; /* int64 storage enabled? */ + + /* flags indicating pass-by-value status of various types */ + bool float4ByVal; /* float4 pass-by-value? */ + bool float8ByVal; /* float8, int8, etc pass-by-value? */ + + /* Are data pages protected by checksums? Zero if no checksum version */ + uint32 data_checksum_version; + +} ControlFileData93; + + +/* + * Following fields added since 9.3: + * + * bool wal_log_hints; + * int max_worker_processes; + * uint32 loblksize; + * + */ +typedef struct ControlFileData94 +{ + uint64 system_identifier; + + uint32 pg_control_version; /* PG_CONTROL_VERSION */ + uint32 catalog_version_no; /* see catversion.h */ + + DBState state; /* see enum above */ + pg_time_t time; /* time stamp of last pg_control update */ + XLogRecPtr checkPoint; /* last check point record ptr */ + XLogRecPtr prevCheckPoint; /* previous check point record ptr */ + + CheckPoint93 checkPointCopy; /* copy of last check point record */ + + XLogRecPtr unloggedLSN; /* current fake LSN value, for unlogged rels */ + + XLogRecPtr minRecoveryPoint; + TimeLineID minRecoveryPointTLI; + XLogRecPtr backupStartPoint; + XLogRecPtr backupEndPoint; + bool backupEndRequired; + + int wal_level; + bool wal_log_hints; + int MaxConnections; + int max_worker_processes; + int max_prepared_xacts; + int max_locks_per_xact; + + uint32 maxAlign; /* alignment requirement for tuples */ + double floatFormat; /* constant 1234567.0 */ + + uint32 blcksz; /* data block size for this DB */ + uint32 relseg_size; /* blocks per segment of large relation */ + + 
uint32 xlog_blcksz; /* block size within WAL files */ + uint32 xlog_seg_size; /* size of each WAL segment */ + + uint32 nameDataLen; /* catalog name field width */ + uint32 indexMaxKeys; /* max number of columns in an index */ + + uint32 toast_max_chunk_size; /* chunk size in TOAST tables */ + uint32 loblksize; /* chunk size in pg_largeobject */ + + bool enableIntTimes; /* int64 storage enabled? */ + + bool float4ByVal; /* float4 pass-by-value? */ + bool float8ByVal; /* float8, int8, etc pass-by-value? */ + + /* Are data pages protected by checksums? Zero if no checksum version */ + uint32 data_checksum_version; + +} ControlFileData94; + + + +/* + * Following field added since 9.4: + * + * bool track_commit_timestamp; + * + * Unchanged in 9.6 + * + * In 10, following field appended *after* "data_checksum_version": + * + * char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]; + * + * (but we don't care about that) + */ + +typedef struct ControlFileData95 +{ + uint64 system_identifier; + + uint32 pg_control_version; /* PG_CONTROL_VERSION */ + uint32 catalog_version_no; /* see catversion.h */ + + DBState state; /* see enum above */ + pg_time_t time; /* time stamp of last pg_control update */ + XLogRecPtr checkPoint; /* last check point record ptr */ + XLogRecPtr prevCheckPoint; /* previous check point record ptr */ + + CheckPoint95 checkPointCopy; /* copy of last check point record */ + + XLogRecPtr unloggedLSN; /* current fake LSN value, for unlogged rels */ + + XLogRecPtr minRecoveryPoint; + TimeLineID minRecoveryPointTLI; + XLogRecPtr backupStartPoint; + XLogRecPtr backupEndPoint; + bool backupEndRequired; + + int wal_level; + bool wal_log_hints; + int MaxConnections; + int max_worker_processes; + int max_prepared_xacts; + int max_locks_per_xact; + bool track_commit_timestamp; + + uint32 maxAlign; /* alignment requirement for tuples */ + double floatFormat; /* constant 1234567.0 */ + + uint32 blcksz; /* data block size for this DB */ + uint32 relseg_size; /* blocks 
per segment of large relation */ + + uint32 xlog_blcksz; /* block size within WAL files */ + uint32 xlog_seg_size; /* size of each WAL segment */ + + uint32 nameDataLen; /* catalog name field width */ + uint32 indexMaxKeys; /* max number of columns in an index */ + + uint32 toast_max_chunk_size; /* chunk size in TOAST tables */ + uint32 loblksize; /* chunk size in pg_largeobject */ + + bool enableIntTimes; /* int64 storage enabled? */ + + bool float4ByVal; /* float4 pass-by-value? */ + bool float8ByVal; /* float8, int8, etc pass-by-value? */ + + uint32 data_checksum_version; + +} ControlFileData95; + + + extern DBState get_db_state(const char *data_directory); extern const char *describe_db_state(DBState state); extern int get_data_checksum_version(const char *data_directory); diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 34b53269..951f1cdb 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -3380,6 +3380,8 @@ do_standby_switchover(void) remote_node_record.node_name, remote_node_record.node_id); } + + /* * Stop the remote primary *