Greenplum 6.11.2 Cluster Installation and Tuning Guide
- Capacity Planning
1.1 Target Environment
- OS: CentOS 7.8 (kernel 3.10.0-1127.el7.x86_64)
- vCPU / RAM / Disk: 4 vCPU / 8 GB / 40 GB XFS data volume
- Greenplum version: 6.11.2 (download link)
1.2 Node Layout
- OS-Level Preparation
2.1 Hostnames & DNS
hostnamectl set-hostname sdw1
reboot
# Replace each <..._ip> placeholder with that node's IP address
cat >> /etc/hosts <<EOF
<mdw_ip>  mdw
<sdw1_ip> sdw1
<sdw2_ip> sdw2
<sdw3_ip> sdw3
EOF
2.2 Disable Firewall & SELinux
systemctl disable --now firewalld
setenforce 0
sed -i 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
2.3 Mount XFS with noatime
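# Replace <data_device> and <mount_point> with the XFS data volume and its mount point
cat >> /etc/fstab <<EOF
<data_device> <mount_point> xfs rw,nodev,noatime,inode64 0 0
EOF
mount -a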
2.4 Kernel Parameters (/etc/sysctl.conf)
kernel.shmall = 483888
kernel.shmmax = 1982005248
kernel.shmmni = 4096
vm.overcommit_memory = 2
vm.overcommit_ratio = 95
net.ipv4.ip_local_port_range = 10000 65535
kernel.sem = 500 2048000 200 40960
kernel.sysrq = 1
kernel.core_uses_pid = 1
kernel.msgmnb = 65536
kernel.msgmax = 65536
kernel.msgmni = 2048
net.ipv4.tcp_syncookies = 1
net.ipv4.conf.default.accept_source_route = 0
net.ipv4.tcp_max_syn_backlog = 4096
net.ipv4.conf.all.arp_filter = 1
net.core.netdev_max_backlog = 10000
net.core.rmem_max = 2097152
net.core.wmem_max = 2097152
vm.swappiness = 10
vm.zone_reclaim_mode = 0
vm.dirty_expire_centisecs = 500
vm.dirty_writeback_centisecs = 100
vm.dirty_background_ratio = 3
vm.dirty_ratio = 10
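# vm.min_free_kbytes (about 3% of physical RAM) must be computed: sysctl.conf does not expand $(...).
# Run the following in a shell to append the computed value to /etc/sysctl.conf:
awk '/MemTotal/ {printf "vm.min_free_kbytes = %d\n", $2 * 0.03}' /proc/meminfo >> /etc/sysctl.conf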
Apply: sysctl -p
2.5 File & Process Limits
# /etc/security/limits.d/20-nproc.conf
* soft nproc 131072
root soft nproc unlimited
# /etc/security/limits.conf
* soft nofile 1048576
* hard nofile 1048576
* soft nproc 1048576
* hard nproc 1048576
2.6 Disk Read-Ahead
blockdev --setra 16384 /dev/sd*
blockdev --setra 16384 /dev/dm-*
cat >> /etc/rc.d/rc.local <<'EOF'
blockdev --setra 16384 /dev/sd*
blockdev --setra 16384 /dev/dm-*
EOF
chmod +x /etc/rc.d/rc.local
2.7 I/O Scheduler
# SSD: noop / deadline; HDD: deadline
echo deadline > /sys/block/sda/queue/scheduler
# Persist on CentOS 7
sed -i 's/GRUB_CMDLINE_LINUX="/GRUB_CMDLINE_LINUX="elevator=deadline /' /etc/default/grub
grub2-mkconfig -o /boot/grub2/grub.cfg
2.8 Disable Transparent HugePages
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo never > /sys/kernel/mm/transparent_hugepage/defrag
cat >> /etc/rc.local <<EOF
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo never > /sys/kernel/mm/transparent_hugepage/defrag
EOF
chmod +x /etc/rc.local
2.9 NTP & NUMA
# /etc/ntp.conf
server mdw prefer
server 0.pool.ntp.org iburst
systemctl enable --now ntpd
# Disable NUMA
sed -i 's/GRUB_CMDLINE_LINUX="/GRUB_CMDLINE_LINUX="numa=off /' /etc/default/grub
grub2-mkconfig -o /boot/grub2/grub.cfg
- Greenplum Deployment
3.1 Install Dependencies
yum install -y apr apr-util bash bzip2 curl krb5 libcurl libevent libxml2 libyaml zlib openldap openssh openssl openssl-libs perl readline rsync R sed tar zip gcc
3.2 Create OS User
groupadd -g 3030 gpadmin
useradd -u 3030 -g 3030 gpadmin
echo 'gpadmin' | passwd gpadmin --stdin
3.3 Install Greenplum Software
yum -y install greenplum-db-6.11.2-rhel7-x86_64.rpm
chown -R gpadmin:gpadmin /usr/local/greenplum-db
3.4 Environment Variables (gpadmin)
cat >> ~gpadmin/.bash_profile <<EOF
source /usr/local/greenplum-db/greenplum_path.sh
export MASTER_DATA_DIRECTORY=/data/greenplum/data/master/gpseg-1
EOF
3.5 Host Lists
# /usr/local/greenplum-db/all_host
mdw
sdw1
sdw2
sdw3
# /usr/local/greenplum-db/seg_host
sdw1
sdw2
sdw3
3.6 SSH Trust (root & gpadmin)
ssh-keygen -t rsa -P ''
for h in sdw1 sdw2 sdw3; do ssh-copy-id $h; done
gpssh-exkeys -f all_host
3.7 Create Data Directories
mkdir -p /data/greenplum/data/master
chown gpadmin:gpadmin /data/greenplum/data/master
gpssh -f seg_host -e 'mkdir -p /data/greenplum/data{1,2}/{primary,mirror}'
gpssh -f seg_host -e 'chown -R gpadmin /data/greenplum/data*'
3.8 Validate Cluster
gpcheckperf -f seg_host -r N -d /tmp
gpcheckperf -f seg_host -r ds -D -d /data/greenplum/data1/primary
gpssh -f all_host -e 'date'
3.9 Initialize Configuration
mkdir -p ~/gpconfigs
cp "$GPHOME/docs/cli_help/gpconfigs/gpinitsystem_config" ~/gpconfigs/
Edit ~/gpconfigs/gpinitsystem_config:
ARRAY_NAME="Greenplum Data Platform"
SEG_PREFIX=gpseg
PORT_BASE=6000
declare -a DATA_DIRECTORY=(/data/greenplum/data1/primary /data/greenplum/data2/primary)
MASTER_HOSTNAME=mdw
MASTER_DIRECTORY=/data/greenplum/data/master
MASTER_PORT=5432
TRUSTED_SHELL=ssh
CHECK_POINT_SEGMENTS=8
ENCODING=UNICODE
MIRROR_PORT_BASE=7000
declare -a MIRROR_DATA_DIRECTORY=(/data/greenplum/data1/mirror /data/greenplum/data2/mirror)
DATABASE_NAME=db_init
3.10 Initialize Cluster
gpinitsystem -c ~/gpconfigs/gpinitsystem_config -h seg_host -B 8
After success:
psql -d db_init -c "ALTER USER gpadmin PASSWORD 'gpadmin';"
3.11 Add Standby Master
gpinitstandby -s sdw3
- Common Pitfalls
Semaphore exhaustion:
FATAL: could not create semaphores: No space left on device
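Fix: increase kernel.sem (in particular the 2nd field, SEMMNS, and the 4th field, SEMMNI) and re-apply with sysctl -p, or lower max_connections. kernel.shmall and kernel.shmmax size shared memory and do not raise the semaphore limit.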