Skip to content
This repository has been archived by the owner on Nov 18, 2022. It is now read-only.

Commit

Permalink
Merge pull request #7 from tarosky/issue/6
Browse files Browse the repository at this point in the history
Never use Init Container for unidempotent tasks
Close #6.
  • Loading branch information
Harai Akihiro authored Mar 13, 2017
2 parents d769ebd + 5ac9bdc commit b015373
Show file tree
Hide file tree
Showing 9 changed files with 250 additions and 244 deletions.
20 changes: 12 additions & 8 deletions example/sentinel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,33 @@ spec:
annotations:
pod.beta.kubernetes.io/init-containers: '[{
"name": "k8s-redis-ha-sentinel",
"image": "tarosky/k8s-redis-ha:sentinel-1.0.1",
"volumeMounts": [{"name": "opt", "mountPath": "/opt"}],
"env": [
{"name": "SERVICE", "value": "redis-server"},
{"name": "SERVICE_PORT", "value": "redis-server"}
]
"image": "tarosky/k8s-redis-ha:sentinel-2.0.0",
"volumeMounts": [{"name": "opt", "mountPath": "/opt"}]
}]'
spec:
containers:
- name: redis-sentinel
image: library/redis:3.2
command:
- redis-sentinel
- /opt/bin/k8s-redis-ha-sentinel
args:
- /opt/sentinel.conf
env:
- name: SERVICE
value: redis-server
- name: SERVICE_PORT
value: redis-server
ports:
- containerPort: 26379
name: redis-sentinel
readinessProbe:
exec:
command: ['redis-cli', '-p', '26379', 'info', 'server']
volumeMounts:
- name: opt
mountPath: /opt
- name: redis-sword
image: tarosky/k8s-redis-ha:sword-1.0.1
image: tarosky/k8s-redis-ha:sword-2.0.0
env:
- name: SERVICE
value: redis-server
Expand Down
24 changes: 16 additions & 8 deletions example/server.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,32 @@ spec:
annotations:
pod.beta.kubernetes.io/init-containers: '[{
"name": "k8s-redis-ha-server",
"image": "tarosky/k8s-redis-ha:server-1.0.1",
"volumeMounts": [{"name": "opt", "mountPath": "/opt"}],
"env": [
{"name": "SERVICE", "value": "redis-server"},
{"name": "SERVICE_PORT", "value": "redis-server"},
{"name": "SENTINEL", "value": "redis-sentinel"},
{"name": "SENTINEL_PORT", "value": "redis-sentinel"}
]
"image": "tarosky/k8s-redis-ha:server-2.0.0",
"volumeMounts": [{"name": "opt", "mountPath": "/opt"}]
}]'
spec:
containers:
- name: redis-server
image: library/redis:3.2
command:
- /opt/bin/k8s-redis-ha-server
args:
- /opt/redis.conf
env:
- name: SERVICE
value: redis-server
- name: SERVICE_PORT
value: redis-server
- name: SENTINEL
value: redis-sentinel
- name: SENTINEL_PORT
value: redis-sentinel
ports:
- containerPort: 6379
name: redis-server
readinessProbe:
exec:
command: ['redis-cli', 'info', 'server']
volumeMounts:
- name: redis-server-volume
mountPath: /data
Expand Down
7 changes: 3 additions & 4 deletions images/sentinel/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
FROM debian:jessie

RUN apt-get update && apt-get install -y dnsutils redis-tools

COPY ["sentinel.template.conf", "/"]
COPY ["run.sh", "/"]
ADD ["https://github.com/tarosky/dig-a/releases/download/v1.0.1/dig-a_linux_amd64", "/dig-a"]
ADD ["https://github.com/tarosky/dig-srv/releases/download/v1.0.0/dig-srv_linux_amd64", "/dig-srv"]
COPY ["sentinel.template.conf", "k8s-redis-ha-sentinel", "run.sh", "/"]

CMD ["/run.sh"]

Expand Down
66 changes: 66 additions & 0 deletions images/sentinel/k8s-redis-ha-sentinel
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/bin/bash
#
# Wrapper that prepares the Sentinel configuration and then starts
# redis-sentinel.
#
# Required environment variables:
#   SERVICE       Service name used to build the SRV domain of the Redis servers
#   SERVICE_PORT  port label used in the "_<port>._tcp" part of that domain

# Abort on errors (-e) and unset variables (-u); trace every command (-x).
set -eux

# Assign first, mark readonly afterwards: `readonly var="$(cmd)"` returns the
# status of `readonly` itself, so an unreadable namespace file would go
# unnoticed and silently produce a malformed service domain.
namespace="$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace)"
readonly namespace

# SRV domain of the Redis servers. The same string is passed to redis-cli as
# the password and substituted for %PASSWORD% in the Sentinel template.
readonly service_domain="_$SERVICE_PORT._tcp.$SERVICE.$namespace.svc.cluster.local"

# Prints the output of "INFO replication" for the Redis server at host $1.
# The service domain doubles as the password. A failure or a timeout (10 s)
# does not abort the script: errexit is suspended around the call, so the
# caller simply receives whatever was printed (possibly nothing).
redis_info () {
  local -a query=(redis-cli -h "$1" -a "$service_domain" info replication)

  set +e
  timeout 10 "${query[@]}"
  set -e
}

# Extracts the replication role from INFO output given as $1.
# Prints the second colon-separated field of every line starting with
# "role:", with all whitespace (including the trailing CR/LF) removed.
redis_info_role () {
  echo "$1" \
    | awk -F':' '/^role:/ { print $2 }' \
    | tr -d '[:space:]'
}

# Resolves domain name $1 with /opt/bin/dig-a and prints the last
# whitespace-separated field of the first answer line.
# Prints nothing when the lookup yields no output.
domain_ip () {
  /opt/bin/dig-a "$1" | awk 'NR == 1 { print $NF }'
}

# Looks up the SRV records of domain $1 with /opt/bin/dig-srv and prints, one
# per line, the last field of every answer line with a trailing dot removed.
server_domains () {
  /opt/bin/dig-srv "$1" \
    | awk '{ target = $NF; sub(/\.$/, "", target); print target }'
}

# Finds the current Redis master, renders /opt/sentinel.conf and starts
# redis-sentinel.
#
# Every server listed in the SRV records of $service_domain is asked for its
# replication INFO. The IP address of the server reporting role "master" is
# substituted for %MASTER% in the template; if several servers report
# "master", the last one in the list wins.
#
# It's okay to fail during failover or other unpredictable states: refusing to
# start keeps Sentinel from making things much worse.
#
# Arguments: passed through unchanged to redis-sentinel.
# Exits:     1 when no master is found; non-zero when the template cannot be
#            rendered. On success the function never returns (exec).
run () {
  local -r template='/opt/sentinel.template.conf'
  local -r config='/opt/sentinel.conf'

  local -r servers="$(server_domains "$service_domain")"

  local master_ip=''

  local s
  # $servers is split on whitespace on purpose: one domain name per word.
  for s in $servers; do
    # Lookups are deliberately best-effort; an unreachable server is skipped.
    local s_ip="$(domain_ip "$s")"

    if [ -z "$s_ip" ]; then
      >&2 echo "Failed to resolve: $s"
      continue
    fi

    local i="$(redis_info "$s_ip")"
    if [ -z "$i" ]; then
      >&2 echo "Unable to get Replication INFO: $s ($s_ip)"
      continue
    fi

    if [ "$(redis_info_role "$i")" = 'master' ]; then
      master_ip="$s_ip"
    fi
  done

  if [ -z "$master_ip" ]; then
    >&2 echo "Master not found."
    exit 1
  fi

  # sed reads the template itself rather than through `cat | sed | sed`.
  # Without pipefail a failing cat went unnoticed, an empty sentinel.conf was
  # written and Sentinel was started anyway. Now a missing or unreadable
  # template makes sed fail and `set -e` aborts the script.
  sed \
    -e "s/%MASTER%/$master_ip/g" \
    -e "s/%PASSWORD%/$service_domain/g" \
    "$template" > "$config"

  exec docker-entrypoint.sh redis-sentinel "$@"
}

# Entry point: hand the script's arguments to run, which forwards them to
# redis-sentinel.
run "$@"
71 changes: 5 additions & 66 deletions images/sentinel/run.sh
Original file line number Diff line number Diff line change
@@ -1,69 +1,8 @@
#!/bin/bash

set -e
set -u
set -x
set -eux

readonly namespace="$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace)"
readonly service_domain="_$SERVICE_PORT._tcp.$SERVICE.$namespace.svc.cluster.local"

# Queries the Redis server at host $1 for "INFO replication" and prints the
# reply. The service domain is used as the password. errexit is switched off
# around the call so that a failure or the 10-second timeout does not stop
# the script; it is switched back on afterwards.
redis_info () {
  local -r host="$1"

  set +e
  timeout 10 redis-cli -h "${host}" -a "${service_domain}" info replication
  set -e
}

# Given INFO output in $1, prints the value of its "role:" line (the second
# colon-separated field) stripped of all whitespace. Prints nothing when no
# line starts with "role:".
redis_info_role () {
  echo "$1" \
    | awk -F':' '/^role:/ { print $2 }' \
    | tr -d '[:space:]'
}

# Resolves the A record of domain name $1 with dig and prints the last
# whitespace-separated field of the first answer line.
# Prints nothing when dig returns no answer.
domain_ip () {
  dig +noall +answer a "$1" | awk 'NR == 1 { print $NF }'
}

# Queries the SRV records of domain $1 with dig and prints, one per line, the
# last field of every answer line with a trailing dot removed.
server_domains () {
  dig +noall +answer srv "$1" \
    | awk '{ target = $NF; sub(/\.$/, "", target); print target }'
}

# Finds the current Redis master and writes /opt/sentinel.conf for it.
#
# Each server named in the SRV records of $service_domain is asked for its
# replication INFO; the IP address of the one reporting role "master" replaces
# %MASTER% in the template (the last such server wins). The template itself is
# also copied to /opt.
#
# It's okay to fail during failover or other unpredictable states: giving up
# keeps this script from making things much worse.
#
# Exits with 1 when no master is found, with 0 once the file is written.
run () {
  local -r servers="$(server_domains "$service_domain")"
  local master_ip=''
  local server

  for server in $servers; do
    local server_ip="$(domain_ip "$server")"
    if [ -z "$server_ip" ]; then
      echo "Failed to resolve: $server" >&2
      continue
    fi

    local info="$(redis_info "$server_ip")"
    if [ -z "$info" ]; then
      echo "Unable to get Replication INFO: $server ($server_ip)" >&2
      continue
    fi

    if [ "$(redis_info_role "$info")" = 'master' ]; then
      master_ip="$server_ip"
    fi
  done

  if [ -z "$master_ip" ]; then
    echo "Master not found." >&2
    exit 1
  fi

  cp /sentinel.template.conf /opt/

  # Fill in both placeholders in a single sed pass.
  sed \
    -e "s/%MASTER%/$master_ip/g" \
    -e "s/%PASSWORD%/$service_domain/g" \
    < /sentinel.template.conf \
    > /opt/sentinel.conf

  exit 0
}

run
# Install the DNS helper binaries and the Sentinel wrapper script into
# /opt/bin, put the configuration template next to them in /opt, and make
# everything under /opt/bin executable.
bin_dir='/opt/bin'

mkdir -p "$bin_dir"
cp /dig-a /dig-srv /k8s-redis-ha-sentinel "$bin_dir"
cp /sentinel.template.conf /opt
chmod -R +x "$bin_dir"
7 changes: 3 additions & 4 deletions images/server/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
FROM debian:jessie

RUN apt-get update && apt-get install -y bc dnsutils redis-tools

COPY ["redis.template.conf", "/"]
COPY ["run.sh", "/"]
ADD ["https://github.com/tarosky/dig-a/releases/download/v1.0.1/dig-a_linux_amd64", "/dig-a"]
ADD ["https://github.com/tarosky/dig-srv/releases/download/v1.0.0/dig-srv_linux_amd64", "/dig-srv"]
COPY ["redis.template.conf", "k8s-redis-ha-server", "run.sh", "/"]

CMD ["/run.sh"]

Expand Down
135 changes: 135 additions & 0 deletions images/server/k8s-redis-ha-server
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#!/bin/bash
#
# Wrapper that prepares /opt/redis.conf for this replica and then starts
# redis-server.
#
# Required environment variables:
#   SERVICE        Service name used to build the SRV domain of the Redis servers
#   SERVICE_PORT   port label used in the "_<port>._tcp" part of that domain
#   SENTINEL       Service name used to build the SRV domain of the Sentinels
#   SENTINEL_PORT  port label used in the "_<port>._tcp" part of that domain

# Abort on errors (-e) and unset variables (-u); trace every command (-x).
set -eux

# Assign first, mark readonly afterwards: `readonly var="$(cmd)"` returns the
# status of `readonly` itself, so an unreadable namespace file would go
# unnoticed and silently produce malformed domains (and a wrong password).
namespace="$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace)"
readonly namespace

# SRV domain of the Redis servers; also used as the Redis password.
readonly service_domain="_$SERVICE_PORT._tcp.$SERVICE.$namespace.svc.cluster.local"
# SRV domain of the Sentinels.
readonly sentinel_domain="_$SENTINEL_PORT._tcp.$SENTINEL.$namespace.svc.cluster.local"

# Prints the "INFO replication" reply of the Redis server at host $1,
# authenticating with the service domain as password. The call is limited to
# 10 seconds, and errexit is suspended around it so that an unreachable server
# yields empty output instead of terminating the script.
redis_info () {
  local -a info_cmd=(redis-cli -h "$1" -a "$service_domain" info replication)

  set +e
  timeout 10 "${info_cmd[@]}"
  set -e
}

# Sends "SENTINEL RESET mymaster" to the Sentinel at host $1, port 26379.
# The call is limited to 10 seconds and errexit is suspended around it, so a
# Sentinel that is down or slow does not terminate the script.
reset_sentinel () {
  local -a reset_cmd=(redis-cli -h "$1" -p 26379 sentinel reset mymaster)

  set +e
  timeout 10 "${reset_cmd[@]}"
  set -e
}

# Prints the replication role found in INFO output $1: the second
# colon-separated field of each line starting with "role:", with every
# whitespace character (such as the trailing CR/LF) removed.
redis_info_role () {
  echo "$1" \
    | awk -F':' '/^role:/ { print $2 }' \
    | tr -d '[:space:]'
}

# Resolves domain name $1 through /opt/bin/dig-a and prints the last
# whitespace-separated field of the first line it outputs.
# Prints nothing when the lookup produces no output.
domain_ip () {
  /opt/bin/dig-a "$1" | awk 'NR == 1 { print $NF }'
}

# Lists the targets of the SRV records of domain $1: runs /opt/bin/dig-srv and
# prints the last field of each output line, without a trailing dot.
server_domains () {
  /opt/bin/dig-srv "$1" \
    | awk '{ target = $NF; sub(/\.$/, "", target); print target }'
}

# Resets every Sentinel found under $sentinel_domain.
#
# This must happen at the end of a successful run so that the Sentinels
# refresh their list of supervised slaves. Kubernetes may give a Pod a new IP
# address on restart, and a Sentinel cannot tell that the recovered slave is
# the same one as the dead one; without the reset the number of "supervised"
# slaves would keep growing.
reset_all_sentinels () {
  local -r sentinels="$(server_domains "$sentinel_domain")"
  local sentinel

  echo "Resetting all sentinels: $sentinels" >&2

  for sentinel in $sentinels; do
    local sentinel_ip="$(domain_ip "$sentinel")"

    if [ -n "$sentinel_ip" ]; then
      # A Sentinel that fails to reset is tolerated, since most of the
      # Sentinels are expected to be alive.
      reset_sentinel "$sentinel_ip"
    else
      echo "Failed to resolve: $sentinel" >&2
    fi
  done
}

# Prints the "slave-priority" configuration line for this host.
# The number after the last "-" of the hostname is taken as N and the
# priority is (N + 1) * 10, so host "...-0" gets 10, "...-1" gets 20, etc.
slave_priority () {
  local -r ordinal="$(hostname | sed -e 's/^.\+-\([0-9]\+\)$/\1/g')"

  echo "slave-priority $(( (ordinal + 1) * 10 ))"
}

# Writes /opt/redis.conf for this replica, resets the Sentinels and starts
# redis-server with the script's arguments.
#
# It's okay to fail during failover or other unpredictable states: refusing to
# start keeps this server from making things much worse.
run () {
  local -r conf='/opt/redis.conf'

  cp /opt/redis.template.conf "$conf"

  {
    # The domain name of the Service doubles as the password. Here the
    # password is merely an ID that tells this replica set apart from other
    # ones in the same Kubernetes cluster.
    echo "masterauth $service_domain"
    echo "requirepass $service_domain"

    # A replica with a smaller number should be preferred as Master over one
    # with a larger number, because replicas with larger numbers are more
    # likely to be removed when the StatefulSet is scaled down.
    slave_priority
  } >> "$conf"

  # The headless Service lets a newly added Redis server scan all working
  # servers and thereby find out whether it is the first one.
  local -r peers="$(server_domains "$service_domain")"
  local -r self="$(hostname -f)"

  local master_ip=''
  local has_peers=false
  local peer

  for peer in $peers; do
    # Skip our own hostname so that restarts are handled correctly.
    if [ "$peer" = "$self" ]; then
      continue
    fi

    has_peers=true

    local peer_ip="$(domain_ip "$peer")"
    if [ -z "$peer_ip" ]; then
      echo "Failed to resolve: $peer" >&2
      continue
    fi

    local info="$(redis_info "$peer_ip")"
    if [ -z "$info" ]; then
      echo "Unable to get Replication INFO: $peer ($peer_ip)" >&2
      continue
    fi

    if [ "$(redis_info_role "$info")" = 'master' ]; then
      master_ip="$peer_ip"
    fi
  done

  # Exceptional case: with no other server listed, this is the first server
  # of the replica set and must run as Master, otherwise the StatefulSet
  # would be unable to start. Nothing is added to the configuration then.
  if [ "$has_peers" = true ]; then
    if [ -z "$master_ip" ]; then
      echo "Unable to start because all servers are slave." >&2
      exit 1
    fi

    # A Master has been found: launch this server as its slave.
    echo "slaveof $master_ip 6379" >> "$conf"
  fi

  reset_all_sentinels
  exec docker-entrypoint.sh redis-server "$@"
}

# Entry point: hand the script's arguments to run, which forwards them to
# redis-server.
run "$@"
Loading

0 comments on commit b015373

Please sign in to comment.