-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathproduction_fleet_units.txt
150 lines (110 loc) · 6.88 KB
/
production_fleet_units.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
### Before starting any fleet units in new cluster, configure etcd ###
./production_service_images_in_etcd.sh
#### start production services ####
### NOTE: if a fleet unit is constantly starting and stopping there is a problem, use journalctl to troubleshoot
## mongodb first to get shock up
## identify host that had latest mongodb on it, edit fleet-unit so it only starts on that host
## on host: ONLY do this first if there was an unclean shutdown
sudo rm -f /media/ephemeral/mongodb-shock/local.*
sudo rm -f /media/ephemeral/mongodb-shock/mongod.lock
sudo rm -rf /media/ephemeral/mongodb-shock/journal
docker exec mongodb-replica mongod --recover
## force a member to be primary when only one in cluster
## where i = position of host in member list
# > use admin
# > rs.status()
# > cfg = rs.conf()
# > cfg.members = [cfg.members[i]]
# > rs.reconfig(cfg, {force : true})
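## on host: start first instance and initiate replica set on it
## NOTE: ${COREOS_PUBLIC_IPV4} in the rs.initiate command is inside single quotes, so the shell will not expand it - substitute this host's public IP by hand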
fleetctl start mongodb-replica{,-discovery}@1.shock.service
docker exec mongodb-replica mongo --quiet -u $MONGOD_ADMIN_NAME -p $MONGOD_ADMIN_PASS --eval 'printjson(rs.initiate({_id:"shock",members:[{_id:0,host:"${COREOS_PUBLIC_IPV4}:27017"}]}))' admin
fleetctl start mongodb-replica-update@1.shock.service
# now start the other two, wait for one to finish replicating before start next
# on host: run the rs.status() command shown further down to check replica status; 'STARTUP2' state means still replicating
fleetctl start mongodb-replica{,-discovery,-update}@2.shock.service
fleetctl start mongodb-replica{,-discovery,-update}@3.shock.service
fleetctl start mongodb-replica{,-discovery,-update}@4.shock.service
# this command lets you see cluster hosts and their status
docker exec mongodb-replica mongo --quiet -u $MONGOD_ADMIN_NAME -p $MONGOD_ADMIN_PASS --eval 'printjson(rs.status())' admin
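# this command removes a missing host from the cluster list - run on primary only
# NOTE: replace ${COREOS_PUBLIC_IPV4} with the missing host's IP; inside single quotes the shell does not expand it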
docker exec mongodb-replica mongo --quiet -u $MONGOD_ADMIN_NAME -p $MONGOD_ADMIN_PASS --eval 'printjson(rs.remove("${COREOS_PUBLIC_IPV4}:27017"))' admin
## for awe mongodb repeat above, replace 'shock' with 'awe'
## mysql / solr / memcache / awe
fleetctl start mysql_metadata{,-backup}@1.service
fleetctl start [email protected]
fleetctl start solr-metagenome{,-backup}@1.service
fleetctl start memcached.service # global unit
fleetctl start memcached-discovery.service # global unit / start after above completes
fleetctl start awe-server{,-discovery,-monitor}@1.service
## api / web
fleetctl start api-server{,-discovery,-update}@{1,2,3,4}.api.service
fleetctl start mg-rast-v3-web{,-discovery}@{1,2,3,4}.v3-web.service
fleetctl start mg-rast-v4-web{,-discovery}@1.v4-web.service
## confd / nginx
fleetctl start certificates.service
fleetctl start confd.service # global unit
fleetctl start [email protected]
fleetctl start [email protected]
## cassandra
# first start the nodes, wait for each to finish before starting next
for i in `seq 1 19`; do fleetctl start cassandra-simple{,-discovery}@$i.service; done
# need to manually load data on one seed if starting from scratch
# on one seed host: download load script and run it in seed container
# list of all IPS: fleetctl list-units | grep cassandra | cut -f2 -d"/" | cut -f1 | sort -u | tr "\n" ","; echo
curl -O https://raw.githubusercontent.com/MG-RAST/MG-RAST-infrastructure/master/services/cassandra-load/m5nr/load-cassandra-m5nr.sh
docker cp load-cassandra-m5nr.sh cassandra-simple:/var/lib/cassandra/load-cassandra-m5nr.sh
docker exec cassandra-simple bash -c 'apt-get update && apt-get install -y curl vim openjdk-8-jdk'
docker exec cassandra-simple bash /var/lib/cassandra/load-cassandra-m5nr.sh -i <this host IP> -a <comma separated list of all cassandra host IPs>
# this command lets you see cluster hosts and their status
docker exec cassandra-simple /usr/bin/nodetool status mgrast_abundance
docker exec cassandra-simple /usr/bin/nodetool status m5nr_v<version #>   # e.g. m5nr_v1
# this command removes missing host from cluster list - run on any host
docker exec cassandra-simple /usr/bin/nodetool removenode HostID
# this command removes a running node from cluster (run on node you want to remove), streams data to other nodes
docker exec cassandra-simple /usr/bin/nodetool decommission
docker exec cassandra-simple /usr/bin/nodetool netstats -H
# reboot of cassandra
fleetctl stop cassandra-simple{,-discovery}@{1..19}.service
fleetctl start cassandra-simple{,-discovery}@{1..19}.service
# docker exec cassandra-simple rm /root/.cassandra/cqlshrc
# docker exec cassandra-simple /usr/bin/cqlsh -e 'desc keyspaces;'
# docker exec cassandra-simple /usr/bin/cqlsh --request-timeout 600 --connect-timeout 600 -e 'USE mgrast_abundance; CONSISTENCY QUORUM; SELECT COUNT(*) FROM job_info;'
## elasticsearch
fleetctl start elasticsearch{,-discovery}@{1..8}.service
# add new m5nr version to solr-m5nr
# docker exec solr-m5nr bash -c 'cd /MG-RAST-infrastructure/services/solr-m5nr && git pull && ./setup-m5nr-core.sh <version #>'
## log-courier
# fleetctl start log-courier.service # global unit
#### start develop services ####
# fleetctl start mg-rast-v3-web{,-discovery}@1.v3-web-dev.service
# fleetctl start mg-rast-v4-web{,-discovery}@1.v4-web-dev.service
# fleetctl start api-server{,-discovery,-update}@1.api-dev.service
# fleetctl start cadvisor.service # global unit
# fleetctl start [email protected]
# fleetctl start [email protected]
### reboot of web and api fleet-units
for i in 1 2 3; do fleetctl stop mg-rast-v4-web{,-discovery}@${i}.v4-web.service; sleep 10; fleetctl start mg-rast-v4-web{,-discovery}@${i}.v4-web.service; sleep 60; done
for i in 1 2 3 4; do fleetctl stop api-server{,-discovery,-update}@${i}.api.service; sleep 10; fleetctl start api-server{,-discovery,-update}@${i}.api.service; sleep 120; done
### clean shutdown of fleet-units ###
# first save a snapshot of what is running and on which host (mainly needed for getting mongodb back up)
fleetctl list-units > fleetunits.txt
# shutdown the production services in the proper order
fleetctl stop [email protected]
fleetctl stop confd.service
fleetctl stop mg-rast-v4-web{,-discovery}@{1,2,3}.v4-web.service
fleetctl stop api-server{,-discovery,-update}@{1,2,3,4}.api.service
fleetctl stop awe-server{,-discovery,-monitor}@1.service
fleetctl stop cassandra-simple{,-discovery}@{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}.service
fleetctl stop memcached{,-discovery}.service
fleetctl stop [email protected]
fleetctl stop solr-metagenome{,-backup}@1.service
fleetctl stop mysql_metadata{,-backup}@1.service
fleetctl stop mongodb-replica{,-discovery,-update}@{1,2,3,4}.awe.service
fleetctl stop mongodb-replica{,-discovery,-update}@{1,2,3,4}.shock.service
# now shutdown any others that were missed
for UNIT in `fleetctl list-units -no-legend -fields=unit`; do fleetctl stop ${UNIT}; done
# stop and remove docker containers
for c in `docker ps -a -q`; do docker stop $c; docker rm -f $c; done
# stop via systemd (need to be on host)
sudo systemctl stop <fleet unit>