production_fleet_units.txt


### Before starting any fleet units in new cluster, configure etcd ###
# run the helper script below before any of the fleetctl start commands in this file
# NOTE(review): presumably it records the production service images in etcd (inferred from the script name) - confirm against the script

./production_service_images_in_etcd.sh


#### start production services ####

### NOTE: if a fleet unit is constantly starting and stopping there is a problem, use journalctl to troubleshoot

## mongodb first to get shock up
## identify host that had latest mongodb on it, edit fleet-unit so it only starts on that host

## on host: ONLY do this first if there was an unclean shutdown
## the rm commands delete the local.* files, the mongod.lock file and the journal
## directory under /media/ephemeral/mongodb-shock; the last command then runs mongod --recover in the container
## NOTE(review): deleting local.* presumably discards this node's local replica-set state - confirm before running
sudo rm -f /media/ephemeral/mongodb-shock/local.*
sudo rm -f /media/ephemeral/mongodb-shock/mongod.lock
sudo rm -rf /media/ephemeral/mongodb-shock/journal
docker exec mongodb-replica mongod --recover

## force a member to be primary when only one in cluster
## where i = position of host in member list
# > use admin
# > rs.status()
# > cfg = rs.conf()
# > cfg.members = [cfg.members[i]]
# > rs.reconfig(cfg, {force : true})

## on host: start first instance and initiate replica set on it
## COREOS_PUBLIC_IPV4, MONGOD_ADMIN_NAME and MONGOD_ADMIN_PASS must be set in the shell that runs these commands
fleetctl start mongodb-replica{,-discovery}@1.shock.service
## the --eval text is double-quoted so the shell substitutes this host's address into the member entry;
## inside single quotes mongo would receive the literal text ${COREOS_PUBLIC_IPV4} as the host name
docker exec mongodb-replica mongo --quiet -u "$MONGOD_ADMIN_NAME" -p "$MONGOD_ADMIN_PASS" --eval "printjson(rs.initiate({_id:\"shock\",members:[{_id:0,host:\"${COREOS_PUBLIC_IPV4:?must be set to the public IPv4 of this host}:27017\"}]}))" admin
fleetctl start mongodb-replica-update@1.shock.service

# now start the remaining instances (2, 3 and 4) one at a time, waiting for each to finish replicating before starting the next
# on host: check replica status with the rs.status() command further below, 'STARTUP2' mode means still replicating
fleetctl start mongodb-replica{,-discovery,-update}@2.shock.service
fleetctl start mongodb-replica{,-discovery,-update}@3.shock.service
fleetctl start mongodb-replica{,-discovery,-update}@4.shock.service

# this command lets you see cluster hosts and their status
docker exec mongodb-replica mongo --quiet -u "$MONGOD_ADMIN_NAME" -p "$MONGOD_ADMIN_PASS" --eval 'printjson(rs.status())' admin

# this command removes missing host from cluster list - run on primary only
# replace <missing host IP> with the address of the member to remove, as listed by rs.status();
# the --eval text is single-quoted, so the shell substitutes nothing into it
docker exec mongodb-replica mongo --quiet -u "$MONGOD_ADMIN_NAME" -p "$MONGOD_ADMIN_PASS" --eval 'printjson(rs.remove("<missing host IP>:27017"))' admin

## for awe mongodb repeat above, replace 'shock' with 'awe'

## mysql / solr / memcache / awe

# each {,...} brace list expands to the base unit name plus the listed companion units
fleetctl start mysql_metadata{,-backup}@1.service
fleetctl start solr-m5nr@1.service
fleetctl start solr-metagenome{,-backup}@1.service
fleetctl start memcached.service # global unit
fleetctl start memcached-discovery.service # global unit / start only after memcached.service above has finished starting
fleetctl start awe-server{,-discovery,-monitor}@1.service

## api / web

# api-server and v3-web: instances 1-4, each with its companion units, in one fleetctl call per service
fleetctl start api-server{,-discovery,-update}@{1,2,3,4}.api.service
fleetctl start mg-rast-v3-web{,-discovery}@{1,2,3,4}.v3-web.service
# NOTE(review): only v4-web instance 1 is started here, while the reboot and clean-shutdown sections of this file address instances 1-3 - confirm the intended count
fleetctl start mg-rast-v4-web{,-discovery}@1.v4-web.service

## confd / nginx

# NOTE(review): the listed order is presumably the required start order (certificates and confd before nginx) - confirm
fleetctl start certificates.service
fleetctl start confd.service # global unit
fleetctl start mg-rast-nginx@1.service
fleetctl start certs_renewal@1.service

## cassandra
# start cassandra nodes 1 through 19 in numeric order, one fleetctl call per node
# (node unit plus its discovery unit); wait for each to finish before starting next
for node in {1..19}; do
  fleetctl start "cassandra-simple@${node}.service" "cassandra-simple-discovery@${node}.service"
done

# need to manually load data on one seed if starting from scratch
# on one seed host: download load script and run it in seed container
# list of all IPS: fleetctl list-units | grep cassandra | cut -f2 -d"/" | cut -f1 | sort -u | tr "\n" ","; echo
# -f makes curl exit non-zero on an HTTP error instead of saving the error page under the script's name
curl -fO https://raw.githubusercontent.com/MG-RAST/MG-RAST-infrastructure/master/services/cassandra-load/m5nr/load-cassandra-m5nr.sh
docker cp load-cassandra-m5nr.sh cassandra-simple:/var/lib/cassandra/load-cassandra-m5nr.sh
docker exec cassandra-simple bash -c 'apt-get update && apt-get install -y curl vim openjdk-8-jdk'
# replace both <...> placeholders before running
docker exec cassandra-simple bash /var/lib/cassandra/load-cassandra-m5nr.sh -i <this host IP> -a <comma separated list of all cassandra host IPs>

# this command lets you see cluster hosts and their status
docker exec cassandra-simple /usr/bin/nodetool status mgrast_abundance
# replace <version #> with the m5nr version number, e.g. m5nr_v1
docker exec cassandra-simple /usr/bin/nodetool status m5nr_v<version #>

# this command removes missing host from cluster list - run on any host
# replace HostID with the ID of the missing node
# NOTE(review): presumably the Host ID value shown by nodetool status - confirm
docker exec cassandra-simple /usr/bin/nodetool removenode HostID

# this command removes a running node from cluster (run on node you want to remove), streams data to other nodes
docker exec cassandra-simple /usr/bin/nodetool decommission
# NOTE(review): presumably used to watch the streaming started by decommission - confirm
docker exec cassandra-simple /usr/bin/nodetool netstats -H

# reboot of cassandra
# stops the node and discovery units for instances 1-19 in one fleetctl call, then starts them again in another
fleetctl stop cassandra-simple{,-discovery}@{1..19}.service
fleetctl start cassandra-simple{,-discovery}@{1..19}.service

# docker exec cassandra-simple rm /root/.cassandra/cqlshrc
# docker exec cassandra-simple /usr/bin/cqlsh -e 'desc keyspaces;'
# docker exec cassandra-simple /usr/bin/cqlsh --request-timeout 600 --connect-timeout 600 -e 'USE mgrast_abundance; CONSISTENCY QUORUM; SELECT COUNT(*) FROM job_info;'

## elasticsearch
# starts elasticsearch instances 1-8 and their discovery units in a single fleetctl call
fleetctl start elasticsearch{,-discovery}@{1..8}.service

# add new m5nr version to solr-m5nr
# docker exec solr-m5nr bash -c 'cd /MG-RAST-infrastructure/services/solr-m5nr && git pull && ./setup-m5nr-core.sh <version #>'

## log-courier
# fleetctl start log-courier.service # global unit

#### start develop services ####

# fleetctl start mg-rast-v3-web{,-discovery}@1.v3-web-dev.service
# fleetctl start mg-rast-v4-web{,-discovery}@1.v4-web-dev.service
# fleetctl start api-server{,-discovery,-update}@1.api-dev.service
# fleetctl start cadvisor.service  # global unit
# fleetctl start opscenter@1.service
# fleetctl start fleetui@1.service

### reboot of web and api fleet-units
# restart one instance at a time: stop it, pause 10s, start it again, then pause before moving on

# v4-web instances 1-3, 60s pause after each instance
for n in 1 2 3; do
  fleetctl stop mg-rast-v4-web{,-discovery}@"${n}".v4-web.service
  sleep 10
  fleetctl start mg-rast-v4-web{,-discovery}@"${n}".v4-web.service
  sleep 60
done

# api-server instances 1-4, 120s pause after each instance
for n in 1 2 3 4; do
  fleetctl stop api-server{,-discovery,-update}@"${n}".api.service
  sleep 10
  fleetctl start api-server{,-discovery,-update}@"${n}".api.service
  sleep 120
done

### clean shutdown of fleet-units ###

# first save a snapshot of what is running and on which host (mainly needed for getting mongodb back up)
fleetctl list-units > fleetunits.txt

# shutdown the production services in the proper order
# NOTE(review): the cassandra line below stops instances 1-20, while the start and reboot commands in this file use 1-19 - confirm the node count
# NOTE(review): mg-rast-v3-web, elasticsearch, certificates and certs_renewal units are started in this file but not stopped explicitly here; they are left to the catch-all loop that follows this list

fleetctl stop mg-rast-nginx@1.service
fleetctl stop confd.service
fleetctl stop mg-rast-v4-web{,-discovery}@{1,2,3}.v4-web.service
fleetctl stop api-server{,-discovery,-update}@{1,2,3,4}.api.service
fleetctl stop awe-server{,-discovery,-monitor}@1.service
fleetctl stop cassandra-simple{,-discovery}@{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}.service
fleetctl stop memcached{,-discovery}.service
fleetctl stop solr-m5nr@1.service
fleetctl stop solr-metagenome{,-backup}@1.service
fleetctl stop mysql_metadata{,-backup}@1.service
fleetctl stop mongodb-replica{,-discovery,-update}@{1,2,3,4}.awe.service
fleetctl stop mongodb-replica{,-discovery,-update}@{1,2,3,4}.shock.service

# now shutdown any others that were missed
# stops every unit that fleetctl list-units still reports, one fleetctl call per unit
for unit in $(fleetctl list-units -no-legend -fields=unit); do
  fleetctl stop "${unit}"
done

# stop and remove docker containers
# covers every container id printed by docker ps -a -q: stop it, then force-remove it
for cid in $(docker ps -a -q); do
  docker stop "${cid}"
  docker rm -f "${cid}"
done

# stop via systemd (need to be on host)
# replace <fleet unit> with the name of the unit before running
sudo systemctl stop <fleet unit>