dockerland · yantisj · Mar 2, 2017
diff --git a/talks/20170301_Container-Monitoring/README.md b/talks/20170301_Container-Monitoring/README.md
@@ -0,0 +1,53 @@
+# Overview
+
+## Prometheus
+
+Prometheus, a [Cloud Native Computing Foundation](https://cncf.io/) project, is a systems and service monitoring system. It collects metrics
+from configured targets at given intervals, evaluates rule expressions,
+displays the results, and can trigger alerts if some condition is observed
+to be true.
+
+### Highlights
+
+- Consumes up to 800,000 metrics per second on a single server
+- Static Go Binary
+- Supports both pull and push methods
+- Large number of clients (cAdvisor, node-exporter, experimental docker support in 1.13)
+- Supports advanced rule evaluation such as linear predictions and quantile analysis
+
+### Caveats
+
+- No down-sampling support
+- No official long-term storage support (two week default)
+- No clustering (must run multiple instances for redundancy)
+- Must run separate alert manager (or depend on Grafana)
+
+### Pull versus Push
+
+- Both are scalable
+- Adopt the model that makes sense in your environment
+- Personally prefer pull: clients need no reconfiguration when a new instance is deployed
+
+## Grafana
+
+Creates beautiful charts from time series databases, including Prometheus and others.
+
+### Highlights
+
+- Preferred Prometheus visualization tool
+- Built-in alerting as of 4.0
+- Query syntax is identical to Prometheus
+- Canned dashboards for cAdvisor, node-exporter, Redis, etc. are available on the Grafana website
+
+## cAdvisor
+
+- Container exporter from Google
+- Native Prometheus support at /metrics
+- Provides network, storage, CPU and memory metrics per container for Prometheus
+- Canned dashboard available for Grafana
+
+## Custom Endpoints
+
+- Build /metrics endpoints directly into your application
+- Expose latency, call counts, and other metrics directly to Prometheus
+
diff --git a/talks/20170301_Container-Monitoring/docker-compose.yml b/talks/20170301_Container-Monitoring/docker-compose.yml
@@ -0,0 +1,47 @@
+version: '2'
+services:
+
+  # Prometheus server; container port 9090 is published on host port 19090.
+  prometheus:
+    image: prom/prometheus
+    ports:
+      - "19090:9090"
+    volumes:
+      - ./prometheus.yml:/etc/prometheus/prometheus.yml
+      # Optional persistent storage (disabled). Two candidate mounts are kept;
+      # NOTE(review): confirm which container path this image version uses.
+      # - ./prometheus-data:/prometheus-data
+      # - ./prometheus-data:/prometheus/data
+
+  # cAdvisor reads container stats from the host, so it needs the host's
+  # root filesystem, /var/run, /sys and Docker's data directory mounted in
+  # (the mounts documented for running cAdvisor inside a container).
+  cadvisor:
+    image: google/cadvisor:latest
+    ports:
+      - "9080:8080"
+    volumes:
+      - /:/rootfs:ro
+      - /var/run:/var/run:rw
+      - /sys:/sys:ro
+      - /var/lib/docker/:/var/lib/docker:ro
+
+  node-exporter:
+    image: prom/node-exporter
+    ports:
+      - "9100:9100"
+
+  grafana:
+    image: grafana/grafana
+    ports:
+      - "3000:3000"
+    environment:
+      # Demo-only settings: change the admin password and disable anonymous
+      # access before exposing this beyond a local machine.
+      - GF_SECURITY_ADMIN_PASSWORD=secret
+      - GF_AUTH_ANONYMOUS_ENABLED=true
+    volumes:
+      # Grafana state (database, sessions) lives in ./grafana on the host.
+      - ./grafana:/var/lib/grafana
+    links:
+      - prometheus
diff --git a/talks/20170301_Container-Monitoring/grafana/grafana.db b/talks/20170301_Container-Monitoring/grafana/grafana.db
diff --git a/talks/20170301_Container-Monitoring/grafana/sessions/1/f/1fcab8fe71c1164f b/talks/20170301_Container-Monitoring/grafana/sessions/1/f/1fcab8fe71c1164f
diff --git a/talks/20170301_Container-Monitoring/prometheus.yml b/talks/20170301_Container-Monitoring/prometheus.yml
@@ -0,0 +1,45 @@
+# Prometheus configuration for the container-monitoring demo stack.
+global:
+  # Default scrape cadence is 15s (Prometheus' own default is 1 minute).
+  # Every job below overrides this with its own 60s interval.
+  scrape_interval: 15s
+  # Evaluate rules every 15 seconds (the default is every 1 minute).
+  evaluation_interval: 15s
+  # scrape_timeout is left at the global default (10s).
+
+  # Labels attached to any time series or alert when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+    monitor: "dcmap-monitor"
+
+# Rule files are loaded once and then evaluated periodically according to
+# the global 'evaluation_interval'. None are enabled in this demo.
+rule_files:
+  # - "first.rules"
+  # - "second.rules"
+
+# Scrape jobs. Each job_name is added as a `job=<job_name>` label to every
+# time series scraped by that job. metrics_path defaults to '/metrics' and
+# scheme defaults to 'http'; no job overrides either. Targets are the
+# service names and container ports declared in docker-compose.yml.
+scrape_configs:
+  # Prometheus scraping itself.
+  - job_name: "prometheus"
+    scrape_interval: 60s
+    static_configs:
+      - targets:
+          - "prometheus:9090"
+
+  # cAdvisor: per-container metrics.
+  - job_name: "cadvisor"
+    scrape_interval: 60s
+    static_configs:
+      - targets:
+          - "cadvisor:8080"
+
+  # node-exporter.
+  - job_name: "node-exporter"
+    scrape_interval: 60s
+    static_configs:
+      - targets:
+          - "node-exporter:9100"