# monitor_host.yml
---
# Play targeting `localhost` with privilege escalation to root; it applies the
# monitoring roles listed below.
- hosts: localhost
  become: true
  become_user: root
  remote_user: root
  roles:
      - cloudalchemy.prometheus
      - cloudalchemy.alertmanager
      - cloudalchemy.node-exporter
      - cloudalchemy.blackbox-exporter
      - cloudalchemy.grafana
      # certbot role is currently disabled.
      # - geerlingguy.certbot
  tasks:
      # Set the cap_net_bind_service capability (effective + permitted) on the
      # grafana-server binary so it may bind to ports below 1024.
      - name: Allow grafana to bind to ports below 1024
        # argv form: the command is executed directly, so no shell quoting applies.
        command:
          argv:
            - setcap
            - cap_net_bind_service=+ep
            - /usr/sbin/grafana-server
      # Restarts the unit on every run (no condition or handler is attached).
      - name: Restart grafana
        systemd:
          name: grafana-server.service
          state: restarted
      # Install the grafana-image-renderer plugin only when its directory is
      # not already present on disk.
      - name: Check if grafana image render plugin is installed
        stat:
          # NOTE(review): assumes Grafana's default plugin directory
          # (/var/lib/grafana/plugins) — confirm against the configured
          # plugins path on the target host.
          path: /var/lib/grafana/plugins/grafana-image-renderer
        register: grafana_img_render_folder_details
      - name: Install grafana image render plugin
        command: grafana-cli plugins install grafana-image-renderer
        # Jinja2 negation is `not`; a leading `!` would be read by YAML as a
        # tag and the condition would never be evaluated as intended.
        when: not grafana_img_render_folder_details.stat.exists
  vars:
      # Static scrape targets under the `node` key. Each entry is a group of
      # host:port targets sharing the same `env` / `job` labels.
      prometheus_targets:
          node:
              # job: cluster
              - targets:
                    - maltair.ucc.asn.au:9100
                    - loveday.ucc.asn.au:9100
                    - medico.ucc.asn.au:9100
                    - magikarp.ucc.asn.au:9100
                    - mudkip.ucc.asn.au:9100
                labels:
                    env: ucc
                    job: cluster
              # job: VMs
              - targets:
                    - uccmonitor.ucc.asn.au:9100
                labels:
                    env: ucc
                    job: VMs
              # job: other
              - targets:
                    - cerberus.ucc.asn.au:9100
                    - molmol.ucc.asn.au:9100
                    - mooneye.ucc.asn.au:9100
                    - motsugo.ucc.asn.au:9100
                    - murasoi.ucc.asn.au:9100
                    - mussel.ucc.asn.au:9100
                    - samson.ucc.asn.au:9100
                    - titan.ucc.asn.au:9100
                labels:
                    env: ucc
                    job: other
              # Local Alertmanager (port 9093).
              - targets:
                    - localhost:9093
                labels:
                    env: ucc
                    job: alertmanager
              # Local Grafana (port 3000).
              - targets:
                    - localhost:3000
                labels:
                    env: ucc
                    job: grafana
      # External URL for Prometheus: the inventory host on port 9090.
      prometheus_web_external_url: "http://{{ ansible_host }}:9090"
      # Alertmanager endpoint Prometheus is pointed at: plain HTTP on the
      # loopback address, port 9093.
      prometheus_alertmanager_config:
          - scheme: http
            static_configs:
                - targets:
                      - '127.0.0.1:9093'
      # Extra scrape job: HTTP probes of each host's :9100 endpoint using the
      # `http_2xx` module, requested via the /probe path.
      prometheus_scrape_jobs:
          - job_name: 'blackbox'
            metrics_path: /probe
            params:
                module: [http_2xx]
            static_configs:
                - targets:
                      - http://uccmonitor.ucc.asn.au:9100
                      - http://motsugo.ucc.asn.au:9100
                      - http://titan.ucc.asn.au:9100
                      - http://mussel.ucc.asn.au:9100
                      - http://mooneye.ucc.asn.au:9100
                      - http://molmol.ucc.asn.au:9100
                      - http://cerberus.ucc.asn.au:9100
                      - http://loveday.ucc.asn.au:9100
                      - http://magikarp.ucc.asn.au:9100
                      - http://maltair.ucc.asn.au:9100
                      - http://medico.ucc.asn.au:9100
                      - http://mudkip.ucc.asn.au:9100
            relabel_configs:
                # Copy the listed target URL into the `target` request parameter.
                - source_labels: [__address__]
                  target_label: __param_target
                # Use that same URL as the `instance` label.
                - source_labels: [__param_target]
                  target_label: instance
                # Send the actual scrape request to the local exporter instead.
                - target_label: __address__
                  replacement: 127.0.0.1:9115  # Blackbox exporter.
      # External URL for Alertmanager: the inventory host on port 9093.
      alertmanager_external_url: 'http://{{ ansible_host }}:9093'
      # Single e-mail receiver.
      # NOTE(review): the '[email protected]' values look like redacted
      # placeholders — replace with real addresses before deploying.
      alertmanager_receivers:
          - name: 'email-wheel'
            email_configs:
                - to: '[email protected]'
                  from: '[email protected]'
                  smarthost: 'smtp.ucc.com:587'
                  auth_username: '[email protected]'
                  auth_identity: '[email protected]'
                  # NOTE(review): placeholder stored in plain text — supply the
                  # real secret from an encrypted source, not from this file.
                  auth_password: SomePasswordHere
      # Default route: every alert goes to the 'email-wheel' receiver, grouped
      # by alertname/cluster/service.
      alertmanager_route:
          group_by: ['alertname', 'cluster', 'service']
          group_wait: 30s
          group_interval: 5m
          repeat_interval: 3h
          receiver: 'email-wheel'
      # Grafana server settings. Only gzip is enabled; the HTTPS settings for
      # monitor.ucc.asn.au below are commented out.
      grafana_server:
          # protocol: https
          # http_port: 443
          # domain: monitor.ucc.asn.au
          # root_url: https://monitor.ucc.asn.au
          # cert_file: /etc/letsencrypt/live/monitor.ucc.asn.au/fullchain.pem
          # cert_key: /etc/letsencrypt/live/monitor.ucc.asn.au/privkey.pem
          enable_gzip: true
      # One default datasource: the local Prometheus on port 9090, accessed
      # in proxy mode without basic auth.
      grafana_datasources:
          - name: Prometheus
            type: prometheus
            access: proxy
            url: 'http://localhost:9090'
            isDefault: true
            basicAuth: false
      # Grafana admin account.
      # NOTE(review): 'changeme' is a placeholder credential in plain text —
      # override it from an encrypted source before deploying.
      grafana_security:
          admin_user: admin
          admin_password: 'changeme'
      # todo: make LDAP
      # Anonymous access settings: organisation 'UCC', role Viewer.
      grafana_auth:
          anonymous:
              org_name: 'UCC'
              org_role: Viewer
      # Dashboards identified by dashboard id and revision, each bound to the
      # 'Prometheus' datasource. IDs are quoted so they stay strings.
      grafana_dashboards:
          - dashboard_id: '1860' # Node Exporter Full
            revision_id: '15'
            datasource: 'Prometheus'
          - dashboard_id: '3662' # Prometheus 2.0 Overview
            revision_id: '2'
            datasource: 'Prometheus'
      # Certificate settings for monitor.ucc.asn.au.
      # NOTE(review): the geerlingguy.certbot role is commented out in `roles`,
      # so these values are presumably unused until it is re-enabled — confirm.
      certbot_certs:
          - domains:
                - monitor.ucc.asn.au
      certbot_auto_renew: true
      certbot_create_if_missing: true
      # Quoted: an unquoted value starting with `[` is parsed as a YAML flow
      # sequence, not a string.
      # NOTE(review): the address looks like a redacted placeholder — replace
      # with the real admin address.
      certbot_admin_email: '[email protected]'