---
# Code generated by Sloth (dev): https://github.com/slok/sloth.
# DO NOT EDIT.
groups:
# SLI error-ratio recording rules for the "requests-availability" SLO of the
# "k8s-apiserver" service. Every rule records the same kind of ratio over a
# different window; the window is exposed through the sloth_window label.
- name: sloth-slo-sli-recordings-k8s-apiserver-requests-availability
rules:
# 5m window: rate of apiserver_request_total whose code matches 5.. or 429,
# divided by the rate of all apiserver_request_total (both summed).
- record: slo:sli_error:ratio_rate5m
expr: |
(sum(rate(apiserver_request_total{code=~"(5..|429)"}[5m])))
/
(sum(rate(apiserver_request_total[5m])))
labels:
category: availability
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-availability
sloth_service: k8s-apiserver
sloth_slo: requests-availability
sloth_window: 5m
# Same ratio computed over a 30m rate window.
- record: slo:sli_error:ratio_rate30m
expr: |
(sum(rate(apiserver_request_total{code=~"(5..|429)"}[30m])))
/
(sum(rate(apiserver_request_total[30m])))
labels:
category: availability
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-availability
sloth_service: k8s-apiserver
sloth_slo: requests-availability
sloth_window: 30m
# Same ratio computed over a 1h rate window.
- record: slo:sli_error:ratio_rate1h
expr: |
(sum(rate(apiserver_request_total{code=~"(5..|429)"}[1h])))
/
(sum(rate(apiserver_request_total[1h])))
labels:
category: availability
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-availability
sloth_service: k8s-apiserver
sloth_slo: requests-availability
sloth_window: 1h
# Same ratio computed over a 2h rate window.
- record: slo:sli_error:ratio_rate2h
expr: |
(sum(rate(apiserver_request_total{code=~"(5..|429)"}[2h])))
/
(sum(rate(apiserver_request_total[2h])))
labels:
category: availability
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-availability
sloth_service: k8s-apiserver
sloth_slo: requests-availability
sloth_window: 2h
# Same ratio computed over a 6h rate window.
- record: slo:sli_error:ratio_rate6h
expr: |
(sum(rate(apiserver_request_total{code=~"(5..|429)"}[6h])))
/
(sum(rate(apiserver_request_total[6h])))
labels:
category: availability
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-availability
sloth_service: k8s-apiserver
sloth_slo: requests-availability
sloth_window: 6h
# Same ratio computed over a 1d rate window.
- record: slo:sli_error:ratio_rate1d
expr: |
(sum(rate(apiserver_request_total{code=~"(5..|429)"}[1d])))
/
(sum(rate(apiserver_request_total[1d])))
labels:
category: availability
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-availability
sloth_service: k8s-apiserver
sloth_slo: requests-availability
sloth_window: 1d
# Same ratio computed over a 3d rate window.
- record: slo:sli_error:ratio_rate3d
expr: |
(sum(rate(apiserver_request_total{code=~"(5..|429)"}[3d])))
/
(sum(rate(apiserver_request_total[3d])))
labels:
category: availability
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-availability
sloth_service: k8s-apiserver
sloth_slo: requests-availability
sloth_window: 3d
# 30d window: unlike the rules above, this one reads the recorded 5m series
# instead of the raw counter. It divides sum_over_time by count_over_time over
# 30d (the mean of the 5m samples) and ignores sloth_window when matching the
# two sides of the division.
- record: slo:sli_error:ratio_rate30d
expr: |
sum_over_time(slo:sli_error:ratio_rate5m{sloth_id="k8s-apiserver-requests-availability", sloth_service="k8s-apiserver", sloth_slo="requests-availability"}[30d])
/ ignoring (sloth_window)
count_over_time(slo:sli_error:ratio_rate5m{sloth_id="k8s-apiserver-requests-availability", sloth_service="k8s-apiserver", sloth_slo="requests-availability"}[30d])
labels:
category: availability
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-availability
sloth_service: k8s-apiserver
sloth_slo: requests-availability
sloth_window: 30d
# Metadata recording rules for the "requests-availability" SLO of
# "k8s-apiserver": constant series describing the SLO plus burn-rate series
# derived from the slo:sli_error:ratio_rate* recordings.
- name: sloth-slo-meta-recordings-k8s-apiserver-requests-availability
rules:
# Constant series holding the SLO objective as a ratio.
# NOTE(review): the trailing digits of 0.9990000000000001 look like a float
# artefact of the generator for the "99.9" objective shown in sloth_slo_info
# below - confirm, and fix in the generator rather than in this file.
- record: slo:objective:ratio
expr: vector(0.9990000000000001)
labels:
category: availability
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-availability
sloth_service: k8s-apiserver
sloth_slo: requests-availability
# Constant series holding the error budget, written as 1 minus the objective.
- record: slo:error_budget:ratio
expr: vector(1-0.9990000000000001)
labels:
category: availability
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-availability
sloth_service: k8s-apiserver
sloth_slo: requests-availability
# Constant series holding the SLO period length in days (30).
- record: slo:time_period:days
expr: vector(30)
labels:
category: availability
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-availability
sloth_service: k8s-apiserver
sloth_slo: requests-availability
# Current burn rate: the 5m error ratio divided by the error budget. The two
# series are matched on sloth_id, sloth_slo and sloth_service only.
- record: slo:current_burn_rate:ratio
expr: |
slo:sli_error:ratio_rate5m{sloth_id="k8s-apiserver-requests-availability", sloth_service="k8s-apiserver", sloth_slo="requests-availability"}
/ on(sloth_id, sloth_slo, sloth_service) group_left
slo:error_budget:ratio{sloth_id="k8s-apiserver-requests-availability", sloth_service="k8s-apiserver", sloth_slo="requests-availability"}
labels:
category: availability
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-availability
sloth_service: k8s-apiserver
sloth_slo: requests-availability
# Period burn rate: the 30d error ratio divided by the error budget, using the
# same label matching as the current burn rate.
- record: slo:period_burn_rate:ratio
expr: |
slo:sli_error:ratio_rate30d{sloth_id="k8s-apiserver-requests-availability", sloth_service="k8s-apiserver", sloth_slo="requests-availability"}
/ on(sloth_id, sloth_slo, sloth_service) group_left
slo:error_budget:ratio{sloth_id="k8s-apiserver-requests-availability", sloth_service="k8s-apiserver", sloth_slo="requests-availability"}
labels:
category: availability
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-availability
sloth_service: k8s-apiserver
sloth_slo: requests-availability
# Remaining error budget for the period: 1 minus the period burn rate.
# The expression is a plain scalar continued on the following line.
- record: slo:period_error_budget_remaining:ratio
expr: 1 - slo:period_burn_rate:ratio{sloth_id="k8s-apiserver-requests-availability",
sloth_service="k8s-apiserver", sloth_slo="requests-availability"}
labels:
category: availability
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-availability
sloth_service: k8s-apiserver
sloth_slo: requests-availability
# Info series with constant value 1; the useful data is in its labels
# (generation mode, objective, spec and generator version).
- record: sloth_slo_info
expr: vector(1)
labels:
category: availability
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-availability
sloth_mode: cli-gen-prom
sloth_objective: "99.9"
sloth_service: k8s-apiserver
sloth_slo: requests-availability
sloth_spec: prometheus/v1
sloth_version: dev
# Alerting rules for the "requests-availability" SLO of "k8s-apiserver".
# Both rules use the same alert name and are told apart by their severity and
# sloth_severity labels (critical/page versus warning/ticket).
# NOTE(review): the literal 0.0009999999999999432 used in the thresholds is not
# textually the same value as the `1-0.9990000000000001` recorded by
# slo:error_budget:ratio; presumably a float artefact of the generator -
# confirm, and change the generator rather than this file.
- name: sloth-slo-alerts-k8s-apiserver-requests-availability
rules:
# Page alert. Fires when either pair of windows is over its threshold:
#   - 5m AND 1h error ratios both above 14.4 x 0.0009999999999999432, or
#   - 30m AND 6h error ratios both above 6 x 0.0009999999999999432.
# max(...) without (sloth_window) drops the window label so the two sides of
# each `and` can match.
- alert: K8sApiserverAvailabilityAlert
expr: |
(
max(slo:sli_error:ratio_rate5m{sloth_id="k8s-apiserver-requests-availability", sloth_service="k8s-apiserver", sloth_slo="requests-availability"} > (14.4 * 0.0009999999999999432)) without (sloth_window)
and
max(slo:sli_error:ratio_rate1h{sloth_id="k8s-apiserver-requests-availability", sloth_service="k8s-apiserver", sloth_slo="requests-availability"} > (14.4 * 0.0009999999999999432)) without (sloth_window)
)
or
(
max(slo:sli_error:ratio_rate30m{sloth_id="k8s-apiserver-requests-availability", sloth_service="k8s-apiserver", sloth_slo="requests-availability"} > (6 * 0.0009999999999999432)) without (sloth_window)
and
max(slo:sli_error:ratio_rate6h{sloth_id="k8s-apiserver-requests-availability", sloth_service="k8s-apiserver", sloth_slo="requests-availability"} > (6 * 0.0009999999999999432)) without (sloth_window)
)
labels:
category: availability
severity: critical
sloth_severity: page
annotations:
runbook: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
summary: '{{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error budget burn
rate is over expected.'
title: (page) {{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error budget
burn rate is too fast.
# Ticket alert. Same structure with longer windows and lower factors:
#   - 2h AND 1d error ratios both above 3 x 0.0009999999999999432, or
#   - 6h AND 3d error ratios both above 1 x 0.0009999999999999432.
- alert: K8sApiserverAvailabilityAlert
expr: |
(
max(slo:sli_error:ratio_rate2h{sloth_id="k8s-apiserver-requests-availability", sloth_service="k8s-apiserver", sloth_slo="requests-availability"} > (3 * 0.0009999999999999432)) without (sloth_window)
and
max(slo:sli_error:ratio_rate1d{sloth_id="k8s-apiserver-requests-availability", sloth_service="k8s-apiserver", sloth_slo="requests-availability"} > (3 * 0.0009999999999999432)) without (sloth_window)
)
or
(
max(slo:sli_error:ratio_rate6h{sloth_id="k8s-apiserver-requests-availability", sloth_service="k8s-apiserver", sloth_slo="requests-availability"} > (1 * 0.0009999999999999432)) without (sloth_window)
and
max(slo:sli_error:ratio_rate3d{sloth_id="k8s-apiserver-requests-availability", sloth_service="k8s-apiserver", sloth_slo="requests-availability"} > (1 * 0.0009999999999999432)) without (sloth_window)
)
labels:
category: availability
severity: warning
sloth_severity: ticket
annotations:
runbook: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
summary: '{{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error budget burn
rate is over expected.'
title: (ticket) {{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error budget
burn rate is too fast.
# SLI error-ratio recording rules for the "requests-latency" SLO of the
# "k8s-apiserver" service. Every rule records the same kind of ratio over a
# different window; the window is exposed through the sloth_window label.
- name: sloth-slo-sli-recordings-k8s-apiserver-requests-latency
rules:
# 5m window: (rate of all non-WATCH requests minus rate of non-WATCH requests
# counted in the le="0.4" bucket of apiserver_request_duration_seconds),
# divided by the rate of all non-WATCH requests.
- record: slo:sli_error:ratio_rate5m
expr: |
((
sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[5m]))
-
sum(rate(apiserver_request_duration_seconds_bucket{le="0.4",verb!="WATCH"}[5m]))
)
)
/
(sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[5m])))
labels:
category: latency
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-latency
sloth_service: k8s-apiserver
sloth_slo: requests-latency
sloth_window: 5m
# Same ratio computed over a 30m rate window.
- record: slo:sli_error:ratio_rate30m
expr: |
((
sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[30m]))
-
sum(rate(apiserver_request_duration_seconds_bucket{le="0.4",verb!="WATCH"}[30m]))
)
)
/
(sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[30m])))
labels:
category: latency
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-latency
sloth_service: k8s-apiserver
sloth_slo: requests-latency
sloth_window: 30m
# Same ratio computed over a 1h rate window.
- record: slo:sli_error:ratio_rate1h
expr: |
((
sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[1h]))
-
sum(rate(apiserver_request_duration_seconds_bucket{le="0.4",verb!="WATCH"}[1h]))
)
)
/
(sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[1h])))
labels:
category: latency
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-latency
sloth_service: k8s-apiserver
sloth_slo: requests-latency
sloth_window: 1h
# Same ratio computed over a 2h rate window.
- record: slo:sli_error:ratio_rate2h
expr: |
((
sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[2h]))
-
sum(rate(apiserver_request_duration_seconds_bucket{le="0.4",verb!="WATCH"}[2h]))
)
)
/
(sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[2h])))
labels:
category: latency
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-latency
sloth_service: k8s-apiserver
sloth_slo: requests-latency
sloth_window: 2h
# Same ratio computed over a 6h rate window.
- record: slo:sli_error:ratio_rate6h
expr: |
((
sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[6h]))
-
sum(rate(apiserver_request_duration_seconds_bucket{le="0.4",verb!="WATCH"}[6h]))
)
)
/
(sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[6h])))
labels:
category: latency
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-latency
sloth_service: k8s-apiserver
sloth_slo: requests-latency
sloth_window: 6h
# Same ratio computed over a 1d rate window.
- record: slo:sli_error:ratio_rate1d
expr: |
((
sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[1d]))
-
sum(rate(apiserver_request_duration_seconds_bucket{le="0.4",verb!="WATCH"}[1d]))
)
)
/
(sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[1d])))
labels:
category: latency
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-latency
sloth_service: k8s-apiserver
sloth_slo: requests-latency
sloth_window: 1d
# Same ratio computed over a 3d rate window.
- record: slo:sli_error:ratio_rate3d
expr: |
((
sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[3d]))
-
sum(rate(apiserver_request_duration_seconds_bucket{le="0.4",verb!="WATCH"}[3d]))
)
)
/
(sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[3d])))
labels:
category: latency
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-latency
sloth_service: k8s-apiserver
sloth_slo: requests-latency
sloth_window: 3d
# 30d window: unlike the rules above, this one reads the recorded 5m series
# instead of the raw histogram. It divides sum_over_time by count_over_time
# over 30d (the mean of the 5m samples) and ignores sloth_window when matching
# the two sides of the division.
- record: slo:sli_error:ratio_rate30d
expr: |
sum_over_time(slo:sli_error:ratio_rate5m{sloth_id="k8s-apiserver-requests-latency", sloth_service="k8s-apiserver", sloth_slo="requests-latency"}[30d])
/ ignoring (sloth_window)
count_over_time(slo:sli_error:ratio_rate5m{sloth_id="k8s-apiserver-requests-latency", sloth_service="k8s-apiserver", sloth_slo="requests-latency"}[30d])
labels:
category: latency
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-latency
sloth_service: k8s-apiserver
sloth_slo: requests-latency
sloth_window: 30d
# Metadata recording rules for the "requests-latency" SLO of "k8s-apiserver":
# constant series describing the SLO plus burn-rate series derived from the
# slo:sli_error:ratio_rate* recordings.
- name: sloth-slo-meta-recordings-k8s-apiserver-requests-latency
rules:
# Constant series holding the SLO objective as a ratio (0.99).
- record: slo:objective:ratio
expr: vector(0.99)
labels:
category: latency
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-latency
sloth_service: k8s-apiserver
sloth_slo: requests-latency
# Constant series holding the error budget, written as 1 minus the objective.
- record: slo:error_budget:ratio
expr: vector(1-0.99)
labels:
category: latency
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-latency
sloth_service: k8s-apiserver
sloth_slo: requests-latency
# Constant series holding the SLO period length in days (30).
- record: slo:time_period:days
expr: vector(30)
labels:
category: latency
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-latency
sloth_service: k8s-apiserver
sloth_slo: requests-latency
# Current burn rate: the 5m error ratio divided by the error budget. The two
# series are matched on sloth_id, sloth_slo and sloth_service only.
- record: slo:current_burn_rate:ratio
expr: |
slo:sli_error:ratio_rate5m{sloth_id="k8s-apiserver-requests-latency", sloth_service="k8s-apiserver", sloth_slo="requests-latency"}
/ on(sloth_id, sloth_slo, sloth_service) group_left
slo:error_budget:ratio{sloth_id="k8s-apiserver-requests-latency", sloth_service="k8s-apiserver", sloth_slo="requests-latency"}
labels:
category: latency
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-latency
sloth_service: k8s-apiserver
sloth_slo: requests-latency
# Period burn rate: the 30d error ratio divided by the error budget, using the
# same label matching as the current burn rate.
- record: slo:period_burn_rate:ratio
expr: |
slo:sli_error:ratio_rate30d{sloth_id="k8s-apiserver-requests-latency", sloth_service="k8s-apiserver", sloth_slo="requests-latency"}
/ on(sloth_id, sloth_slo, sloth_service) group_left
slo:error_budget:ratio{sloth_id="k8s-apiserver-requests-latency", sloth_service="k8s-apiserver", sloth_slo="requests-latency"}
labels:
category: latency
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-latency
sloth_service: k8s-apiserver
sloth_slo: requests-latency
# Remaining error budget for the period: 1 minus the period burn rate.
# The expression is a plain scalar continued on the following line.
- record: slo:period_error_budget_remaining:ratio
expr: 1 - slo:period_burn_rate:ratio{sloth_id="k8s-apiserver-requests-latency",
sloth_service="k8s-apiserver", sloth_slo="requests-latency"}
labels:
category: latency
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-latency
sloth_service: k8s-apiserver
sloth_slo: requests-latency
# Info series with constant value 1; the useful data is in its labels
# (generation mode, objective, spec and generator version).
- record: sloth_slo_info
expr: vector(1)
labels:
category: latency
cluster: valhalla
cmd: examplesgen.sh
component: kubernetes
sloth_id: k8s-apiserver-requests-latency
sloth_mode: cli-gen-prom
sloth_objective: "99"
sloth_service: k8s-apiserver
sloth_slo: requests-latency
sloth_spec: prometheus/v1
sloth_version: dev
# Alerting rules for the "requests-latency" SLO of "k8s-apiserver".
# Both rules use the same alert name and are told apart by their severity and
# sloth_severity labels (critical/page versus warning/ticket). Every threshold
# is written as a factor multiplied by 0.01.
- name: sloth-slo-alerts-k8s-apiserver-requests-latency
rules:
# Page alert. Fires when either pair of windows is over its threshold:
#   - 5m AND 1h error ratios both above 14.4 x 0.01, or
#   - 30m AND 6h error ratios both above 6 x 0.01.
# max(...) without (sloth_window) drops the window label so the two sides of
# each `and` can match.
- alert: K8sApiserverLatencyAlert
expr: |
(
max(slo:sli_error:ratio_rate5m{sloth_id="k8s-apiserver-requests-latency", sloth_service="k8s-apiserver", sloth_slo="requests-latency"} > (14.4 * 0.01)) without (sloth_window)
and
max(slo:sli_error:ratio_rate1h{sloth_id="k8s-apiserver-requests-latency", sloth_service="k8s-apiserver", sloth_slo="requests-latency"} > (14.4 * 0.01)) without (sloth_window)
)
or
(
max(slo:sli_error:ratio_rate30m{sloth_id="k8s-apiserver-requests-latency", sloth_service="k8s-apiserver", sloth_slo="requests-latency"} > (6 * 0.01)) without (sloth_window)
and
max(slo:sli_error:ratio_rate6h{sloth_id="k8s-apiserver-requests-latency", sloth_service="k8s-apiserver", sloth_slo="requests-latency"} > (6 * 0.01)) without (sloth_window)
)
labels:
category: latency
severity: critical
sloth_severity: page
annotations:
runbook: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
summary: '{{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error budget burn
rate is over expected.'
title: (page) {{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error budget
burn rate is too fast.
# Ticket alert. Same structure with longer windows and lower factors:
#   - 2h AND 1d error ratios both above 3 x 0.01, or
#   - 6h AND 3d error ratios both above 1 x 0.01.
- alert: K8sApiserverLatencyAlert
expr: |
(
max(slo:sli_error:ratio_rate2h{sloth_id="k8s-apiserver-requests-latency", sloth_service="k8s-apiserver", sloth_slo="requests-latency"} > (3 * 0.01)) without (sloth_window)
and
max(slo:sli_error:ratio_rate1d{sloth_id="k8s-apiserver-requests-latency", sloth_service="k8s-apiserver", sloth_slo="requests-latency"} > (3 * 0.01)) without (sloth_window)
)
or
(
max(slo:sli_error:ratio_rate6h{sloth_id="k8s-apiserver-requests-latency", sloth_service="k8s-apiserver", sloth_slo="requests-latency"} > (1 * 0.01)) without (sloth_window)
and
max(slo:sli_error:ratio_rate3d{sloth_id="k8s-apiserver-requests-latency", sloth_service="k8s-apiserver", sloth_slo="requests-latency"} > (1 * 0.01)) without (sloth_window)
)
labels:
category: latency
severity: warning
sloth_severity: ticket
annotations:
runbook: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
summary: '{{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error budget burn
rate is over expected.'
title: (ticket) {{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error budget
burn rate is too fast.