From 81d4fb36a9a443dee2f399bc0dea3ed03b4163af Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Wed, 21 Jan 2026 22:54:38 +0900 Subject: [PATCH 01/23] =?UTF-8?q?feat(build.gradle):=20prometheus=20?= =?UTF-8?q?=EC=9D=98=EC=A1=B4=EC=84=B1=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build.gradle | 3 +++ 1 file changed, 3 insertions(+) diff --git a/build.gradle b/build.gradle index af69a1c6..d742c96c 100644 --- a/build.gradle +++ b/build.gradle @@ -54,6 +54,9 @@ dependencies { implementation 'net.logstash.logback:logstash-logback-encoder:8.1' implementation 'com.github.napstr:logback-discord-appender:1.0.0' + // Metrics + implementation 'io.micrometer:micrometer-registry-prometheus' + // Test testImplementation 'org.springframework.boot:spring-boot-starter-test' testRuntimeOnly 'org.junit.platform:junit-platform-launcher' From 212a2a3af92772a06f6511be8587fdb4cd3dc5b4 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Wed, 21 Jan 2026 23:00:06 +0900 Subject: [PATCH 02/23] =?UTF-8?q?feat(global):=20application-monitoring.ya?= =?UTF-8?q?ml=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/resources/application-dev.yaml | 4 ---- src/main/resources/application-monitoring.yaml | 18 ++++++++++++++++++ src/main/resources/application-prod.yaml | 6 ++---- src/main/resources/application.yaml | 1 + 4 files changed, 21 insertions(+), 8 deletions(-) create mode 100644 src/main/resources/application-monitoring.yaml diff --git a/src/main/resources/application-dev.yaml b/src/main/resources/application-dev.yaml index eb402da1..f36034ae 100644 --- a/src/main/resources/application-dev.yaml +++ b/src/main/resources/application-dev.yaml @@ -10,10 +10,6 @@ spring: base-url: ${SERVER_URL:http://localhost:8080} management: - endpoints: - web: - exposure: - include: health endpoint: health: show-details: always diff --git a/src/main/resources/application-monitoring.yaml b/src/main/resources/application-monitoring.yaml new file mode 100644 index 00000000..7c62aaa6 --- /dev/null +++ b/src/main/resources/application-monitoring.yaml @@ -0,0 +1,18 @@ +management: + endpoints: + web: + exposure: + include: health,prometheus,metrics,info + endpoint: + health: + show-details: when_authorized + prometheus: + access: + metrics: + tags: + application: cherrish + distribution: + percentiles-histogram: + http.server.requests: true + percentiles: + http.server.requests: 0.5, 0.95, 0.99 diff --git a/src/main/resources/application-prod.yaml b/src/main/resources/application-prod.yaml index 2ddca3fa..101c2cbf 100644 --- a/src/main/resources/application-prod.yaml +++ b/src/main/resources/application-prod.yaml @@ -10,10 +10,8 @@ spring: base-url: ${SERVER_URL:http://localhost:8080} management: - endpoints: - web: - exposure: - include: health + server: + port: 8081 endpoint: health: show-details: never diff --git a/src/main/resources/application.yaml b/src/main/resources/application.yaml index e5483e7a..83c301c5 100644 --- a/src/main/resources/application.yaml +++ b/src/main/resources/application.yaml @@ -7,6 +7,7 @@ spring: include: - db - openai + - monitoring config: import: optional:file:.env[.properties] From 8d72446ef7ae8e47cfdec53f112042ebccba3e11 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Wed, 21 Jan 2026 23:14:47 +0900 Subject: [PATCH 03/23] =?UTF-8?q?feat(docker):=20=EB=AA=A8=EB=8B=88?= =?UTF-8?q?=ED=84=B0=EB=A7=81=EC=9A=A9=20docker-compose=20=EC=9E=91?= =?UTF-8?q?=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker-compose.monitoring.yml | 45 +++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 docker-compose.monitoring.yml diff --git a/docker-compose.monitoring.yml b/docker-compose.monitoring.yml new file mode 100644 index 00000000..7e19ad0c --- /dev/null +++ b/docker-compose.monitoring.yml @@ -0,0 +1,45 @@ +version: '3.8' + +services: + prometheus: + image: prom/prometheus:v2.45.0 + container_name: cherrish-prometheus + ports: + - "9090:9090" + volumes: + - ./monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml + - prometheus_data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--storage.tsdb.retention.time=15d' + - '--web.enable-lifecycle' + networks: + - monitoring + restart: unless-stopped + + grafana: + image: grafana/grafana:10.0.0 + container_name: cherrish-grafana + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin} + - GF_USERS_ALLOW_SIGN_UP=false + volumes: + - ./monitoring/grafana/provisioning:/etc/grafana/provisioning + - grafana_data:/var/lib/grafana + networks: + - monitoring + depends_on: + - prometheus + restart: unless-stopped + +volumes: + prometheus_data: + grafana_data: + +networks: + monitoring: + driver: bridge From d2ec41c0b545d166397a2d61efc8581dacbfb34d Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Wed, 21 Jan 2026 23:17:07 +0900 Subject: [PATCH 04/23] =?UTF-8?q?feat(monitoring):=20prometheus=20?= =?UTF-8?q?=EC=84=A4=EC=A0=95=20=ED=8C=8C=EC=9D=BC=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- monitoring/prometheus/prometheus.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 monitoring/prometheus/prometheus.yml diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml new file mode 100644 index 00000000..853a75b8 --- /dev/null +++ b/monitoring/prometheus/prometheus.yml @@ -0,0 +1,16 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'cherrish-server' + metrics_path: '/actuator/prometheus' + static_configs: + - targets: ['host.docker.internal:8080'] # Docker에서 호스트 접근 (로컬 개발용) + # 프로덕션에서는 실제 서버 주소 사용 + # static_configs: + # - targets: ['cherrish-server:8081'] From f0b5b4bd2490bd83e0902b13e6bb9125fc67c7b2 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Wed, 21 Jan 2026 23:19:38 +0900 Subject: [PATCH 05/23] =?UTF-8?q?feat(monitoring):=20Prometheus=20+=20Graf?= =?UTF-8?q?ana=20=EB=AA=A8=EB=8B=88=ED=84=B0=EB=A7=81=20=EC=8A=A4=ED=83=9D?= =?UTF-8?q?=20=EA=B5=AC=EC=B6=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../grafana/provisioning/dashboards/dashboard.yml | 11 +++++++++++ .../grafana/provisioning/datasources/datasource.yml | 9 +++++++++ 2 files changed, 20 insertions(+) create mode 100644 monitoring/grafana/provisioning/dashboards/dashboard.yml create mode 100644 monitoring/grafana/provisioning/datasources/datasource.yml diff --git a/monitoring/grafana/provisioning/dashboards/dashboard.yml b/monitoring/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 00000000..5ef25c42 --- /dev/null +++ b/monitoring/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,11 @@ +apiVersion: 1 + +providers: + - name: 'Cherrish Dashboards' + orgId: 1 + folder: 'Cherrish' + type: file + disableDeletion: false + updateIntervalSeconds: 30 + options: + path: /etc/grafana/provisioning/dashboards/json diff --git a/monitoring/grafana/provisioning/datasources/datasource.yml b/monitoring/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 00000000..bb009bb2 --- /dev/null +++ b/monitoring/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,9 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false From 3c918a67e846fe9cfc85637d2ec5f724762af173 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Thu, 22 Jan 2026 03:55:44 +0900 Subject: [PATCH 06/23] =?UTF-8?q?feat(monitoring):=20jvm=20=EB=8C=80?= =?UTF-8?q?=EC=8B=9C=EB=B3=B4=EB=93=9C=20=ED=8C=A8=EB=84=90=20=EA=B5=AC?= =?UTF-8?q?=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../dashboards/json/cherrish-overview.json | 512 ++++++++++++++++++ .../provisioning/datasources/datasource.yml | 1 + 2 files changed, 513 insertions(+) create mode 100644 monitoring/grafana/provisioning/dashboards/json/cherrish-overview.json diff --git a/monitoring/grafana/provisioning/dashboards/json/cherrish-overview.json b/monitoring/grafana/provisioning/dashboards/json/cherrish-overview.json new file mode 100644 index 00000000..26ad37af --- /dev/null +++ b/monitoring/grafana/provisioning/dashboards/json/cherrish-overview.json @@ -0,0 +1,512 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 1, + "panels": [], + "title": "JVM Metrics", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 80 } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 }, + "id": 2, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "jvm_memory_used_bytes{application=\"cherrish\", area=\"heap\"}", + "legendFormat": "{{id}}", + "refId": "A" + } + ], + "title": "JVM Heap Memory Used", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 80 } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 }, + "id": 3, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "rate(jvm_gc_pause_seconds_sum{application=\"cherrish\"}[5m])", + "legendFormat": "{{cause}} - {{action}}", + "refId": "A" + } + ], + "title": "GC Pause Time (Rate)", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 80 } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 9 }, + "id": 4, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "jvm_threads_live_threads{application=\"cherrish\"}", + "legendFormat": "Live Threads", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "jvm_threads_daemon_threads{application=\"cherrish\"}", + "legendFormat": "Daemon Threads", + "refId": "B" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "jvm_threads_peak_threads{application=\"cherrish\"}", + "legendFormat": "Peak Threads", + "refId": "C" + } + ], + "title": "JVM Threads", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 80 } + ] + }, + "unit": "percentunit", + "min": 0, + "max": 1 + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 9 }, + "id": 5, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "process_cpu_usage{application=\"cherrish\"}", + "legendFormat": "Process CPU", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "system_cpu_usage{application=\"cherrish\"}", + "legendFormat": "System CPU", + "refId": "B" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 17 }, + "id": 6, + "panels": [], + "title": "HTTP Requests", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 80 } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 18 }, + "id": 7, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate(http_server_requests_seconds_count{application=\"cherrish\"}[5m])) by (uri)", + "legendFormat": "{{uri}}", + "refId": "A" + } + ], + "title": "Request Rate (per URI)", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 80 } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 18 }, + "id": 8, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate(http_server_requests_seconds_count{application=\"cherrish\", status=~\"4..|5..\"}[5m])) by (status)", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Error Rate (4xx, 5xx)", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 80 } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 26 }, + "id": 9, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "histogram_quantile(0.50, sum(rate(http_server_requests_seconds_bucket{application=\"cherrish\"}[5m])) by (le))", + "legendFormat": "P50", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket{application=\"cherrish\"}[5m])) by (le))", + "legendFormat": "P95", + "refId": "B" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "histogram_quantile(0.99, sum(rate(http_server_requests_seconds_bucket{application=\"cherrish\"}[5m])) by (le))", + "legendFormat": "P99", + "refId": "C" + } + ], + "title": "Response Time Percentiles", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "red", "value": 80 } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 26 }, + "id": 10, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate(http_server_requests_seconds_count{application=\"cherrish\"}[5m]))", + "legendFormat": "Total Throughput", + "refId": "A" + } + ], + "title": "Total Throughput", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "tags": ["cherrish", "spring-boot"], + "templating": { "list": [] }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "browser", + "title": "Cherrish Overview", + "uid": "cherrish-overview", + "version": 1, + "weekStart": "" +} diff --git a/monitoring/grafana/provisioning/datasources/datasource.yml b/monitoring/grafana/provisioning/datasources/datasource.yml index bb009bb2..8704d9e5 100644 --- a/monitoring/grafana/provisioning/datasources/datasource.yml +++ b/monitoring/grafana/provisioning/datasources/datasource.yml @@ -3,6 +3,7 @@ apiVersion: 1 datasources: - name: Prometheus type: prometheus + uid: prometheus access: proxy url: http://prometheus:9090 isDefault: true From b900456638e8874c58b065b03dee0517e59aabed Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Thu, 22 Jan 2026 05:04:00 +0900 Subject: [PATCH 07/23] =?UTF-8?q?feat(monitoring):=20Grafana=20=EB=94=94?= =?UTF-8?q?=EC=8A=A4=EC=BD=94=EB=93=9C=20=EB=AA=A8=EB=8B=88=ED=84=B0?= =?UTF-8?q?=EB=A7=81=20=EA=B5=AC=ED=98=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker-compose.monitoring.yml | 1 + .../provisioning/alerting/contactpoints.yml | 12 ++ .../provisioning/alerting/policies.yml | 11 ++ .../grafana/provisioning/alerting/rules.yml | 111 ++++++++++++++++++ 4 files changed, 135 insertions(+) create mode 100644 monitoring/grafana/provisioning/alerting/contactpoints.yml create mode 100644 monitoring/grafana/provisioning/alerting/policies.yml create mode 100644 monitoring/grafana/provisioning/alerting/rules.yml diff --git a/docker-compose.monitoring.yml b/docker-compose.monitoring.yml index 7e19ad0c..5b7f8a3a 100644 --- a/docker-compose.monitoring.yml +++ b/docker-compose.monitoring.yml @@ -27,6 +27,7 @@ services: - GF_SECURITY_ADMIN_USER=admin - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin} - GF_USERS_ALLOW_SIGN_UP=false + - DISCORD_MONITORING_WEBHOOK_URL=${DISCORD_MONITORING_WEBHOOK_URL:-} volumes: - ./monitoring/grafana/provisioning:/etc/grafana/provisioning - grafana_data:/var/lib/grafana diff --git a/monitoring/grafana/provisioning/alerting/contactpoints.yml b/monitoring/grafana/provisioning/alerting/contactpoints.yml new file mode 100644 index 00000000..944ad92f --- /dev/null +++ b/monitoring/grafana/provisioning/alerting/contactpoints.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +contactPoints: + - orgId: 1 + name: discord-monitoring + receivers: + - uid: discord-monitoring + type: discord + settings: + url: ${DISCORD_MONITORING_WEBHOOK_URL} + use_discord_username: true + disableResolveMessage: false diff --git a/monitoring/grafana/provisioning/alerting/policies.yml b/monitoring/grafana/provisioning/alerting/policies.yml new file mode 100644 index 00000000..618f5dc9 --- /dev/null +++ b/monitoring/grafana/provisioning/alerting/policies.yml @@ -0,0 +1,11 @@ +apiVersion: 1 + +policies: + - orgId: 1 + receiver: discord-monitoring + group_by: + - grafana_folder + - alertname + group_wait: 30s + group_interval: 5m + repeat_interval: 4h diff --git a/monitoring/grafana/provisioning/alerting/rules.yml b/monitoring/grafana/provisioning/alerting/rules.yml new file mode 100644 index 00000000..7b294be7 --- /dev/null +++ b/monitoring/grafana/provisioning/alerting/rules.yml @@ -0,0 +1,111 @@ +apiVersion: 1 + +groups: + - orgId: 1 + name: cherrish-alerts + folder: Cherrish + interval: 1m + rules: + - uid: high-error-rate + title: High Error Rate + condition: C + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: prometheus + model: + expr: sum(rate(http_server_requests_seconds_count{application="cherrish", status=~"5.."}[5m])) or vector(0) + intervalMs: 1000 + maxDataPoints: 43200 + refId: A + - refId: B + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: prometheus + model: + expr: sum(rate(http_server_requests_seconds_count{application="cherrish"}[5m])) or vector(1) + intervalMs: 1000 + maxDataPoints: 43200 + refId: B + - refId: C + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0.05 + type: gt + operator: + type: and + query: + params: + - C + reducer: + type: last + expression: $A / $B + intervalMs: 1000 + maxDataPoints: 43200 + refId: C + type: math + noDataState: OK + execErrState: Error + for: 2m + annotations: + summary: "High error rate detected on Cherrish Server" + description: "Error rate is above 5% for the last 2 minutes" + labels: + severity: critical + isPaused: false + + - uid: high-latency + title: High Latency (P95) + condition: B + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: prometheus + model: + expr: histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket{application="cherrish"}[5m])) by (le)) + intervalMs: 1000 + maxDataPoints: 43200 + refId: A + - refId: B + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 1 + type: gt + operator: + type: and + query: + params: + - B + reducer: + type: last + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + refId: B + type: threshold + noDataState: OK + execErrState: Error + for: 5m + annotations: + summary: "High latency detected on Cherrish Server" + description: "P95 response time is above 1 second for the last 5 minutes" + labels: + severity: warning + isPaused: false From 2e2eae9b58bcaabdca687653c0a9ea0014198ef0 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Thu, 22 Jan 2026 05:31:38 +0900 Subject: [PATCH 08/23] =?UTF-8?q?feat(monitoring):=20docker-compose=20?= =?UTF-8?q?=EB=AA=A8=EB=8B=88=ED=84=B0=EB=A7=81=20yml=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker-compose.monitoring.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.monitoring.yml b/docker-compose.monitoring.yml index 5b7f8a3a..ca85093a 100644 --- a/docker-compose.monitoring.yml +++ b/docker-compose.monitoring.yml @@ -27,7 +27,7 @@ services: - GF_SECURITY_ADMIN_USER=admin - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin} - GF_USERS_ALLOW_SIGN_UP=false - - DISCORD_MONITORING_WEBHOOK_URL=${DISCORD_MONITORING_WEBHOOK_URL:-} + - DISCORD_MONITORING_WEBHOOK_URL=${DISCORD_MONITORING_WEBHOOK_URL} volumes: - ./monitoring/grafana/provisioning:/etc/grafana/provisioning - grafana_data:/var/lib/grafana From fd61eaaeb7612f78c7dcf1ab48282487dd9339c1 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Thu, 22 Jan 2026 18:51:07 +0900 Subject: [PATCH 09/23] =?UTF-8?q?feat(grafana):=20=EB=94=94=EC=8A=A4?= =?UTF-8?q?=EC=BD=94=EB=93=9C=20=EC=95=8C=EB=A6=BC=20=ED=98=95=EC=8B=9D=20?= =?UTF-8?q?=EA=B0=9C=EC=84=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../provisioning/alerting/contactpoints.yml | 15 ++++++++++++ .../monitoring/MonitoringTestController.java | 23 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 src/main/java/com/sopt/cherrish/global/monitoring/MonitoringTestController.java diff --git a/monitoring/grafana/provisioning/alerting/contactpoints.yml b/monitoring/grafana/provisioning/alerting/contactpoints.yml index 944ad92f..da4d5ee5 100644 --- a/monitoring/grafana/provisioning/alerting/contactpoints.yml +++ b/monitoring/grafana/provisioning/alerting/contactpoints.yml @@ -9,4 +9,19 @@ contactPoints: settings: url: ${DISCORD_MONITORING_WEBHOOK_URL} use_discord_username: true + avatar_url: "https://grafana.com/static/assets/img/fav32.png" + title: '{{ if eq .Status "firing" }}:rotating_light: ALERT{{ else }}:white_check_mark: RESOLVED{{ end }}' + message: | + **:bar_chart: {{ .CommonLabels.rulename }}** + + {{ if eq .Status "firing" }}:red_circle:{{ else }}:green_circle:{{ end }} **Status:** {{ .Status | toUpper }} + :warning: **Severity:** {{ .CommonLabels.severity | toUpper }} + :file_folder: **Folder:** {{ .CommonLabels.grafana_folder }} + + {{ range .Alerts }} + ---------------------- + :clipboard: **Summary:** {{ .Annotations.summary }} + :memo: **Detail:** {{ .Annotations.description }} + :link: **View:** {{ .GeneratorURL }} + {{ end }} disableResolveMessage: false diff --git a/src/main/java/com/sopt/cherrish/global/monitoring/MonitoringTestController.java b/src/main/java/com/sopt/cherrish/global/monitoring/MonitoringTestController.java new file mode 100644 index 00000000..a0098192 --- /dev/null +++ b/src/main/java/com/sopt/cherrish/global/monitoring/MonitoringTestController.java @@ -0,0 +1,23 @@ +package com.sopt.cherrish.global.monitoring; + +import org.springframework.context.annotation.Profile; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +@RestController +@RequestMapping("/api/monitoring/test") +@Profile({"local", "dev"}) +public class MonitoringTestController { + + @GetMapping("/error") + public void testError() { + throw new RuntimeException("Test error for monitoring alert"); + } + + @GetMapping("/slow") + public String testSlow() throws InterruptedException { + Thread.sleep(2000); + return "slow response"; + } +} From f4a94486d0dfdf20c7a6df567f68ab1d43a64bf6 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Thu, 22 Jan 2026 20:41:37 +0900 Subject: [PATCH 10/23] =?UTF-8?q?feat(prometheus):=20=EC=8B=A4=ED=96=89?= =?UTF-8?q?=EC=8B=9C=20healthCheck=20=EB=A1=9C=EC=A7=81=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker-compose.monitoring.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docker-compose.monitoring.yml b/docker-compose.monitoring.yml index ca85093a..81eaae15 100644 --- a/docker-compose.monitoring.yml +++ b/docker-compose.monitoring.yml @@ -14,6 +14,12 @@ services: - '--storage.tsdb.path=/prometheus' - '--storage.tsdb.retention.time=15d' - '--web.enable-lifecycle' + healthcheck: + test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9090/-/healthy"] + interval: 10s + timeout: 5s + retries: 3 + start_period: 10s networks: - monitoring restart: unless-stopped @@ -34,7 +40,8 @@ services: networks: - monitoring depends_on: - - prometheus + prometheus: + condition: service_healthy restart: unless-stopped volumes: From 04b468bc9c2bb0e5e6a320cf856a28cd0616a6a5 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Thu, 22 Jan 2026 22:08:42 +0900 Subject: [PATCH 11/23] =?UTF-8?q?feat(monitoring):=20=EB=8C=80=EC=8B=9C?= =?UTF-8?q?=EB=B3=B4=EB=93=9C=20=EC=84=A4=EC=A0=95=20=EC=97=85=EB=8D=B0?= =?UTF-8?q?=EC=9D=B4=ED=8A=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- monitoring/grafana/provisioning/dashboards/dashboard.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monitoring/grafana/provisioning/dashboards/dashboard.yml b/monitoring/grafana/provisioning/dashboards/dashboard.yml index 5ef25c42..6cbbcfb3 100644 --- a/monitoring/grafana/provisioning/dashboards/dashboard.yml +++ b/monitoring/grafana/provisioning/dashboards/dashboard.yml @@ -5,7 +5,7 @@ providers: orgId: 1 folder: 'Cherrish' type: file - disableDeletion: false - updateIntervalSeconds: 30 + disableDeletion: true + updateIntervalSeconds: 120 options: path: /etc/grafana/provisioning/dashboards/json From 0956988ef6e7de0ec522a8ab64bd438738232407 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Thu, 22 Jan 2026 22:22:48 +0900 Subject: [PATCH 12/23] =?UTF-8?q?feat(monitoring):=20Prometheus=20?= =?UTF-8?q?=EC=83=81=ED=83=9C=20=EC=95=8C=EB=A6=BC=20=ED=94=8C=EB=A1=9C?= =?UTF-8?q?=EC=9A=B0=20=EA=B5=AC=EC=B6=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../grafana/provisioning/alerting/rules.yml | 51 ++++++++++++++++++- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/monitoring/grafana/provisioning/alerting/rules.yml b/monitoring/grafana/provisioning/alerting/rules.yml index 7b294be7..04f4c290 100644 --- a/monitoring/grafana/provisioning/alerting/rules.yml +++ b/monitoring/grafana/provisioning/alerting/rules.yml @@ -53,7 +53,7 @@ groups: maxDataPoints: 43200 refId: C type: math - noDataState: OK + noDataState: NoData execErrState: Error for: 2m annotations: @@ -100,7 +100,7 @@ groups: maxDataPoints: 43200 refId: B type: threshold - noDataState: OK + noDataState: NoData execErrState: Error for: 5m annotations: @@ -109,3 +109,50 @@ groups: labels: severity: warning isPaused: false + + - uid: metrics-collection-health + title: Metrics Collection Down + condition: B + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: prometheus + model: + expr: up{job="cherrish-server"} + intervalMs: 1000 + maxDataPoints: 43200 + refId: A + - refId: B + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 1 + type: lt + operator: + type: and + query: + params: + - B + reducer: + type: last + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + refId: B + type: threshold + noDataState: Alerting + execErrState: Error + for: 1m + annotations: + summary: "Cherrish Server metrics collection is down" + description: "Prometheus cannot scrape metrics from Cherrish Server. The application may be down or unreachable." + labels: + severity: critical + isPaused: false From 38658ad9d2300200191c677326f2d2785caa5dd5 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Thu, 22 Jan 2026 22:27:52 +0900 Subject: [PATCH 13/23] =?UTF-8?q?feat(grafana):=20=EB=8C=80=EC=8B=9C?= =?UTF-8?q?=EB=B3=B4=EB=93=9C=20refresh=20=EC=8B=9C=EA=B0=84=20=EC=A1=B0?= =?UTF-8?q?=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../grafana/provisioning/dashboards/json/cherrish-overview.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monitoring/grafana/provisioning/dashboards/json/cherrish-overview.json b/monitoring/grafana/provisioning/dashboards/json/cherrish-overview.json index 26ad37af..8ce9b0dc 100644 --- a/monitoring/grafana/provisioning/dashboards/json/cherrish-overview.json +++ b/monitoring/grafana/provisioning/dashboards/json/cherrish-overview.json @@ -498,7 +498,7 @@ "type": "timeseries" } ], - "refresh": "5s", + "refresh": "30s", "schemaVersion": 38, "tags": ["cherrish", "spring-boot"], "templating": { "list": [] }, From 25d324e3e733a0376b0fe0b68531c953484033d0 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Thu, 22 Jan 2026 22:32:23 +0900 Subject: [PATCH 14/23] =?UTF-8?q?feat(grafana):=20threshold=20=20=EA=B0=92?= =?UTF-8?q?=20=EC=A1=B0=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../dashboards/json/cherrish-overview.json | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/monitoring/grafana/provisioning/dashboards/json/cherrish-overview.json b/monitoring/grafana/provisioning/dashboards/json/cherrish-overview.json index 8ce9b0dc..a77b93db 100644 --- a/monitoring/grafana/provisioning/dashboards/json/cherrish-overview.json +++ b/monitoring/grafana/provisioning/dashboards/json/cherrish-overview.json @@ -48,10 +48,12 @@ "mode": "absolute", "steps": [ { "color": "green", "value": null }, - { "color": "red", "value": 80 } + { "color": "red", "value": 0.8 } ] }, - "unit": "bytes" + "unit": "percentunit", + "min": 0, + "max": 1 }, "overrides": [] }, @@ -64,12 +66,12 @@ "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "expr": "jvm_memory_used_bytes{application=\"cherrish\", area=\"heap\"}", - "legendFormat": "{{id}}", + "expr": "sum(jvm_memory_used_bytes{application=\"cherrish\", area=\"heap\"}) / sum(jvm_memory_max_bytes{application=\"cherrish\", area=\"heap\"})", + "legendFormat": "Heap Usage", "refId": "A" } ], - "title": "JVM Heap Memory Used", + "title": "JVM Heap Memory Usage (%)", "type": "timeseries" }, { @@ -103,7 +105,7 @@ "mode": "absolute", "steps": [ { "color": "green", "value": null }, - { "color": "red", "value": 80 } + { "color": "red", "value": 0.5 } ] }, "unit": "s" @@ -225,7 +227,7 @@ "mode": "absolute", "steps": [ { "color": "green", "value": null }, - { "color": "red", "value": 80 } + { "color": "red", "value": 0.8 } ] }, "unit": "percentunit", From e83ba64e2863e918bad21ceedee4d8e62c99abe5 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Thu, 22 Jan 2026 22:37:24 +0900 Subject: [PATCH 15/23] =?UTF-8?q?feat(grafana):=20docker-compose=20?= =?UTF-8?q?=ED=8C=8C=EC=9D=BC=20=EC=9A=B4=EC=98=81=20=ED=99=98=EA=B2=BD?= =?UTF-8?q?=EB=B3=84=EB=A1=9C=20=EB=B6=84=EB=A6=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- monitoring/prometheus/prometheus.prod.yml | 13 +++++++++++++ monitoring/prometheus/prometheus.yml | 5 +---- 2 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 monitoring/prometheus/prometheus.prod.yml diff --git a/monitoring/prometheus/prometheus.prod.yml b/monitoring/prometheus/prometheus.prod.yml new file mode 100644 index 00000000..f2e34033 --- /dev/null +++ b/monitoring/prometheus/prometheus.prod.yml @@ -0,0 +1,13 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'cherrish-server' + metrics_path: '/actuator/prometheus' + static_configs: + - targets: ['cherrish-server:8081'] diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml index 853a75b8..56a1cee4 100644 --- a/monitoring/prometheus/prometheus.yml +++ b/monitoring/prometheus/prometheus.yml @@ -10,7 +10,4 @@ scrape_configs: - job_name: 'cherrish-server' metrics_path: '/actuator/prometheus' static_configs: - - targets: ['host.docker.internal:8080'] # Docker에서 호스트 접근 (로컬 개발용) - # 프로덕션에서는 실제 서버 주소 사용 - # static_configs: - # - targets: ['cherrish-server:8081'] + - targets: ['host.docker.internal:8080'] From 21b929bf8cfa62d1cd2a61de2e3ee3411c53fcbd Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Thu, 22 Jan 2026 22:39:39 +0900 Subject: [PATCH 16/23] =?UTF-8?q?refactor(grafana):=20application-monitori?= =?UTF-8?q?ng=20=EB=B6=88=ED=95=84=EC=9A=94=ED=95=9C=20=EB=9D=BC=EC=9D=B8?= =?UTF-8?q?=20=EC=A0=9C=EA=B1=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/resources/application-monitoring.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/resources/application-monitoring.yaml b/src/main/resources/application-monitoring.yaml index 7c62aaa6..236f1c4c 100644 --- a/src/main/resources/application-monitoring.yaml +++ b/src/main/resources/application-monitoring.yaml @@ -6,8 +6,6 @@ management: endpoint: health: show-details: when_authorized - prometheus: - access: metrics: tags: application: cherrish From 4a0d26b6fc69e47b6dc9d6dae06ba78e157a47a2 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Thu, 22 Jan 2026 22:45:09 +0900 Subject: [PATCH 17/23] =?UTF-8?q?feat(grafana):=20=EA=B0=81=20=EB=A3=B0?= =?UTF-8?q?=EC=97=90=20rule=20name=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- monitoring/grafana/provisioning/alerting/rules.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/monitoring/grafana/provisioning/alerting/rules.yml b/monitoring/grafana/provisioning/alerting/rules.yml index 04f4c290..d8a2b49f 100644 --- a/monitoring/grafana/provisioning/alerting/rules.yml +++ b/monitoring/grafana/provisioning/alerting/rules.yml @@ -61,6 +61,7 @@ groups: description: "Error rate is above 5% for the last 2 minutes" labels: severity: critical + rulename: "High Error Rate" isPaused: false - uid: high-latency @@ -108,6 +109,7 @@ groups: description: "P95 response time is above 1 second for the last 5 minutes" labels: severity: warning + rulename: "High Latency (P95)" isPaused: false - uid: metrics-collection-health @@ -155,4 +157,5 @@ groups: description: "Prometheus cannot scrape metrics from Cherrish Server. The application may be down or unreachable." labels: severity: critical + rulename: "Metrics Collection Down" isPaused: false From 79af7e708fbcde06f66b2b4b2c20fa4ee8762460 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Thu, 22 Jan 2026 23:12:56 +0900 Subject: [PATCH 18/23] =?UTF-8?q?feat(grafana):=20=EB=94=94=EC=8A=A4?= =?UTF-8?q?=EC=BD=94=EB=93=9C=20=EC=9D=91=EB=8B=B5=20=EA=B0=9C=EC=84=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../grafana/provisioning/alerting/contactpoints.yml | 12 ++++-------- monitoring/grafana/provisioning/alerting/rules.yml | 4 ++-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/monitoring/grafana/provisioning/alerting/contactpoints.yml b/monitoring/grafana/provisioning/alerting/contactpoints.yml index da4d5ee5..46bfcf6e 100644 --- a/monitoring/grafana/provisioning/alerting/contactpoints.yml +++ b/monitoring/grafana/provisioning/alerting/contactpoints.yml @@ -10,18 +10,14 @@ contactPoints: url: ${DISCORD_MONITORING_WEBHOOK_URL} use_discord_username: true avatar_url: "https://grafana.com/static/assets/img/fav32.png" - title: '{{ if eq .Status "firing" }}:rotating_light: ALERT{{ else }}:white_check_mark: RESOLVED{{ end }}' + title: '{{ if eq .Status "firing" }}:rotating_light: ALERT{{ else }}:white_check_mark: RESOLVED{{ end }} ({{ len .Alerts }} alerts)' message: | - **:bar_chart: {{ .CommonLabels.rulename }}** - - {{ if eq .Status "firing" }}:red_circle:{{ else }}:green_circle:{{ end }} **Status:** {{ .Status | toUpper }} - :warning: **Severity:** {{ .CommonLabels.severity | toUpper }} - :file_folder: **Folder:** {{ .CommonLabels.grafana_folder }} - {{ range .Alerts }} - ---------------------- + {{ if eq $.Status "firing" }}:red_circle:{{ else }}:green_circle:{{ end }} **{{ .Labels.rulename }}** + :warning: **Severity:** {{ .Labels.severity | toUpper }} :clipboard: **Summary:** {{ .Annotations.summary }} :memo: **Detail:** {{ .Annotations.description }} :link: **View:** {{ .GeneratorURL }} + {{ end }} disableResolveMessage: false diff --git a/monitoring/grafana/provisioning/alerting/rules.yml b/monitoring/grafana/provisioning/alerting/rules.yml index d8a2b49f..6a527a80 100644 --- a/monitoring/grafana/provisioning/alerting/rules.yml +++ b/monitoring/grafana/provisioning/alerting/rules.yml @@ -149,9 +149,9 @@ groups: maxDataPoints: 43200 refId: B type: threshold - noDataState: Alerting + noDataState: NoData execErrState: Error - for: 1m + for: 2m annotations: summary: "Cherrish Server metrics collection is down" description: "Prometheus cannot scrape metrics from Cherrish Server. The application may be down or unreachable." From 842637981ea47df2382351da28311f7855cd99a7 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Thu, 22 Jan 2026 23:13:49 +0900 Subject: [PATCH 19/23] =?UTF-8?q?feat(grafana):=20=EB=94=94=EC=8A=A4?= =?UTF-8?q?=EC=BD=94=EB=93=9C=20=EC=95=8C=EB=A6=BC=20=EA=B7=B8=EB=A3=B9?= =?UTF-8?q?=ED=99=94=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- monitoring/grafana/provisioning/alerting/policies.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/monitoring/grafana/provisioning/alerting/policies.yml b/monitoring/grafana/provisioning/alerting/policies.yml index 618f5dc9..1e881943 100644 --- a/monitoring/grafana/provisioning/alerting/policies.yml +++ b/monitoring/grafana/provisioning/alerting/policies.yml @@ -5,7 +5,6 @@ policies: receiver: discord-monitoring group_by: - grafana_folder - - alertname - group_wait: 30s + group_wait: 60s group_interval: 5m repeat_interval: 4h From 2b619cac9a195afabcbf1a769c4554a8ede783d4 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Fri, 23 Jan 2026 00:17:17 +0900 Subject: [PATCH 20/23] =?UTF-8?q?feat(monitoring):=20docker=20linux=20?= =?UTF-8?q?=ED=99=98=EA=B2=BD=20=ED=98=B8=ED=99=98=EC=84=B1=20=EA=B0=9C?= =?UTF-8?q?=EC=84=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker-compose.monitoring.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.monitoring.yml b/docker-compose.monitoring.yml index 81eaae15..bb683262 100644 --- a/docker-compose.monitoring.yml +++ b/docker-compose.monitoring.yml @@ -20,6 +20,8 @@ services: timeout: 5s retries: 3 start_period: 10s + extra_hosts: + - "host.docker.internal:host-gateway" networks: - monitoring restart: unless-stopped From 1a4c61caf99399e0e1db1498c890f1319d5ad2a9 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Fri, 23 Jan 2026 05:17:34 +0900 Subject: [PATCH 21/23] =?UTF-8?q?feat(monitoring):=20=EB=B6=88=ED=95=84?= =?UTF-8?q?=EC=9A=94=ED=95=9C=20=EB=B2=84=EC=A0=84=20=EB=AA=85=EC=8B=9C=20?= =?UTF-8?q?=EC=A0=95=EB=A6=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker-compose.monitoring.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker-compose.monitoring.yml b/docker-compose.monitoring.yml index bb683262..c3bd615f 100644 --- a/docker-compose.monitoring.yml +++ b/docker-compose.monitoring.yml @@ -1,5 +1,3 @@ -version: '3.8' - services: prometheus: image: prom/prometheus:v2.45.0 From 438c506b319f22204c8f9868f853bc77e8ad6e85 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Fri, 23 Jan 2026 15:26:53 +0900 Subject: [PATCH 22/23] =?UTF-8?q?feat(prometheus):=203.5.1(LTS)=20?= =?UTF-8?q?=EB=B2=84=EC=A0=84=EC=9C=BC=EB=A1=9C=20=EC=97=85=EB=8D=B0?= =?UTF-8?q?=EC=9D=B4=ED=8A=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker-compose.monitoring.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.monitoring.yml b/docker-compose.monitoring.yml index c3bd615f..2cc72830 100644 --- a/docker-compose.monitoring.yml +++ b/docker-compose.monitoring.yml @@ -1,6 +1,6 @@ services: prometheus: - image: prom/prometheus:v2.45.0 + image: prom/prometheus:v3.5.1 container_name: cherrish-prometheus ports: - "9090:9090" From 1154c99b5d2f897ffba6dfac90d80a0ff5796f94 Mon Sep 17 00:00:00 2001 From: Kimgyuilli Date: Fri, 23 Jan 2026 15:30:30 +0900 Subject: [PATCH 23/23] =?UTF-8?q?feat(grafana):=2011.6.9(LTS)=20=EB=B2=84?= =?UTF-8?q?=EC=A0=84=EC=9C=BC=EB=A1=9C=20=EC=97=85=EB=8D=B0=EC=9D=B4?= =?UTF-8?q?=ED=8A=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker-compose.monitoring.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.monitoring.yml b/docker-compose.monitoring.yml index 2cc72830..1d9d5c97 100644 --- a/docker-compose.monitoring.yml +++ b/docker-compose.monitoring.yml @@ -25,7 +25,7 @@ services: restart: unless-stopped grafana: - image: grafana/grafana:10.0.0 + image: grafana/grafana:11.6.9 container_name: cherrish-grafana ports: - "3000:3000"