# Vendure 監視・運用設計書
## 概要
本書では、Fly.io と Upstash 上で運用する Vendure システムの監視・運用戦略を包括的に説明します。
## 監視アーキテクチャ
### 監視対象とメトリクス
%% Monitoring topology: monitored components, the monitoring stack, and the
%% external services they are connected to.
graph TB
%% Application processes
subgraph "Application Layer"
V[Vendure API]
NS[Next.js Storefront]
W[Worker Process]
end
%% Backing services
subgraph "Infrastructure Layer"
PG[(PostgreSQL)]
R[(Redis)]
FS[File Storage]
end
%% Metrics, dashboards, alerting and log collection
subgraph "Monitoring Stack"
PM[Prometheus]
GF[Grafana]
AL[AlertManager]
LOG[Log Aggregation]
end
%% Third-party services
subgraph "External Services"
DD[Datadog]
SN[Sentry]
UPT[Uptime Robot]
end
%% Every application and infrastructure node has an edge into Prometheus
V --> PM
NS --> PM
W --> PM
PG --> PM
R --> PM
FS --> PM
%% Prometheus feeds Grafana and AlertManager
PM --> GF
PM --> AL
%% Application nodes feed log aggregation, which feeds Datadog
V --> LOG
NS --> LOG
W --> LOG
LOG --> DD
%% The API and the storefront have edges into Sentry
V --> SN
NS --> SN
%% Uptime Robot points at the API and the storefront
UPT --> V
UPT --> NS
1. アプリケーション監視¶
1.1 パフォーマンスメトリクス¶
レスポンス時間監視¶
// performance-metrics.ts
import { performance } from 'perf_hooks';
import { Logger } from '@vendure/core';
/**
 * Records request timings as Prometheus observations and logs slow GraphQL
 * operations.
 *
 * Durations are passed in milliseconds and converted to seconds before being
 * observed, matching the `_seconds` suffix of the histogram names.
 */
export class PerformanceMonitor {
  /** Context string attached to every log line emitted by this class. */
  private static readonly loggerCtx = 'PerformanceMonitor';

  /** GraphQL operations slower than this many milliseconds are logged as warnings. */
  private static readonly slowQueryThresholdMs = 2000;

  /**
   * Records the duration of a single GraphQL operation.
   *
   * @param operationName Name used as the `operation` label.
   * @param duration Elapsed time in milliseconds.
   */
  trackGraphQLQuery(operationName: string, duration: number) {
    // Vendure's public logging API is the static Logger with an explicit
    // context argument, so the context name actually reaches the log output.
    Logger.verbose(
      `GraphQL ${operationName}: ${duration}ms`,
      PerformanceMonitor.loggerCtx,
    );

    // Prometheus metric (seconds)
    graphqlDurationHistogram
      .labels({ operation: operationName })
      .observe(duration / 1000);

    // SLA threshold check
    if (duration > PerformanceMonitor.slowQueryThresholdMs) {
      Logger.warn(
        `Slow query detected: ${operationName} took ${duration}ms`,
        PerformanceMonitor.loggerCtx,
      );
    }
  }

  /**
   * Records the duration and the request count of a single API call.
   *
   * @param endpoint Value of the `endpoint` label.
   * @param method HTTP method, used as the `method` label.
   * @param statusCode HTTP status code, stringified into the `status` label.
   * @param duration Elapsed time in milliseconds.
   */
  trackAPIEndpoint(
    endpoint: string,
    method: string,
    statusCode: number,
    duration: number,
  ) {
    // Both metrics share one label set, so build it once.
    const labels = { endpoint, method, status: statusCode.toString() };
    apiDurationHistogram.labels(labels).observe(duration / 1000);
    apiRequestsTotal.labels(labels).inc();
  }
}
// Prometheus メトリクス定義
import { register, Histogram, Counter } from 'prom-client';
/** Upper bounds, in seconds, shared by the latency histograms below. */
const latencyBucketsSeconds = [0.1, 0.5, 1, 2, 5, 10];

/** Duration of GraphQL operations in seconds, labelled by operation name. */
export const graphqlDurationHistogram = new Histogram({
  name: 'vendure_graphql_duration_seconds',
  help: 'GraphQL query duration',
  labelNames: ['operation'],
  buckets: [...latencyBucketsSeconds],
});

/** Duration of API calls in seconds, labelled by endpoint, method and status. */
export const apiDurationHistogram = new Histogram({
  name: 'vendure_api_duration_seconds',
  help: 'API endpoint duration',
  labelNames: ['endpoint', 'method', 'status'],
  buckets: [...latencyBucketsSeconds],
});

/** Number of API calls, labelled by endpoint, method and status. */
export const apiRequestsTotal = new Counter({
  name: 'vendure_api_requests_total',
  help: 'Total API requests',
  labelNames: ['endpoint', 'method', 'status'],
});

// Register each metric, in declaration order.
for (const metric of [
  graphqlDurationHistogram,
  apiDurationHistogram,
  apiRequestsTotal,
]) {
  register.registerMetric(metric);
}
ビジネスメトリクス¶
// business-metrics.ts
/**
 * Translates business events (orders, product views, rebates, campaign usage)
 * into Prometheus observations.
 */
export class BusinessMetrics {
  /**
   * Order metrics: observes the order total under the order's state and counts
   * the order by customer type and channel.
   *
   * @param order Order to record. It may have no customer, and its `channels`
   *   relation may not be loaded; both cases fall back to defaults.
   */
  trackOrder(order: Order) {
    orderTotal.labels({ status: order.state }).observe(order.total);
    ordersCreatedTotal
      .labels({
        customerType: this.getCustomerType(order.customer),
        // `channels` is undefined when the relation was not loaded.
        channel: order.channels?.[0]?.code || 'default',
      })
      .inc();
  }

  /**
   * Product metrics: counts a product view, plus a per-customer activity
   * event when the viewer is known.
   *
   * @param productId Value of the `productId` label.
   * @param customerId Optional viewer id; skipped when absent or empty.
   */
  trackProductView(productId: string, customerId?: string) {
    productViewsTotal.labels({ productId }).inc();
    if (customerId) {
      customerActivityTotal
        .labels({ customerId, action: 'product_view' })
        .inc();
    }
  }

  /**
   * B2B-specific metrics: stores the latest rebate amount for a customer and
   * period, and counts the calculation.
   */
  trackRebateCalculation(customerId: string, amount: number, period: string) {
    rebateAmountGauge.labels({ customerId, period }).set(amount);
    rebateCalculationsTotal.labels({ period }).inc();
  }

  /**
   * Campaign effectiveness: counts a campaign use and observes the discount.
   */
  trackCampaignUsage(campaignId: string, discount: number) {
    campaignUsageTotal.labels({ campaignId }).inc();
    campaignDiscountTotal.labels({ campaignId }).observe(discount);
  }

  /**
   * Resolves the B2B customer type from the `customerStatus` custom field.
   *
   * @param customer Customer of the order, if any.
   * @returns The custom field value, or `'general'` when the customer, its
   *   custom fields, or the field itself is missing or empty.
   */
  private getCustomerType(customer?: Customer): string {
    const customFields = customer?.customFields as
      | { customerStatus?: string }
      | undefined;
    return customFields?.customerStatus || 'general';
  }
}
// ビジネスメトリクス定義
/** Distribution of order totals, labelled by order state. */
export const orderTotal = new Histogram({
  name: 'vendure_order_total_amount',
  help: 'Order total amount',
  labelNames: ['status'],
  buckets: [1000, 5000, 10000, 50000, 100000, 500000],
});

/** Number of orders recorded, labelled by customer type and channel. */
export const ordersCreatedTotal = new Counter({
  name: 'vendure_orders_created_total',
  help: 'Total orders created',
  labelNames: ['customerType', 'channel'],
});

/** Number of product views, labelled by product id. */
export const productViewsTotal = new Counter({
  name: 'vendure_product_views_total',
  help: 'Total product views',
  labelNames: ['productId'],
});

/** Latest rebate amount per customer and period. */
export const rebateAmountGauge = new Gauge({
  name: 'vendure_rebate_amount',
  help: 'Customer rebate amount',
  labelNames: ['customerId', 'period'],
});

// The four metrics below are used by BusinessMetrics but had no definition.
// NOTE(review): per-customer / per-product labels create one series per id;
// confirm the expected cardinality is acceptable for the Prometheus setup.

/** Number of customer activity events, labelled by customer and action. */
export const customerActivityTotal = new Counter({
  name: 'vendure_customer_activity_total',
  help: 'Total customer activity events',
  labelNames: ['customerId', 'action'],
});

/** Number of rebate calculations, labelled by period. */
export const rebateCalculationsTotal = new Counter({
  name: 'vendure_rebate_calculations_total',
  help: 'Total rebate calculations',
  labelNames: ['period'],
});

/** Number of times a campaign was applied, labelled by campaign id. */
export const campaignUsageTotal = new Counter({
  name: 'vendure_campaign_usage_total',
  help: 'Total campaign usages',
  labelNames: ['campaignId'],
});

/**
 * Distribution of discount amounts per campaign. It is a histogram because
 * BusinessMetrics calls `.observe()` on it.
 */
export const campaignDiscountTotal = new Histogram({
  name: 'vendure_campaign_discount_amount',
  help: 'Discount amount granted per campaign usage',
  labelNames: ['campaignId'],
  buckets: [100, 500, 1000, 5000, 10000, 50000],
});
1.2 エラー監視¶
Sentry 統合¶
// error-monitoring.ts
import * as Sentry from '@sentry/node';
import { Logger } from '@vendure/core';
/**
 * Thin wrapper around the Sentry SDK: initialisation, exception/message
 * capture, and redaction of sensitive request data before an event is sent.
 */
export class ErrorMonitor {
  /**
   * Keys whose name contains one of these words (case-insensitive) are
   * redacted, so variants such as `currentPassword` or `authToken` are
   * covered as well.
   */
  private static readonly sensitiveKeyPattern = /password|token|apikey|secret/i;

  /**
   * Initialises Sentry from `SENTRY_DSN` and `NODE_ENV`. Traces are sampled
   * at 10% in production and 100% elsewhere.
   */
  static init() {
    Sentry.init({
      dsn: process.env.SENTRY_DSN,
      environment: process.env.NODE_ENV,
      tracesSampleRate: process.env.NODE_ENV === 'production' ? 0.1 : 1.0,
      // Arrow function that names the class explicitly: inside a method of the
      // options object, `this` is not ErrorMonitor, so `this.sanitizeData`
      // would throw and the event would never be sanitised.
      beforeSend: event => {
        // Filter sensitive data
        if (event.request?.data) {
          event.request.data = ErrorMonitor.sanitizeData(event.request.data);
        }
        return event;
      },
    });
  }

  /**
   * Reports an exception, optionally attaching `context` under the
   * `additional` Sentry context key.
   */
  static captureException(error: Error, context?: any) {
    Sentry.withScope(scope => {
      if (context) {
        scope.setContext('additional', context);
      }
      Sentry.captureException(error);
    });
  }

  /** Reports a plain message at the given level (default `info`). */
  static captureMessage(message: string, level: Sentry.SeverityLevel = 'info') {
    Sentry.captureMessage(message, level);
  }

  /**
   * Returns a copy of `data` in which every property whose key looks
   * sensitive is replaced by `'[REDACTED]'`. Nested objects and arrays are
   * traversed, so values inside GraphQL `variables` are covered too.
   *
   * @param data Arbitrary request payload; non-objects are returned as-is.
   * @param ancestors Objects on the current traversal path, used to stop on
   *   circular references.
   */
  private static sanitizeData(
    data: any,
    ancestors: WeakSet<object> = new WeakSet(),
  ): any {
    if (typeof data !== 'object' || data === null) {
      return data;
    }
    if (ancestors.has(data)) {
      return '[Circular]';
    }

    ancestors.add(data);
    let sanitized: unknown;
    if (Array.isArray(data)) {
      sanitized = data.map(item => ErrorMonitor.sanitizeData(item, ancestors));
    } else {
      const copy: Record<string, unknown> = {};
      for (const [key, value] of Object.entries(data)) {
        copy[key] = ErrorMonitor.sensitiveKeyPattern.test(key)
          ? '[REDACTED]'
          : ErrorMonitor.sanitizeData(value, ancestors);
      }
      sanitized = copy;
    }
    // Only the current path is tracked, so an object referenced twice without
    // a cycle is still copied in full both times.
    ancestors.delete(data);
    return sanitized;
  }
}
// GraphQL エラーハンドリング
/**
 * GraphQL error handling: logs the error through the Vendure logger and
 * forwards it to Sentry with the operation text and path attached.
 *
 * @param error Error raised while executing a GraphQL operation. The handler
 *   reads `message`, `stack`, `path` and `source.body` from it.
 */
export const graphQLErrorHandler = (error: any) => {
  // The logger takes strings (message, context, trace), so the path is folded
  // into the message and the stack is passed as the trace instead of handing
  // an object to a string parameter.
  const path = Array.isArray(error.path) ? error.path.join('.') : '';
  const location = path ? ` (path: ${path})` : '';
  Logger.error(
    `GraphQL Error: ${error.message}${location}`,
    'GraphQLError',
    error.stack,
  );

  ErrorMonitor.captureException(error, {
    graphql: {
      operation: error.source?.body,
      path: error.path,
    },
  });
};
2. インフラストラクチャ監視¶
2.1 Fly.io メトリクス¶
システムリソース監視¶
// system-metrics.ts
import { execSync } from 'child_process';
import * as os from 'os';
import { register, Gauge } from 'prom-client';
/**
 * Publishes host CPU, memory and disk usage as Prometheus gauges, refreshed
 * every 30 seconds.
 *
 * CPU and memory come from Node's `os` module rather than from `top`/`free`,
 * so no blocking shell is spawned for them and the values do not silently
 * become 0 when those tools are missing. Disk usage still comes from `df`.
 */
export class SystemMetrics {
  private cpuUsageGauge = new Gauge({
    name: 'system_cpu_usage_percent',
    help: 'CPU usage percentage',
  });
  private memoryUsageGauge = new Gauge({
    name: 'system_memory_usage_bytes',
    help: 'Memory usage in bytes',
  });
  private diskUsageGauge = new Gauge({
    name: 'system_disk_usage_bytes',
    help: 'Disk usage in bytes',
    labelNames: ['mount'],
  });

  /** CPU counters from the previous sample; usage is the delta between samples. */
  private previousCpuTimes = SystemMetrics.readCpuTimes();

  /** Refresh timer handle, kept so that `stop()` can cancel it. */
  private readonly refreshTimer: ReturnType<typeof setInterval>;

  constructor() {
    register.registerMetric(this.cpuUsageGauge);
    register.registerMetric(this.memoryUsageGauge);
    register.registerMetric(this.diskUsageGauge);
    // Refresh the metrics every 30 seconds
    this.refreshTimer = setInterval(() => this.updateMetrics(), 30000);
  }

  /** Stops the periodic refresh (for graceful shutdown and tests). */
  stop(): void {
    clearInterval(this.refreshTimer);
  }

  /** Samples all three sources and writes them to the gauges. */
  private updateMetrics() {
    try {
      // CPU usage
      this.cpuUsageGauge.set(this.getCPUUsage());
      // Memory usage
      this.memoryUsageGauge.set(this.getMemoryUsage());
      // Disk usage
      for (const [mount, usedBytes] of Object.entries(this.getDiskUsage())) {
        this.diskUsageGauge.labels(mount).set(usedBytes);
      }
    } catch (error) {
      console.error('Failed to update system metrics:', error);
    }
  }

  /** Sums idle and total CPU time over all cores, as reported by `os.cpus()`. */
  private static readCpuTimes(): { idle: number; total: number } {
    let idle = 0;
    let total = 0;
    for (const cpu of os.cpus()) {
      const { user, nice, sys, irq } = cpu.times;
      idle += cpu.times.idle;
      total += user + nice + sys + irq + cpu.times.idle;
    }
    return { idle, total };
  }

  /**
   * @returns Percentage (0-100) of non-idle CPU time since the previous call,
   *   or 0 when no CPU time has elapsed between the two samples.
   */
  private getCPUUsage(): number {
    const current = SystemMetrics.readCpuTimes();
    const totalDelta = current.total - this.previousCpuTimes.total;
    const idleDelta = current.idle - this.previousCpuTimes.idle;
    this.previousCpuTimes = current;
    if (totalDelta <= 0) {
      return 0;
    }
    return (1 - idleDelta / totalDelta) * 100;
  }

  /** @returns Total memory minus free memory, in bytes, as reported by `os`. */
  private getMemoryUsage(): number {
    return os.totalmem() - os.freemem();
  }

  /**
   * Parses `df -B1 -P` output with the header line already removed.
   *
   * @returns Used bytes keyed by mount point; lines with fewer than six
   *   columns or a non-numeric "used" column are skipped.
   */
  private static parseDfOutput(output: string): Record<string, number> {
    const usage: Record<string, number> = {};
    for (const line of output.trim().split('\n')) {
      const columns = line.trim().split(/\s+/);
      if (columns.length < 6) {
        continue;
      }
      const usedBytes = parseInt(columns[2], 10);
      if (Number.isFinite(usedBytes)) {
        // Re-join so that a mount point containing spaces is not truncated.
        usage[columns.slice(5).join(' ')] = usedBytes;
      }
    }
    return usage;
  }

  /**
   * @returns Used bytes per mount point, or an empty record when `df` cannot
   *   be run. The failure is logged rather than swallowed.
   */
  private getDiskUsage(): Record<string, number> {
    try {
      // -P keeps every filesystem on a single line even for long device names.
      const output = execSync('df -B1 -P | tail -n +2');
      return SystemMetrics.parseDfOutput(output.toString());
    } catch (error) {
      console.error('Failed to read disk usage:', error);
      return {};
    }
  }
}
2.2 データベース監視¶
PostgreSQL 監視¶
// database-metrics.ts
/**
 * Exposes PostgreSQL connection-pool sizes, the active connection count and
 * query durations as Prometheus metrics. Connection metrics are refreshed
 * every 30 seconds.
 */
export class DatabaseMetrics {
  private connectionPoolGauge = new Gauge({
    name: 'postgres_connection_pool_size',
    help: 'PostgreSQL connection pool size',
    labelNames: ['state'],
  });
  private queryDurationHistogram = new Histogram({
    name: 'postgres_query_duration_seconds',
    help: 'PostgreSQL query duration',
    labelNames: ['operation'],
    buckets: [0.01, 0.05, 0.1, 0.5, 1, 2, 5],
  });
  private activeConnectionsGauge = new Gauge({
    name: 'postgres_active_connections',
    help: 'Number of active PostgreSQL connections',
  });

  constructor(private dataSource: DataSource) {
    for (const metric of [
      this.connectionPoolGauge,
      this.queryDurationHistogram,
      this.activeConnectionsGauge,
    ]) {
      register.registerMetric(metric);
    }
    // Connection pool monitoring
    setInterval(() => this.updateConnectionMetrics(), 30000);
  }

  /**
   * Observes one query duration.
   *
   * @param operation Value of the `operation` label.
   * @param duration Elapsed time in milliseconds (stored in seconds).
   */
  trackQuery(operation: string, duration: number) {
    const seconds = duration / 1000;
    this.queryDurationHistogram.labels({ operation }).observe(seconds);
  }

  /**
   * Reads the pool counters from the driver's `master` pool (when present)
   * and counts rows in `pg_stat_activity` whose state is `active`. Does
   * nothing until the data source is initialised; failures are only logged.
   */
  private async updateConnectionMetrics() {
    try {
      if (!this.dataSource.isInitialized) {
        return;
      }

      const pgPool = (this.dataSource.driver as any).master;
      if (pgPool) {
        // Gauge label -> pool property holding the matching counter
        const poolCounters: Array<[string, string]> = [
          ['total', 'totalCount'],
          ['idle', 'idleCount'],
          ['waiting', 'waitingCount'],
        ];
        for (const [state, property] of poolCounters) {
          this.connectionPoolGauge.labels(state).set(pgPool[property] || 0);
        }
      }

      // Fetch the number of active connections
      const rows = await this.dataSource.query(
        'SELECT count(*) as active_connections FROM pg_stat_activity WHERE state = $1',
        ['active'],
      );
      const rawCount = rows[0]?.active_connections || '0';
      this.activeConnectionsGauge.set(parseInt(rawCount));
    } catch (error) {
      console.error('Failed to update database metrics:', error);
    }
  }
}
Redis 監視¶
// redis-metrics.ts
/**
 * Polls the Redis `INFO` command every 30 seconds and publishes connected
 * clients, memory usage and per-database key counts as Prometheus gauges.
 */
export class RedisMetrics {
  private connectionGauge = new Gauge({
    name: 'redis_connected_clients',
    help: 'Number of connected Redis clients',
  });
  private memoryUsageGauge = new Gauge({
    name: 'redis_memory_usage_bytes',
    help: 'Redis memory usage in bytes',
  });
  // NOTE(review): this counter is registered but nothing in this class
  // increments it - confirm whether another component is meant to feed it.
  private commandsProcessedTotal = new Counter({
    name: 'redis_commands_processed_total',
    help: 'Total number of commands processed',
    labelNames: ['command'],
  });
  private keyspaceGauge = new Gauge({
    name: 'redis_keyspace_keys',
    help: 'Number of keys in Redis keyspace',
    labelNames: ['db'],
  });

  constructor(private redis: Redis) {
    register.registerMetric(this.connectionGauge);
    register.registerMetric(this.memoryUsageGauge);
    register.registerMetric(this.commandsProcessedTotal);
    register.registerMetric(this.keyspaceGauge);
    // Redis INFO monitoring
    setInterval(() => this.updateRedisMetrics(), 30000);
  }

  /** Fetches `INFO`, parses it and refreshes the gauges; failures are only logged. */
  private async updateRedisMetrics() {
    try {
      const rawInfo = await this.redis.info();
      const sections = this.parseRedisInfo(rawInfo);

      // Connected clients
      const connectedClients = parseInt(
        sections.clients?.connected_clients ?? '',
        10,
      );
      if (Number.isFinite(connectedClients)) {
        this.connectionGauge.set(connectedClients);
      }

      // Memory usage
      const usedMemory = parseInt(sections.memory?.used_memory ?? '', 10);
      if (Number.isFinite(usedMemory)) {
        this.memoryUsageGauge.set(usedMemory);
      }

      // Keyspace. The gauge is cleared first so a database that is no longer
      // listed does not keep reporting its last key count.
      this.keyspaceGauge.reset();
      for (const [db, stats] of Object.entries(sections.keyspace ?? {})) {
        const match = stats.match(/keys=(\d+)/);
        if (match) {
          this.keyspaceGauge.labels(db).set(parseInt(match[1], 10));
        }
      }
    } catch (error) {
      console.error('Failed to update Redis metrics:', error);
    }
  }

  /**
   * Parses the text returned by `INFO` into `section -> key -> value`.
   *
   * Lines starting with `#` open a new section (name lower-cased). Other
   * lines are split at the FIRST colon only, so values that contain colons
   * themselves (addresses, URLs) are preserved in full. Lines that appear
   * before any section header are ignored.
   *
   * @param info Raw `INFO` output; `\n` and `\r\n` line endings both work.
   */
  private parseRedisInfo(info: string): Record<string, Record<string, string>> {
    const sections: Record<string, Record<string, string>> = {};
    let currentSection = '';
    for (const rawLine of info.split('\n')) {
      const line = rawLine.trim();
      if (line.startsWith('#')) {
        currentSection = line.replace(/^#\s*/, '').toLowerCase();
        sections[currentSection] = {};
        continue;
      }
      const separatorIndex = line.indexOf(':');
      if (separatorIndex === -1 || !sections[currentSection]) {
        continue;
      }
      const key = line.slice(0, separatorIndex);
      sections[currentSection][key] = line.slice(separatorIndex + 1);
    }
    return sections;
  }
}
3. ログ管理¶
3.1 構造化ログ¶
// structured-logging.ts
import { Logger as VendureLogger } from '@vendure/core';
import winston from 'winston';
/**
 * Logger that writes every message as JSON through winston (console,
 * `/app/logs/error.log` for errors, `/app/logs/combined.log` for everything)
 * and then forwards it to the base logger. Also offers helpers for business,
 * security and performance events, which go to winston only.
 */
export class StructuredLogger extends VendureLogger {
  private winston: winston.Logger;

  constructor() {
    super();
    this.winston = winston.createLogger({
      level: process.env.LOG_LEVEL || 'info',
      format: winston.format.combine(
        winston.format.timestamp(),
        winston.format.errors({ stack: true }),
        winston.format.json(),
      ),
      defaultMeta: {
        service: 'vendure-api',
        environment: process.env.NODE_ENV,
        instance: process.env.FLY_MACHINE_ID || 'local',
      },
      transports: [
        new winston.transports.Console(),
        new winston.transports.File({
          filename: '/app/logs/error.log',
          level: 'error',
        }),
        new winston.transports.File({ filename: '/app/logs/combined.log' }),
      ],
    });
  }

  /**
   * Writes one entry to winston at `level`, appending `level` to the metadata
   * after the supplied fields.
   */
  private mirror(
    level: 'error' | 'warn' | 'info' | 'debug' | 'verbose',
    message: string,
    fields: Record<string, unknown>,
  ) {
    this.winston[level](message, { ...fields, level });
  }

  /** Logs an error with optional stack trace and context. */
  error(message: string, trace?: string, context?: string) {
    this.mirror('error', message, { trace, context });
    super.error(message, trace, context);
  }

  /** Logs a warning. */
  warn(message: string, context?: string) {
    this.mirror('warn', message, { context });
    super.warn(message, context);
  }

  /** Logs an informational message (winston level `info`). */
  log(message: string, context?: string) {
    this.mirror('info', message, { context });
    super.log(message, context);
  }

  /** Logs a debug message. */
  debug(message: string, context?: string) {
    this.mirror('debug', message, { context });
    super.debug(message, context);
  }

  /** Logs a verbose message. */
  verbose(message: string, context?: string) {
    this.mirror('verbose', message, { context });
    super.verbose(message, context);
  }

  /** Business event log: an `info` entry tagged `category: 'business'`. */
  logBusinessEvent(event: string, data: any, userId?: string) {
    const entry = {
      event,
      data,
      userId,
      timestamp: new Date().toISOString(),
      level: 'info',
      category: 'business',
    };
    this.winston.info('Business Event', entry);
  }

  /**
   * Security event log: a `warn` entry tagged `category: 'security'`.
   *
   * @param severity Defaults to `'medium'`.
   */
  logSecurityEvent(
    event: string,
    details: any,
    severity: 'low' | 'medium' | 'high' = 'medium',
  ) {
    const entry = {
      event,
      details,
      severity,
      timestamp: new Date().toISOString(),
      level: 'warn',
      category: 'security',
    };
    this.winston.warn('Security Event', entry);
  }

  /** Performance log: an `info` entry tagged `category: 'performance'`. */
  logPerformance(operation: string, duration: number, metadata?: any) {
    const entry = {
      operation,
      duration,
      metadata,
      timestamp: new Date().toISOString(),
      level: 'info',
      category: 'performance',
    };
    this.winston.info('Performance Log', entry);
  }
}
3.2 ログ集約設定¶
# fluentd-config.yml (optional)
# NOTE: despite the file name, the content uses Fluentd's own configuration
# syntax, not YAML. Each parameter must be on its own line.

# Tail the JSON log files written by the application.
<source>
  @type tail
  path /app/logs/*.log
  pos_file /var/log/fluentd-vendure.log.pos
  tag vendure.*
  format json
  time_key timestamp
  time_format %Y-%m-%dT%H:%M:%S.%LZ
</source>

# Forward everything tagged vendure.* to Datadog.
<match vendure.**>
  @type datadog
  api_key "#{ENV['DD_API_KEY']}"
  service vendure
  source nodejs
  sourcecategory vendure
  # Embedded Ruby is only evaluated inside double-quoted values.
  tags "environment:#{ENV['NODE_ENV']},instance:#{ENV['FLY_MACHINE_ID']}"
</match>
4. アラート設定¶
4.1 クリティカルアラート¶
// alert-rules.ts
/**
 * Alert rule catalogue.
 *
 * Each rule pairs a PromQL expression (`metric`) with a comparison
 * (`condition`) that must hold for `duration` before the alert is sent to
 * `channels`. Thresholds are written in the unit of the expression, so ratio
 * rules divide by the total and latency rules use a histogram quantile.
 */
export const alertRules = {
  // Application availability
  application_down: {
    metric: 'up',
    condition: '== 0',
    duration: '2m',
    severity: 'critical',
    message: 'Vendure application is down',
    channels: ['slack', 'email', 'pagerduty'],
  },
  // Response time. A histogram only exposes _bucket/_sum/_count series, so the
  // 95th percentile is derived from the buckets instead of using the base name.
  high_response_time: {
    metric:
      'histogram_quantile(0.95, sum by (le) (rate(vendure_api_duration_seconds_bucket[5m])))',
    condition: '> 2',
    duration: '5m',
    severity: 'warning',
    message: 'API response time is high (>2s)',
    channels: ['slack'],
  },
  // Error rate: share of 5xx responses among all responses, so that the 0.05
  // threshold really means 5% rather than 0.05 requests per second.
  high_error_rate: {
    metric:
      'sum(rate(vendure_api_requests_total{status=~"5.."}[5m])) / sum(rate(vendure_api_requests_total[5m]))',
    condition: '> 0.05',
    duration: '3m',
    severity: 'critical',
    message: 'High error rate detected (>5%)',
    channels: ['slack', 'email'],
  },
  // Database connections
  database_connection_high: {
    metric: 'postgres_active_connections',
    condition: '> 80',
    duration: '5m',
    severity: 'warning',
    message: 'High database connection count',
    channels: ['slack'],
  },
  // Redis memory usage
  redis_memory_high: {
    metric: 'redis_memory_usage_bytes',
    condition: '> 2.5e9', // 2.5GB
    duration: '5m',
    severity: 'warning',
    message: 'Redis memory usage is high',
    channels: ['slack'],
  },
  // Disk usage. `system_disk_usage_bytes` is an absolute byte count, so
  // comparing it with 0.85 was always true and no capacity metric is exported
  // to turn it into a ratio.
  // NOTE(review): this uses the platform's volume usage percentage - confirm
  // the metric name against the Fly.io metrics reference, and note that it
  // covers attached volumes, not the root filesystem.
  disk_usage_high: {
    metric: 'fly_volume_used_pct',
    condition: '> 85',
    duration: '10m',
    severity: 'warning',
    message: 'Disk usage is high (>85%)',
    channels: ['slack'],
  },
  // Business KPI: failed orders as a share of created orders.
  // NOTE(review): `vendure_orders_failed_total` is not defined alongside the
  // other business metrics and must be exported for this rule to evaluate.
  // `business_team` is also not a registered notification channel.
  order_failure_rate: {
    metric:
      'sum(rate(vendure_orders_failed_total[10m])) / sum(rate(vendure_orders_created_total[10m]))',
    condition: '> 0.1',
    duration: '5m',
    severity: 'critical',
    message: 'Order failure rate is high (>10%)',
    channels: ['slack', 'email', 'business_team'],
  },
};
4.2 アラート通知設定¶
// notification-channels.ts
/**
 * Registry of notification channels and fan-out of alerts to them.
 *
 * A channel is registered only when its environment configuration is
 * present, and an alert addressed to an unregistered channel produces a
 * warning instead of being dropped silently.
 */
export class NotificationManager {
  private channels: Map<string, NotificationChannel> = new Map();

  constructor() {
    this.setupChannels();
  }

  /** Registers the Slack, email and PagerDuty channels that are configured. */
  private setupChannels() {
    const {
      SLACK_WEBHOOK_URL,
      SMTP_HOST,
      SMTP_USER,
      SMTP_PASSWORD,
      PAGERDUTY_INTEGRATION_KEY,
    } = process.env;

    // Slack notifications
    if (SLACK_WEBHOOK_URL) {
      this.channels.set(
        'slack',
        new SlackChannel({
          webhookUrl: SLACK_WEBHOOK_URL,
          channel: '#vendure-alerts',
          username: 'Vendure Monitor',
        }),
      );
    } else {
      console.warn('Notification channel "slack" disabled: SLACK_WEBHOOK_URL is not set');
    }

    // Email notifications
    if (SMTP_HOST && SMTP_USER && SMTP_PASSWORD) {
      this.channels.set(
        'email',
        new EmailChannel({
          smtp: {
            host: SMTP_HOST,
            port: 587,
            auth: {
              user: SMTP_USER,
              pass: SMTP_PASSWORD,
            },
          },
          from: 'alerts@ritsubi.co.jp',
          to: ['admin@ritsubi.co.jp', 'dev@ritsubi.co.jp'],
        }),
      );
    } else {
      console.warn(
        'Notification channel "email" disabled: SMTP_HOST, SMTP_USER or SMTP_PASSWORD is not set',
      );
    }

    // PagerDuty notifications (for critical alerts)
    if (PAGERDUTY_INTEGRATION_KEY) {
      this.channels.set(
        'pagerduty',
        new PagerDutyChannel({
          integrationKey: PAGERDUTY_INTEGRATION_KEY,
        }),
      );
    } else {
      console.warn(
        'Notification channel "pagerduty" disabled: PAGERDUTY_INTEGRATION_KEY is not set',
      );
    }
  }

  /**
   * Sends `alert` to every channel named in `alert.channels`, in parallel.
   * A failing or unknown channel never prevents delivery to the others and
   * never rejects the returned promise; both cases are logged.
   */
  async sendAlert(alert: Alert) {
    const deliveries = alert.channels.map(async channelName => {
      const channel = this.channels.get(channelName);
      if (!channel) {
        console.warn(
          `No notification channel registered for "${channelName}"; alert "${alert.title}" was not delivered to it`,
        );
        return;
      }
      try {
        await channel.send(alert);
      } catch (error) {
        console.error(`Failed to send alert to ${channelName}:`, error);
      }
    });
    // Every delivery handles its own errors, so this never rejects.
    await Promise.all(deliveries);
  }
}
/** An alert to be delivered through one or more notification channels. */
interface Alert {
  /** Urgency of the alert. */
  severity: 'info' | 'warning' | 'critical';
  /** Short headline. */
  title: string;
  /** Human-readable description. */
  message: string;
  /** Time associated with the alert. */
  timestamp: Date;
  /** Names of the notification channels the alert is addressed to. */
  channels: string[];
  /** Optional free-form extra data. */
  metadata?: any;
}
/** A destination that can deliver an alert. */
interface NotificationChannel {
  /** Delivers `alert`; the returned promise settles when delivery finishes. */
  send(alert: Alert): Promise<void>;
}
/**
 * Notification channel that posts alerts to a Slack incoming webhook as a
 * single colour-coded attachment.
 */
class SlackChannel implements NotificationChannel {
  constructor(
    private config: {
      webhookUrl: string;
      channel: string;
      username: string;
    },
  ) {}

  /**
   * Posts `alert` to the configured webhook.
   *
   * @throws Error when Slack answers with a non-2xx status.
   */
  async send(alert: Alert): Promise<void> {
    // Attachment colour per severity
    const color = {
      info: '#36a64f',
      warning: '#ff9900',
      critical: '#ff0000',
    }[alert.severity];

    const payload = {
      channel: this.config.channel,
      username: this.config.username,
      attachments: [
        {
          color,
          title: alert.title,
          text: alert.message,
          // Slack reads the attachment time from `ts` (epoch seconds); a
          // `timestamp` key is ignored and the message shows no time.
          ts: Math.floor(alert.timestamp.getTime() / 1000),
          fields: [
            {
              title: 'Severity',
              value: alert.severity.toUpperCase(),
              short: true,
            },
            {
              title: 'Environment',
              value: process.env.NODE_ENV || 'unknown',
              short: true,
            },
          ],
        },
      ],
    };

    const response = await fetch(this.config.webhookUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });
    if (!response.ok) {
      throw new Error(`Slack notification failed: ${response.statusText}`);
    }
  }
}
5. ヘルスチェック¶
5.1 アプリケーションヘルスチェック¶
// health-check.ts
import { Controller, Get } from '@nestjs/common';
import {
HealthCheck,
HealthCheckService,
TypeOrmHealthIndicator,
MemoryHealthIndicator,
DiskHealthIndicator,
} from '@nestjs/terminus';
/**
 * Health endpoints:
 * - `GET /health`: database, Redis, memory, disk and business-logic checks
 * - `GET /health/readiness`: database and Redis connectivity only
 * - `GET /health/liveness`: heap usage only
 */
@Controller('health')
export class HealthController {
  constructor(
    private health: HealthCheckService,
    private db: TypeOrmHealthIndicator,
    private memory: MemoryHealthIndicator,
    private disk: DiskHealthIndicator,
    private redisHealth: RedisHealthIndicator,
  ) {}

  /** Connectivity checks shared by the full check and the readiness probe. */
  private connectivityChecks() {
    return [
      // Database connectivity
      () => this.db.pingCheck('database'),
      // Redis connectivity
      () => this.redisHealth.pingCheck('redis'),
    ];
  }

  /** Full health check. */
  @Get()
  @HealthCheck()
  check() {
    return this.health.check([
      ...this.connectivityChecks(),
      // Memory usage (heap 250 MiB, RSS 500 MiB)
      () => this.memory.checkHeap('memory_heap', 250 * 1024 * 1024),
      () => this.memory.checkRSS('memory_rss', 500 * 1024 * 1024),
      // Disk usage of the root filesystem
      () =>
        this.disk.checkStorage('storage', {
          path: '/',
          thresholdPercent: 0.85,
        }),
      // Custom business-logic check
      () => this.customBusinessHealthCheck(),
    ]);
  }

  /** Readiness probe: external dependencies only. */
  @Get('readiness')
  @HealthCheck()
  readiness() {
    return this.health.check(this.connectivityChecks());
  }

  /** Liveness probe: heap usage only (500 MiB). */
  @Get('liveness')
  @HealthCheck()
  liveness() {
    return this.health.check([
      () => this.memory.checkHeap('memory_heap', 500 * 1024 * 1024),
    ]);
  }

  /**
   * Exercises key business functions (product lookup, price calculation)
   * and reports the outcome under the `business-logic` key.
   *
   * NOTE(review): `productService` and `pricingService` are used here but
   * are not among the constructor dependencies above. Until they are
   * injected, this check always takes the error branch and reports `down`.
   */
  private async customBusinessHealthCheck() {
    try {
      // Smoke-test important business functions,
      // e.g. product search and price calculation.
      const testProduct = await this.productService.findOne('test-product-id');
      if (!testProduct) {
        throw new Error('Test product not found');
      }
      const testPricing = await this.pricingService.calculatePrice(
        testProduct,
        'test-customer-id',
      );
      if (!testPricing) {
        throw new Error('Pricing calculation failed');
      }
      return {
        'business-logic': {
          status: 'up',
          details: {
            productSearch: 'ok',
            pricingCalculation: 'ok',
          },
        },
      };
    } catch (error) {
      // A caught value is not guaranteed to be an Error instance.
      const reason = error instanceof Error ? error.message : String(error);
      return {
        'business-logic': {
          status: 'down',
          details: {
            error: reason,
          },
        },
      };
    }
  }
}
/** Health indicator that checks Redis reachability with `PING`. */
class RedisHealthIndicator {
  constructor(private redis: Redis) {}

  /**
   * Pings Redis and reports the outcome under `key`.
   *
   * @param key Property name under which the result is reported.
   * @returns `{ [key]: { status: 'up' } }` when Redis answers `PONG`,
   *   `{ [key]: { status: 'down' } }` for any other answer, and
   *   `{ [key]: { status: 'down', details } }` when the call throws.
   *   Never rejects.
   */
  async pingCheck(key: string) {
    try {
      const reply = await this.redis.ping();
      return {
        [key]: {
          status: reply === 'PONG' ? 'up' : 'down',
        },
      };
    } catch (error) {
      // A caught value is not always an Error; keep the reason either way.
      const details = error instanceof Error ? error.message : String(error);
      return {
        [key]: {
          status: 'down',
          details,
        },
      };
    }
  }
}
6. ダッシュボード設定¶
6.1 Grafana ダッシュボード¶
{
"dashboard": {
"title": "Vendure Production Dashboard",
"panels": [
{
"title": "API Response Time",
"type": "graph",
"targets": [
{
"expr": "histogram_quantile(0.95, rate(vendure_api_duration_seconds_bucket[5m]))",
"legendFormat": "95th percentile"
},
{
"expr": "histogram_quantile(0.50, rate(vendure_api_duration_seconds_bucket[5m]))",
"legendFormat": "50th percentile"
}
],
"yAxes": [
{
"label": "Response Time (seconds)",
"max": 5
}
]
},
{
"title": "Request Rate",
"type": "graph",
"targets": [
{
"expr": "rate(vendure_api_requests_total[5m])",
"legendFormat": "{{endpoint}} - {{method}}"
}
]
},
{
"title": "Error Rate",
"type": "singlestat",
"targets": [
{
"expr": "sum(rate(vendure_api_requests_total{status=~\"5..\"}[5m])) / sum(rate(vendure_api_requests_total[5m])) * 100"
}
],
"valueName": "current",
"format": "percent",
"thresholds": "1,5",
"colorBackground": true
},
{
"title": "Database Connections",
"type": "graph",
"targets": [
{
"expr": "postgres_active_connections",
"legendFormat": "Active Connections"
},
{
"expr": "postgres_connection_pool_size{state=\"idle\"}",
"legendFormat": "Idle Pool"
}
]
},
{
"title": "Redis Memory Usage",
"type": "graph",
"targets": [
{
"expr": "redis_memory_usage_bytes / 1024 / 1024",
"legendFormat": "Memory Usage (MB)"
}
]
},
{
"title": "Business Metrics",
"type": "row",
"panels": [
{
"title": "Orders per Hour",
"type": "graph",
"targets": [
{
"expr": "rate(vendure_orders_created_total[1h]) * 3600",
"legendFormat": "{{customerType}}"
}
]
},
{
"title": "Revenue per Hour",
"type": "graph",
"targets": [
{
"expr": "rate(vendure_order_total_amount_sum[1h]) * 3600",
"legendFormat": "Revenue (JPY/hour)"
}
]
}
]
}
],
"time": {
"from": "now-6h",
"to": "now"
},
"refresh": "30s"
}
}
7. 運用プロセス¶
7.1 インシデント対応¶
// incident-response.ts
/**
 * Incident workflow: classify an alert, open an incident, notify the on-call
 * engineer and run the matching runbook.
 */
export class IncidentResponse {
  /** Response and resolution targets per priority, in minutes. */
  private static readonly SEVERITY_LEVELS = {
    // Respond within 15 minutes, resolve within 4 hours
    P1: { name: 'Critical', responseTime: 15, resolveTime: 4 * 60 },
    // Respond within 1 hour, resolve within 24 hours
    P2: { name: 'High', responseTime: 60, resolveTime: 24 * 60 },
    // Respond within 4 hours, resolve within 72 hours
    P3: { name: 'Medium', responseTime: 4 * 60, resolveTime: 72 * 60 },
    // Respond within 24 hours, resolve within 1 week
    P4: { name: 'Low', responseTime: 24 * 60, resolveTime: 168 * 60 },
  };

  /**
   * Runs the workflow for `alert`: classification, incident creation, on-call
   * notification and runbook execution, strictly in that order.
   *
   * @returns The incident that was created.
   */
  static async handleIncident(alert: Alert) {
    const priority = this.determineSeverity(alert);
    const incident = await this.createIncident(alert, priority);
    await this.notifyOnCall(incident);
    await this.executeRunbook(incident);
    return incident;
  }

  /**
   * Maps an alert to a priority.
   *
   * @returns `'P1'` when the title contains one of the always-critical
   *   patterns or the severity is `critical`, `'P2'` for `warning`, and
   *   `'P3'` otherwise.
   */
  private static determineSeverity(alert: Alert): string {
    // Alert types that are always treated as top priority
    const alwaysCritical = [
      'application_down',
      'database_connection_failed',
      'order_failure_rate',
    ];
    const titleIsCritical = alwaysCritical.some(pattern =>
      alert.title.includes(pattern),
    );
    if (titleIsCritical || alert.severity === 'critical') {
      return 'P1';
    }
    return alert.severity === 'warning' ? 'P2' : 'P3';
  }

  /** Executes the runbook registered for the incident's type, if there is one. */
  private static async executeRunbook(incident: Incident) {
    const runbook = this.getRunbook(incident.type);
    if (!runbook) {
      return;
    }
    await runbook.execute(incident);
  }
}
// 自動復旧処理
/** Automatic recovery actions for selected alerts. */
export class AutoRecovery {
  /** Pause between health polls; also the timeout of one health request. */
  private static readonly healthPollIntervalMs = 5000;

  /**
   * Attempts the recovery action registered for `alert.title`.
   *
   * @returns The outcome of the recovery action, or `false` when no action
   *   is registered for the alert.
   */
  static async attemptRecovery(alert: Alert): Promise<boolean> {
    // NOTE(review): `reconnectRedis` and `killIdleConnections` are referenced
    // here but are not defined in this class.
    switch (alert.title) {
      case 'high_memory_usage':
        return await this.restartApplication();
      case 'redis_connection_failed':
        return await this.reconnectRedis();
      case 'database_connection_high':
        return await this.killIdleConnections();
      default:
        return false;
    }
  }

  /**
   * Restarts the Fly.io machine and waits up to 60 seconds for the health
   * endpoint to answer.
   *
   * @returns `true` once the application is healthy again, `false` when the
   *   restart command or the health wait fails.
   */
  private static async restartApplication(): Promise<boolean> {
    try {
      // Restart the Fly.io application. The machine id is passed when the
      // runtime provides one; it is validated because it is interpolated into
      // a shell command.
      const machineId = process.env.FLY_MACHINE_ID ?? '';
      const target = /^[0-9a-z]+$/i.test(machineId) ? ` ${machineId}` : '';
      execSync(`flyctl machine restart${target}`, { timeout: 30000 });
      // Wait for the health check
      await this.waitForHealthy(60000);
      return true;
    } catch (error) {
      console.error('Auto recovery failed:', error);
      return false;
    }
  }

  /**
   * Polls the health endpoint until it answers with a 2xx status.
   *
   * The URL comes from `HEALTH_CHECK_URL` and defaults to
   * `http://localhost:3000/health`. It has to be absolute: a bare `/health`
   * cannot be resolved by `fetch` outside a browser, which made every poll
   * fail and the wait always time out.
   *
   * @param timeout Maximum time to wait, in milliseconds.
   * @throws Error `'Health check timeout'` when no healthy answer arrives.
   */
  private static async waitForHealthy(timeout: number): Promise<void> {
    const healthUrl =
      process.env.HEALTH_CHECK_URL ?? 'http://localhost:3000/health';
    const deadline = Date.now() + timeout;

    while (Date.now() < deadline) {
      // Abort a hanging request so one attempt cannot outlive the whole wait.
      const controller = new AbortController();
      const abortTimer = setTimeout(
        () => controller.abort(),
        AutoRecovery.healthPollIntervalMs,
      );
      try {
        const response = await fetch(healthUrl, { signal: controller.signal });
        if (response.ok) {
          return;
        }
      } catch {
        // Connection errors are expected while the app restarts; keep polling
      } finally {
        clearTimeout(abortTimer);
      }
      await new Promise(resolve =>
        setTimeout(resolve, AutoRecovery.healthPollIntervalMs),
      );
    }
    throw new Error('Health check timeout');
  }
}
7.2 定期メンテナンス¶
#!/bin/bash
# maintenance.sh - 定期メンテナンススクリプト
# Database maintenance
# Purges expired sessions and old job records, then runs VACUUM ANALYZE and
# rebuilds the indexes of the ritsubi_vendure database.
# NOTE(review): table and column names are taken as-is from the original
# script - verify them against the actual schema before running.
# NOTE(review): REINDEX DATABASE can block writes while it runs; confirm it
# is scheduled inside a maintenance window.
postgres_maintenance() {
  echo "Running PostgreSQL maintenance..."
  # Quoted delimiter: the SQL is passed through verbatim, without shell expansion.
  flyctl postgres connect -a ritsubi-vendure-db << 'EOF'
-- Delete old records first so that VACUUM can reclaim the space they occupied
DELETE FROM vendure_session WHERE expires_at < NOW() - INTERVAL '7 days';
DELETE FROM vendure_job_record WHERE finished_at < NOW() - INTERVAL '30 days';
VACUUM ANALYZE;
REINDEX DATABASE ritsubi_vendure;
EOF
}
# Redis maintenance
# Deletes keys matching "expired:*" and asks Redis to release unused memory.
# NOTE(review): redis-cli is invoked without connection options, so it
# targets the default local instance - confirm how the managed Redis is reached.
redis_maintenance() {
  echo "Running Redis maintenance..."
  # Delete expired keys. -r keeps xargs from running "redis-cli del" with no
  # arguments (an error) when the scan finds nothing.
  redis-cli --scan --pattern "expired:*" | xargs -r redis-cli del
  # Memory optimisation
  redis-cli MEMORY PURGE
}
# Log rotation
# Compresses application logs older than 7 days and removes compressed logs
# older than 30 days.
log_rotation() {
  local log_dir="/app/logs"
  echo "Rotating application logs..."
  # Compress, then delete, old log files
  find "$log_dir" -name "*.log" -mtime +7 -exec gzip {} \;
  find "$log_dir" -name "*.log.gz" -mtime +30 -delete
}
# Metrics cleanup
# Placeholder step: only announces itself.
metrics_cleanup() {
  printf '%s\n' "Cleaning up old metrics..."
  # Deleting old Prometheus metrics
  # (normally configured on the Prometheus server side)
}
# Backup verification
# Lists the PostgreSQL backups and prints the time of the last Redis save.
backup_verification() {
  local db_app="ritsubi-vendure-db"
  echo "Verifying backups..."
  # PostgreSQL backup verification
  flyctl postgres backups list -a "$db_app"
  # Redis AOF/RDB file verification
  redis-cli LASTSAVE
}
# Main entry point
# Runs every maintenance step in order, printing start and end timestamps.
main() {
  local step
  echo "Starting maintenance at $(date)"
  for step in \
    postgres_maintenance \
    redis_maintenance \
    log_rotation \
    metrics_cleanup \
    backup_verification; do
    "$step"
  done
  echo "Maintenance completed at $(date)"
}
main "$@"
文書バージョン: 1.0 | 作成日: 2025年9月17日 | 定期レビュー: 月次で監視設定とアラート閾値を見直す