Complete Rebuild, with Subsystems for each component. RTOS Tasks. (help by Claude)

This commit is contained in:
2025-10-01 12:42:00 +03:00
parent 104c1d04d4
commit f696984cd1
57 changed files with 11757 additions and 2290 deletions

View File

@@ -0,0 +1,314 @@
/*
* ═══════════════════════════════════════════════════════════════════════════════════
* HEALTHMONITOR.HPP - System Health Monitoring and Firmware Validation
* ═══════════════════════════════════════════════════════════════════════════════════
*
* 🏥 THE SYSTEM HEALTH GUARDIAN OF VESPER 🏥
*
* This class provides comprehensive system health monitoring across all subsystems.
* It determines whether the current firmware is stable and functional, or if a
* rollback to the previous firmware version should be performed.
*
* 🏗️ ARCHITECTURE:
* • Periodic health checks across all major subsystems
* • Critical vs non-critical failure classification
* • Firmware stability validation and rollback decision making
* • Centralized health status reporting
* • Thread-safe operation with configurable check intervals
*
* 🔍 MONITORED SUBSYSTEMS:
* • BellEngine: Core timing and bell control system
* • OutputManager: Hardware abstraction layer
* • Communication: MQTT, WebSocket, and UDP protocols
* • Player: Melody playback management
* • Timekeeper: RTC and time synchronization
* • Telemetry: System monitoring and analytics
* • OTAManager: Firmware update management
* • Networking: Network connectivity management
* • ConfigManager: Configuration and persistence
* • FileManager: SD card and file operations
*
* 🚨 FAILURE CLASSIFICATION:
* • CRITICAL: Failures that make the device unusable
* • WARNING: Failures that affect functionality but allow operation
* • INFO: Minor issues that don't affect core functionality
*
* 🔄 FIRMWARE VALIDATION:
* • Boot-time stability check
* • Runtime health monitoring
* • Automatic rollback decision making
* • Health status persistence
*
* 📋 VERSION: 1.0 (Initial health monitoring system)
* 📅 DATE: 2025
* 👨‍💻 AUTHOR: Advanced Bell Systems
* ═══════════════════════════════════════════════════════════════════════════════════
*/
#pragma once
#include <Arduino.h>
#include <vector>
#include <map>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "../Logging/Logging.hpp"
// Forward declarations for all monitored subsystems.
// This header only stores and accepts pointers to these types, so their full
// definitions are not required here.
class BellEngine;
class OutputManager;
class Communication;
class Player;
class Timekeeper;  // spelled with a lowercase 'k', unlike setTimeKeeper()/_timeKeeper below
class Telemetry;
class OTAManager;
class Networking;
class ConfigManager;
class FileManager;
/**
 * @enum HealthStatus
 * @brief Severity ladder used to describe the state of a subsystem.
 *
 * Enumerators are listed from best to worst, so a numerically larger value
 * denotes a more severe condition.
 */
enum class HealthStatus {
    HEALTHY  = 0,  ///< Functioning normally
    WARNING  = 1,  ///< Minor issues, still operational
    CRITICAL = 2,  ///< Major issues affecting functionality
    FAILED   = 3   ///< Non-functional
};
/**
* @struct SubsystemHealth
* @brief Health information for a single subsystem
*/
struct SubsystemHealth {
String name; // Subsystem name
HealthStatus status; // Current health status
String lastError; // Last error message (if any)
unsigned long lastCheck; // Timestamp of last health check
bool isCritical; // Whether this subsystem is critical for operation
// Default constructor for std::map compatibility
SubsystemHealth()
: name(""), status(HealthStatus::HEALTHY), lastCheck(0), isCritical(false) {}
SubsystemHealth(const String& n, bool critical = false)
: name(n), status(HealthStatus::HEALTHY), lastCheck(0), isCritical(critical) {}
};
/**
 * @class HealthMonitor
 * @brief Comprehensive system health monitoring and firmware validation
 *
 * The HealthMonitor tracks a health record per subsystem and exposes queries
 * that callers can use to decide whether the running firmware is stable or
 * whether a rollback is advisable.
 *
 * Subsystems are registered through the set*() methods; every subsystem
 * pointer is nullptr until its setter has been called.
 *
 * Instances are neither copyable nor movable: the object holds a FreeRTOS
 * task handle and declares a static task entry point that receives an opaque
 * pointer, so duplicating or relocating the object would leave two objects
 * sharing one handle and/or a task bound to a stale address.
 *
 * NOTE(review): no mutex or other synchronization primitive is declared in
 * this class. Whether the accessors below are safe to call while the
 * monitoring task is running depends on the implementation file - confirm.
 */
class HealthMonitor {
public:
    // ═══════════════════════════════════════════════════════════════════════════════
    // CONSTRUCTOR & INITIALIZATION
    // ═══════════════════════════════════════════════════════════════════════════════

    /**
     * @brief Constructor - Initialize health monitoring system
     */
    HealthMonitor();

    /**
     * @brief Destructor - Clean up resources
     */
    ~HealthMonitor();

    // Rule of Five: a user-declared destructor plus a held task handle means
    // the implicitly generated copy operations would be unsafe, so copying and
    // moving are disabled explicitly.
    HealthMonitor(const HealthMonitor&) = delete;
    HealthMonitor& operator=(const HealthMonitor&) = delete;
    HealthMonitor(HealthMonitor&&) = delete;
    HealthMonitor& operator=(HealthMonitor&&) = delete;

    /**
     * @brief Initialize health monitoring system
     * @return true if initialization successful
     */
    bool begin();

    // ═══════════════════════════════════════════════════════════════════════════════
    // SUBSYSTEM REGISTRATION
    // Each setter only stores the given raw pointer.
    // NOTE(review): presumably non-owning - confirm against the destructor.
    // ═══════════════════════════════════════════════════════════════════════════════

    /** @brief Register BellEngine for monitoring */
    void setBellEngine(BellEngine* bellEngine) { _bellEngine = bellEngine; }

    /** @brief Register OutputManager for monitoring */
    void setOutputManager(OutputManager* outputManager) { _outputManager = outputManager; }

    /** @brief Register Communication for monitoring */
    void setCommunication(Communication* communication) { _communication = communication; }

    /** @brief Register Player for monitoring */
    void setPlayer(Player* player) { _player = player; }

    /** @brief Register Timekeeper for monitoring */
    void setTimeKeeper(Timekeeper* timeKeeper) { _timeKeeper = timeKeeper; }

    /** @brief Register Telemetry for monitoring */
    void setTelemetry(Telemetry* telemetry) { _telemetry = telemetry; }

    /** @brief Register OTAManager for monitoring */
    void setOTAManager(OTAManager* otaManager) { _otaManager = otaManager; }

    /** @brief Register Networking for monitoring */
    void setNetworking(Networking* networking) { _networking = networking; }

    /** @brief Register ConfigManager for monitoring */
    void setConfigManager(ConfigManager* configManager) { _configManager = configManager; }

    /** @brief Register FileManager for monitoring */
    void setFileManager(FileManager* fileManager) { _fileManager = fileManager; }

    // ═══════════════════════════════════════════════════════════════════════════════
    // HEALTH CHECK METHODS
    // ═══════════════════════════════════════════════════════════════════════════════

    /**
     * @brief Perform comprehensive health check on all subsystems
     * @return Overall system health status
     */
    HealthStatus performFullHealthCheck();

    /**
     * @brief Perform health check on a specific subsystem
     * @param subsystemName Name of the subsystem to check
     * @return Health status of the specified subsystem
     */
    HealthStatus checkSubsystemHealth(const String& subsystemName);

    /**
     * @brief Get current health status of all subsystems
     * @return Map of subsystem names to their health information
     *
     * Returns a reference rather than a copy.
     * NOTE(review): if the monitoring task updates the underlying map while a
     * caller iterates through this reference, the accesses may race - confirm
     * how the implementation guards this.
     */
    const std::map<String, SubsystemHealth>& getAllSubsystemHealth() const;

    /**
     * @brief Get health status of a specific subsystem
     * @param subsystemName Name of the subsystem
     * @return Health information for the subsystem (returned by value)
     *
     * NOTE(review): the result for an unknown name is not visible from this
     * header - check the implementation.
     */
    SubsystemHealth getSubsystemHealth(const String& subsystemName) const;

    // ═══════════════════════════════════════════════════════════════════════════════
    // FIRMWARE VALIDATION
    // ═══════════════════════════════════════════════════════════════════════════════

    /**
     * @brief Check if current firmware is stable and should be kept
     * @return true if firmware is stable, false if rollback is recommended
     */
    bool isFirmwareStable() const;

    /**
     * @brief Get the number of critical failures detected
     * @return Count of subsystems with critical failures
     */
    uint8_t getCriticalFailureCount() const;

    /**
     * @brief Get the number of warning-level issues detected
     * @return Count of subsystems with warning-level issues
     */
    uint8_t getWarningCount() const;

    /**
     * @brief Check if a firmware rollback is recommended
     * @return true if rollback is recommended due to critical failures
     */
    bool shouldRollbackFirmware() const;

    // ═══════════════════════════════════════════════════════════════════════════════
    // HEALTH REPORTING
    // ═══════════════════════════════════════════════════════════════════════════════

    /**
     * @brief Generate a comprehensive health report
     * @return JSON string containing detailed health information
     */
    String generateHealthReport() const;

    /**
     * @brief Get a summary of system health
     * @return Brief health summary string
     */
    String getHealthSummary() const;

    // ═══════════════════════════════════════════════════════════════════════════════
    // CONFIGURATION
    // ═══════════════════════════════════════════════════════════════════════════════

    /**
     * @brief Set health check interval
     * @param intervalMs Interval between health checks in milliseconds
     *
     * Plain assignment with no synchronization.
     * NOTE(review): when the monitoring task picks up a new value is
     * determined by the implementation - confirm.
     */
    void setHealthCheckInterval(unsigned long intervalMs) { _healthCheckInterval = intervalMs; }

    /**
     * @brief Enable or disable automatic health monitoring
     * @param enabled Whether to enable automatic monitoring
     *
     * Plain assignment with no synchronization; only the flag is stored here.
     */
    void setAutoMonitoring(bool enabled) { _autoMonitoring = enabled; }

private:
    // ═══════════════════════════════════════════════════════════════════════════════
    // SUBSYSTEM REFERENCES (nullptr until registered via the matching setter)
    // ═══════════════════════════════════════════════════════════════════════════════
    BellEngine* _bellEngine = nullptr;
    OutputManager* _outputManager = nullptr;
    Communication* _communication = nullptr;
    Player* _player = nullptr;
    Timekeeper* _timeKeeper = nullptr;
    Telemetry* _telemetry = nullptr;
    OTAManager* _otaManager = nullptr;
    Networking* _networking = nullptr;
    ConfigManager* _configManager = nullptr;
    FileManager* _fileManager = nullptr;

    // ═══════════════════════════════════════════════════════════════════════════════
    // HEALTH MONITORING STATE
    // ═══════════════════════════════════════════════════════════════════════════════
    std::map<String, SubsystemHealth> _subsystemHealth;  // Health record per subsystem name
    TaskHandle_t _monitoringTaskHandle = nullptr;        // FreeRTOS handle; nullptr by default
    unsigned long _healthCheckInterval = 300000;         // 5 minutes default (milliseconds)
    bool _autoMonitoring = true;                         // Automatic monitoring enabled by default

    // ═══════════════════════════════════════════════════════════════════════════════
    // PRIVATE HELPER METHODS
    // ═══════════════════════════════════════════════════════════════════════════════

    /**
     * @brief Initialize all subsystem health entries
     */
    void initializeSubsystemHealth();

    /**
     * @brief Monitoring task function
     * @param parameter Opaque task argument
     *
     * Static with a single void* argument, matching the FreeRTOS task
     * function shape. NOTE(review): presumably receives `this` - confirm.
     */
    static void monitoringTask(void* parameter);

    /**
     * @brief Main monitoring loop
     */
    void monitoringLoop();

    /**
     * @brief Update health status for a specific subsystem
     * @param name   Subsystem name
     * @param status New health status
     * @param error  Optional error message (defaults to an empty string)
     */
    void updateSubsystemHealth(const String& name, HealthStatus status, const String& error = "");

    /**
     * @brief Check if enough critical subsystems are healthy
     */
    bool areCriticalSubsystemsHealthy() const;

    /**
     * @brief Calculate overall system health based on subsystem status
     */
    HealthStatus calculateOverallHealth() const;

    /**
     * @brief Convert health status to string
     */
    String healthStatusToString(HealthStatus status) const;
};