Initial commit

This commit is contained in:
2026-03-31 13:10:46 +02:00
commit f60d9628e0
52 changed files with 3383 additions and 0 deletions

View File

View File

View File

@@ -0,0 +1,196 @@
"""
Moteur de décision IA principal - Architecture hybride.
Flux: StateAnalyzer → HeuristicEngine + LLMAdvisor → Fusion → FullAdvice
"""
import time
from dataclasses import dataclass, field
import structlog
log = structlog.get_logger()
@dataclass
class GameState:
    """Complete state of a single HSBG (Hearthstone Battlegrounds) turn."""
    turn: int = 0                # Current game turn (0 = unknown / not started)
    tavern_tier: int = 1         # Player's current tavern tier (1-6)
    gold: int = 3                # Gold available this turn
    hero_id: str = ""            # Identifier of the player's hero
    hero_hp: int = 40            # Hero health remaining
    tavern_minions: list = field(default_factory=list)    # Minion dicts offered in the shop
    board_minions: list = field(default_factory=list)     # Minion dicts on the player's board (max 7)
    hand_minions: list = field(default_factory=list)      # Minion dicts held in hand
    freeze: bool = False         # Whether the tavern is currently frozen
    can_upgrade: bool = True     # Whether a tavern upgrade is currently allowed
    upgrade_cost: int = 5        # Gold cost of the next tavern upgrade
    available_spells: list = field(default_factory=list)  # Tavern spells on offer
    opponent_boards: list = field(default_factory=list)   # Last-known opponent boards
    current_placement: int = 5   # Standing in the lobby (higher number = worse position)
    player_count: int = 8        # Number of players in the lobby
    phase: str = "recruit"       # Game phase; "recruit" is the shop phase
@dataclass
class Decision:
    """A single AI decision with its justification."""
    action: str                        # buy|sell|freeze|upgrade|reposition|hero_power|wait
    target: dict | None = None         # Card or minion targeted by the action (None if not applicable)
    priority: int = 5                  # Importance of the action, 1-10 (higher = more urgent)
    confidence: float = 0.5            # Confidence in the decision, 0.0-1.0
    reasoning: str = ""                # Human-readable explanation
    alternatives: list = field(default_factory=list)           # Other candidate actions considered
    synergies_highlighted: list = field(default_factory=list)  # Race/synergy names this decision supports
    warnings: list = field(default_factory=list)               # Caveats to surface to the user
@dataclass
class FullAdvice:
    """Complete advice for a whole turn."""
    main_decision: Decision            # Highest-value action to take first
    secondary_decisions: list = field(default_factory=list)  # Follow-up Decisions, best first
    board_analysis: str = ""           # Short textual summary of the board state
    strategy_long_term: str = ""       # One-sentence long-term strategy (LLM-provided)
    threat_assessment: str = ""        # Main opponent threat (LLM-provided)
    processing_ms: int = 0             # Wall-clock time spent producing this advice
    model_used: str = "heuristic"      # Engine(s) that produced it, e.g. "heuristic", "<model>+heuristic"
    confidence_overall: float = 0.5    # Aggregate confidence, 0.0-1.0
class DecisionEngine:
    """
    Hybrid HSBG decision engine.

    Architecture:
        1. StateAnalyzer   -> parses the raw state into a typed GameState
        2. HeuristicEngine -> fast, deterministic business rules
        3. LLMAdvisor      -> LLM reasoning to enrich advice (when available)
        4. Fusion          -> combines both, weighted by confidence
    """

    def __init__(self, settings):
        self.settings = settings
        self._initialized = False
        # Lazy imports to avoid circular imports at module load time.
        from backend.ai.engine.heuristics import HeuristicEngine
        from backend.ai.engine.llm_advisor import LLMAdvisor
        from backend.ai.engine.state_analyzer import StateAnalyzer
        self.heuristic = HeuristicEngine()
        self.llm = LLMAdvisor(settings)
        self.analyzer = StateAnalyzer()

    async def initialize(self):
        """Initialize the asynchronous components (the LLM advisor)."""
        await self.llm.initialize()
        self._initialized = True
        log.info("decision_engine.ready", model=self.settings.llm_model)

    async def get_advice(self, raw_state: dict) -> FullAdvice:
        """
        Main entry point.

        Args:
            raw_state: Raw dict (from the API or OCR).

        Returns:
            FullAdvice with the main recommendation plus alternatives.
        """
        start = time.perf_counter()
        # 1. Parse the raw state into a typed GameState.
        state = await self.analyzer.parse(raw_state)
        # 2. Heuristics (always available, < 5 ms).
        heuristic_decisions = self.heuristic.evaluate(state)
        # 3. LLM (when available, ~500-2000 ms); failures degrade gracefully
        #    to heuristics-only advice instead of propagating.
        llm_advice = None
        if self._initialized:
            try:
                llm_advice = await self.llm.advise(state, heuristic_decisions)
            except Exception as e:
                log.warning("llm.failed_gracefully", error=str(e))
        # 4. Confidence-weighted fusion of both sources.
        final = self._fuse(state, heuristic_decisions, llm_advice)
        final.processing_ms = int((time.perf_counter() - start) * 1000)
        log.info(
            "advice.generated",
            action=final.main_decision.action,
            confidence=round(final.main_decision.confidence, 2),
            ms=final.processing_ms,
            model=final.model_used,
        )
        return final

    def _fuse(self, state: GameState, heuristics: list, llm: "FullAdvice | None") -> FullAdvice:
        """
        Merge heuristic decisions with the LLM advice.

        Rule: the LLM wins outright when its confidence is > 0.7;
        otherwise heuristics form the base and the LLM only enriches.
        """
        if not heuristics:
            return FullAdvice(
                main_decision=Decision(action="wait", reasoning="Aucune action identifiée"),
                model_used="fallback",
            )
        # High-confidence LLM -> takes full priority.
        if llm and llm.confidence_overall > 0.7:
            llm.model_used = f"{self.settings.llm_model}+heuristic"
            return llm
        # Heuristics as the base recommendation.
        main = max(heuristics, key=lambda d: d.priority * d.confidence)
        # Filter by identity (`is not`), not `!=`: Decision is a dataclass,
        # so `!=` compares by value and would also drop any *distinct*
        # decision that happens to compare equal to `main`.
        secondary = sorted(
            [d for d in heuristics if d is not main],
            key=lambda d: d.priority,
            reverse=True,
        )[:3]
        result = FullAdvice(
            main_decision=main,
            secondary_decisions=secondary,
            board_analysis=self._analyze_board(state),
            model_used="heuristic",
            confidence_overall=main.confidence,
        )
        # Partial LLM enrichment (strategy + threats, even at low confidence).
        if llm:
            result.strategy_long_term = llm.strategy_long_term
            result.threat_assessment = llm.threat_assessment
            result.model_used = f"heuristic+{self.settings.llm_model}"
        return result

    def _analyze_board(self, state: GameState) -> str:
        """Produce a quick textual analysis of the board."""
        parts = []
        if len(state.board_minions) == 7:
            parts.append("⚠️ Board plein — vendre avant d'acheter")
        if state.gold >= 10 and state.tavern_tier < 6:
            parts.append(f"💰 Or abondant — envisager tier {state.tavern_tier + 1}")
        if state.current_placement > 5 and state.turn > 8:
            parts.append("🚨 Position critique — trouver synergie forte rapidement")
        # Detect the dominant race synergy on the board.
        from collections import Counter
        races = []
        for m in state.board_minions:
            r = m.get("race", [])
            races.extend(r if isinstance(r, list) else [r])
        if races:
            top_race, top_count = Counter(races).most_common(1)[0]
            if top_count >= 3:
                parts.append(f"✨ Synergie {top_race} détectée ({top_count}/7)")
        if state.freeze:
            parts.append("❄️ Taverne gelée — les cartes sont réservées")
        return " | ".join(parts) if parts else "Board standard — continuer normalement"

    async def shutdown(self):
        """Shut the components down cleanly."""
        await self.llm.shutdown()

View File

@@ -0,0 +1,254 @@
"""
Moteur heuristique HSBG.
Règles métier codées en dur — rapides, déterministes, toujours disponibles.
"""
from collections import Counter
from backend.ai.engine.decision_engine import GameState, Decision
import structlog
log = structlog.get_logger()
class HeuristicEngine:
    """
    Evaluates the game state with hard-coded HSBG business rules.

    Each rule returns a Decision or None.
    Results are sorted by priority x confidence, descending.
    """

    def evaluate(self, state: GameState) -> list[Decision]:
        """Run every rule and return the candidate decisions, best first."""
        rules = [
            self._rule_triple,       # Top priority: triple = golden minion
            self._rule_upgrade,      # Tavern tier upgrade
            self._rule_freeze,       # Freeze good cards
            self._rule_buy_synergy,  # Buy into the main synergy
            self._rule_sell,         # Sell weak minions
            self._rule_economy,      # Gold management
            self._rule_reposition,   # Board positioning
        ]
        decisions = []
        for rule in rules:
            try:
                result = rule(state)
                if result:
                    decisions.append(result)
            except Exception as e:
                # One faulty rule must never take down the whole evaluation.
                log.warning("heuristic.error", rule=rule.__name__, error=str(e))
        return sorted(decisions, key=lambda d: d.priority * d.confidence, reverse=True)

    # ─── Rules ────────────────────────────────────────────────────────────────

    def _rule_triple(self, state: GameState) -> Decision | None:
        """Detect and complete triples — maximum priority."""
        all_minions = state.board_minions + state.hand_minions
        counts = Counter(m.get("name") for m in all_minions if m.get("name"))
        for name, count in counts.items():
            if count >= 2:
                # Look for the third copy in the tavern.
                for m in state.tavern_minions:
                    if m.get("name") == name:
                        can_afford = m.get("cost", 3) <= state.gold
                        return Decision(
                            action="buy",
                            target=m,
                            priority=9,
                            confidence=0.90,
                            reasoning=f"🏆 TRIPLE en vue! Acheter {name} → version dorée!",
                            warnings=[] if can_afford else [
                                f"Manque {m.get('cost',3) - state.gold}g — vendre un serviteur si nécessaire"
                            ],
                        )
        return None

    def _rule_upgrade(self, state: GameState) -> Decision | None:
        """Upgrade the tavern tier at the right time."""
        if not state.can_upgrade or state.gold < state.upgrade_cost:
            return None
        # Emergency upgrade (low HP + bad placement).
        if state.current_placement >= 6 and state.hero_hp < 20 and state.upgrade_cost <= 4:
            return Decision(
                action="upgrade",
                priority=8,
                confidence=0.80,
                reasoning=f"🚨 Urgence: HP={state.hero_hp}, position={state.current_placement} → tier {state.tavern_tier + 1}",
            )
        # Optimal upgrade schedule: tier -> turn on which to upgrade.
        optimal = {1: 3, 2: 5, 3: 7, 4: 10, 5: 14}
        target_turn = optimal.get(state.tavern_tier, 99)
        if state.turn >= target_turn:
            return Decision(
                action="upgrade",
                priority=6,
                confidence=0.65,
                reasoning=f"📈 Tour {state.turn}: montée optimale vers tier {state.tavern_tier + 1} ({state.upgrade_cost}g)",
            )
        # Early upgrade when gold is plentiful and the schedule is close.
        if state.gold >= state.upgrade_cost + 4 and state.turn >= target_turn - 2:
            return Decision(
                action="upgrade",
                priority=5,
                confidence=0.55,
                reasoning=f"💰 Or abondant ({state.gold}g) — montée anticipée tier {state.tavern_tier + 1}",
            )
        return None

    def _rule_freeze(self, state: GameState) -> Decision | None:
        """Freeze when good cards cannot all be bought this turn."""
        if state.freeze:
            return None  # Already frozen
        strong = [m for m in state.tavern_minions if self._is_strong(m, state)]
        if len(strong) < 2:
            return None
        unaffordable = [m for m in strong if m.get("cost", 3) > state.gold]
        if unaffordable:
            return Decision(
                action="freeze",
                priority=7,
                confidence=0.68,
                reasoning=f"❄️ Geler: {len(strong)} bonne(s) carte(s), {len(unaffordable)} non achetable(s) ce tour",
                synergies_highlighted=[m.get("name", "") for m in strong],
            )
        return None

    def _rule_buy_synergy(self, state: GameState) -> Decision | None:
        """Buy a card that reinforces the board's main race synergy."""
        if state.gold < 3 or not state.tavern_minions:
            return None
        # Compute the dominant race on the board.
        races = []
        for m in state.board_minions:
            r = m.get("race", [])
            races.extend(r if isinstance(r, list) else [r])
        if not races:
            return None
        race_counts = Counter(races)
        top_race, top_count = race_counts.most_common(1)[0]
        if top_count < 2 or top_race in ("none", "", "all"):
            return None
        # Look for an affordable card of that race in the tavern.
        synergy_targets = []
        for m in state.tavern_minions:
            m_races = m.get("race", [])
            if isinstance(m_races, str):
                m_races = [m_races]
            if top_race in m_races and m.get("cost", 3) <= state.gold:
                synergy_targets.append(m)
        if synergy_targets:
            # Prefer cards with effects over plain stats.
            best = max(synergy_targets, key=lambda m: (
                int(m.get("has_divine", 0)) * 3 +
                int(bool(m.get("battlecry"))) * 2 +
                int(bool(m.get("deathrattle"))) * 2 +
                m.get("attack", 0) + m.get("health", 0)
            ))
            return Decision(
                action="buy",
                target=best,
                priority=7,
                confidence=0.72,
                reasoning=f"🔗 Renforcer synergie {top_race} ({top_count} sur board): acheter {best.get('name', '?')}",
                synergies_highlighted=[top_race],
            )
        return None

    def _rule_sell(self, state: GameState) -> Decision | None:
        """Sell minions that are too weak, to free up board space."""
        if len(state.board_minions) < 6:
            return None
        weak = [m for m in state.board_minions if self._is_weak(m, state)]
        if not weak:
            return None
        worst = min(weak, key=lambda m: m.get("attack", 0) + m.get("health", 0))
        return Decision(
            action="sell",
            target=worst,
            priority=5,
            confidence=0.62,
            reasoning=f"🗑️ Vendre {worst.get('name', '?')} ({worst.get('attack',0)}/{worst.get('health',0)}) — trop faible en tier {state.tavern_tier}",
        )

    def _rule_economy(self, state: GameState) -> Decision | None:
        """Manage gold conservatively in the early game."""
        if state.gold <= 2 and state.turn < 4:
            return Decision(
                action="wait",
                priority=3,
                confidence=0.50,
                reasoning=f"💸 Or limité ({state.gold}g) en tour {state.turn} — économiser pour la suite",
                warnings=["Éviter les gels coûteux en early game"],
            )
        return None

    def _rule_reposition(self, state: GameState) -> Decision | None:
        """Suggest repositioning when key minions may be misplaced."""
        if len(state.board_minions) < 3:
            return None
        has_taunt = any(m.get("has_taunt") for m in state.board_minions)
        has_divine = any(m.get("has_divine") for m in state.board_minions)
        has_cleave = any(m.get("on_attack") and "adjacent" in m.get("on_attack","").lower()
                         for m in state.board_minions)
        if has_taunt or has_divine or has_cleave:
            tips = []
            if has_taunt:
                tips.append("Taunt à gauche (absorbe les attaques)")
            if has_divine:
                tips.append("Divine Shield au centre ou protégé")
            if has_cleave:
                tips.append("Cleave en position 1 ou 3")
            return Decision(
                action="reposition",
                priority=4,
                confidence=0.58,
                reasoning=f"🗺️ Optimiser le board: {' | '.join(tips)}",
            )
        return None

    # ─── Helpers ──────────────────────────────────────────────────────────────

    def _is_strong(self, minion: dict, state: GameState) -> bool:
        """A minion is strong if its stats beat the current tier's threshold, or it has any ability."""
        stat_thresholds = {1: 4, 2: 7, 3: 10, 4: 14, 5: 18, 6: 22}
        min_stats = stat_thresholds.get(state.tavern_tier, 8)
        stats = minion.get("attack", 0) + minion.get("health", 0)
        return (
            stats >= min_stats
            or minion.get("has_divine", False)
            or minion.get("has_taunt", False)
            or bool(minion.get("battlecry", ""))
            or bool(minion.get("deathrattle", ""))
            or bool(minion.get("passive", ""))
        )

    def _is_weak(self, minion: dict, state: GameState) -> bool:
        """A tier 1-2 minion with no ability and low stats is weak in mid/late game (turn >= 8)."""
        if state.turn < 8:
            return False
        tier = int(minion.get("tier", "1"))
        if tier > 2:
            return False
        stats = minion.get("attack", 0) + minion.get("health", 0)
        has_ability = (
            minion.get("has_divine") or minion.get("has_taunt") or
            minion.get("battlecry") or minion.get("deathrattle") or minion.get("passive")
        )
        return stats < 6 and not has_ability

View File

@@ -0,0 +1,134 @@
"""Conseiller LLM local via Ollama — enrichit les décisions heuristiques."""
import json
import httpx
import structlog
from backend.ai.engine.decision_engine import GameState, Decision, FullAdvice
log = structlog.get_logger()
# System prompt sent with every Ollama request. It forces a strict JSON-only
# reply (no surrounding text or markdown) so the response can be extracted
# by locating the first "{" and last "}".
SYSTEM_PROMPT = """Tu es un expert Hearthstone Battlegrounds rang Légende.
Analyse l'état de jeu et donne un conseil tactique optimal.
Réponds UNIQUEMENT avec un objet JSON valide, sans texte avant/après, sans markdown:
{"main_action":"buy|sell|freeze|upgrade|reposition|hero_power|wait","target_card":null,"priority":7,"confidence":0.8,"reasoning":"explication courte","strategy":"stratégie long terme 1 phrase","threats":"principale menace adversaire","warnings":[]}"""
class LLMAdvisor:
    """Interface to a local Ollama server for LLM-based advice."""

    def __init__(self, settings):
        self.settings = settings
        self.base_url = settings.llm_base_url
        self.model = settings.llm_model
        self._client: httpx.AsyncClient | None = None
        self._available = False

    async def initialize(self):
        """Probe Ollama and check that the configured model is pulled."""
        self._client = httpx.AsyncClient(timeout=10)
        try:
            r = await self._client.get(f"{self.base_url}/api/tags")
            if r.status_code == 200:
                models = r.json().get("models", [])
                model_names = [m.get("name", "") for m in models]
                # Substring match so "llama3" also matches e.g. "llama3:8b".
                self._available = any(self.model in name for name in model_names)
                if self._available:
                    log.info("llm.ready", model=self.model)
                else:
                    log.warning("llm.model_not_found", model=self.model,
                                available=model_names,
                                hint=f"Exécutez: ollama pull {self.model}")
        except Exception as e:
            log.warning("llm.ollama_unreachable", url=self.base_url, error=str(e),
                        hint="Installez Ollama: curl -fsSL https://ollama.ai/install.sh | sh")
            self._available = False

    async def advise(self, state: GameState, heuristics: list[Decision]) -> FullAdvice | None:
        """
        Ask the local LLM for advice.

        Returns:
            A parsed FullAdvice, or None when the LLM is unavailable,
            the request fails, or the response cannot be parsed.
        """
        if not self._available or not self._client:
            return None
        prompt = self._build_prompt(state, heuristics)
        try:
            r = await self._client.post(
                f"{self.base_url}/api/generate",
                json={
                    "model": self.model,
                    "prompt": prompt,
                    "system": SYSTEM_PROMPT,
                    "stream": False,
                    "options": {
                        "temperature": self.settings.llm_temperature,
                        "num_predict": self.settings.llm_max_tokens,
                    },
                },
                timeout=self.settings.llm_timeout,
            )
            if r.status_code == 200:
                raw = r.json().get("response", "")
                return self._parse(raw)
            # Previously a non-200 reply was dropped silently; log it so
            # misconfiguration (wrong model, busy server) is diagnosable.
            log.warning("llm.bad_status", status=r.status_code)
        except Exception as e:
            log.warning("llm.request_failed", error=str(e))
        return None

    def _build_prompt(self, state: GameState, decisions: list[Decision]) -> str:
        """Build the user prompt from the game state and the best heuristic."""
        board = ", ".join(
            f"{m.get('name','?')}({m.get('attack',0)}/{m.get('health',0)})"
            + (" [DIVINE]" if m.get("has_divine") else "")
            + (" [TAUNT]" if m.get("has_taunt") else "")
            for m in state.board_minions
        ) or "vide"
        tavern = ", ".join(
            f"{m.get('name','?')}[{m.get('cost',3)}g T{m.get('tier',1)}]"
            for m in state.tavern_minions
        ) or "vide"
        top_h = decisions[0].reasoning if decisions else "aucune heuristique"
        return f"""=== ÉTAT DU JEU ===
Tour: {state.turn} | Tier taverne: {state.tavern_tier} | Or: {state.gold}g
Héros: {state.hero_id} | HP: {state.hero_hp} | Position: {state.current_placement}/{state.player_count}
Board ({len(state.board_minions)}/7): {board}
Taverne: {tavern}
Gel: {'OUI' if state.freeze else 'NON'} | Upgrade possible: {'OUI' if state.can_upgrade else 'NON'} ({state.upgrade_cost}g)
Phase: {state.phase}
=== MEILLEURE HEURISTIQUE ===
{top_h}
Analyse et donne ta recommandation JSON."""

    def _parse(self, raw: str) -> FullAdvice | None:
        """Parse the LLM's JSON reply; returns None on any malformed output."""
        try:
            # Extract the outermost JSON object even if the model added
            # stray text around it despite the system prompt.
            start = raw.find("{")
            end = raw.rfind("}") + 1
            if start == -1 or end == 0:
                return None
            data = json.loads(raw[start:end])
            main = Decision(
                action=data.get("main_action", "wait"),
                priority=min(10, max(1, int(data.get("priority", 5)))),      # clamp to 1-10
                confidence=min(1.0, max(0.0, float(data.get("confidence", 0.5)))),  # clamp to 0-1
                reasoning=data.get("reasoning", ""),
                warnings=data.get("warnings", []) or [],
            )
            return FullAdvice(
                main_decision=main,
                strategy_long_term=data.get("strategy", ""),
                threat_assessment=data.get("threats", ""),
                confidence_overall=main.confidence,
                model_used=self.model,
            )
        except (json.JSONDecodeError, ValueError, TypeError) as e:
            log.warning("llm.parse_failed", error=str(e), raw_preview=raw[:300])
            return None

    async def shutdown(self):
        """Close the HTTP client if it was created."""
        if self._client:
            await self._client.aclose()

View File

@@ -0,0 +1,26 @@
"""Parse et normalise l'état de jeu brut en GameState typé."""
from backend.ai.engine.decision_engine import GameState
class StateAnalyzer:
    """Converts raw input data (API, OCR, manual entry) into a GameState."""

    async def parse(self, raw: dict) -> GameState:
        """Normalize *raw* into a typed GameState, coercing each field and
        falling back to safe defaults when a key is missing."""

        def as_int(key: str, default: int) -> int:
            # Integer field with an explicit fallback value.
            return int(raw.get(key, default))

        def as_list(key: str) -> list:
            # Defensive copy so callers cannot mutate the raw payload.
            return list(raw.get(key, []))

        return GameState(
            turn=as_int("turn", 0),
            tavern_tier=as_int("tavern_tier", 1),
            gold=as_int("gold", 3),
            hero_id=str(raw.get("hero_id", "")),
            hero_hp=as_int("hero_hp", 40),
            tavern_minions=as_list("tavern_minions"),
            board_minions=as_list("board_minions"),
            hand_minions=as_list("hand_minions"),
            freeze=bool(raw.get("freeze", False)),
            can_upgrade=bool(raw.get("can_upgrade", True)),
            upgrade_cost=as_int("upgrade_cost", 5),
            available_spells=as_list("available_spells"),
            opponent_boards=as_list("opponent_boards"),
            current_placement=as_int("current_placement", 5),
            player_count=as_int("player_count", 8),
            phase=str(raw.get("phase", "recruit")),
        )

View File

View File

@@ -0,0 +1,117 @@
"""Système d'apprentissage par feedback utilisateur."""
import json
import os
from datetime import datetime
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from backend.database.models import AIDecision, LearningFeedback
import structlog
log = structlog.get_logger()
class FeedbackProcessor:
    """
    Processes user feedback to improve the AI.

    Workflow:
        1. The user rates a decision (good/bad/neutral).
        2. The feedback is persisted to the DB.
        3. A buffer accumulates feedback entries.
        4. When the buffer is full -> JSON export for future training.
    """

    def __init__(self, settings):
        self.settings = settings
        self._buffer: list[dict] = []   # Feedback entries pending export
        self._trained_count = 0         # Total entries successfully exported

    async def record_feedback(
        self,
        db: AsyncSession,
        decision_id: int,
        rating: str,
        better_action: dict | None = None,
        comment: str | None = None,
    ) -> LearningFeedback:
        """
        Persist one feedback entry and update the associated decision.

        Args:
            db: Active async DB session (caller owns commit/rollback).
            decision_id: Primary key of the rated AIDecision.
            rating: "good", "neutral" or "bad" (anything else maps to 0).
            better_action: Optional action the user would have preferred.
            comment: Optional free-text comment.

        Returns:
            The created LearningFeedback row (flushed, not committed).

        Raises:
            ValueError: If no AIDecision exists for ``decision_id``.
        """
        decision = await db.get(AIDecision, decision_id)
        if not decision:
            raise ValueError(f"Décision {decision_id} introuvable")
        # Create the feedback row.
        fb = LearningFeedback(
            decision_id=decision_id,
            rating=rating,
            better_action=better_action,
            comment=comment,
        )
        db.add(fb)
        # Mirror the outcome onto the decision itself.
        decision.outcome_rating = {"good": 1, "neutral": 0, "bad": -1}.get(rating, 0)
        decision.user_feedback = comment
        if better_action:
            decision.better_decision = better_action
        await db.flush()
        # Buffer the entry for offline training export.
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
        # consider datetime.now(timezone.utc) if aware timestamps are acceptable.
        self._buffer.append({
            "decision_id": decision_id,
            "game_state": decision.game_state,
            "recommendation": decision.recommendation,
            "rating": rating,
            "better_action": better_action,
            "ts": datetime.utcnow().isoformat(),
        })
        log.info("feedback.recorded", id=decision_id, rating=rating,
                 buffer=len(self._buffer))
        # Auto-flush once the buffer reaches the configured batch size.
        if (self.settings.learning_auto_save
                and len(self._buffer) >= self.settings.learning_batch_size):
            await self._flush_buffer()
        return fb

    async def _flush_buffer(self):
        """Export the buffer as a JSON batch file for training."""
        if not self._buffer:
            return
        os.makedirs("data/learning/feedback", exist_ok=True)
        fname = f"data/learning/feedback/batch_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}.json"
        try:
            import aiofiles  # third-party; imported lazily so the core works without it
            async with aiofiles.open(fname, "w") as f:
                await f.write(json.dumps(self._buffer, indent=2, ensure_ascii=False))
            self._trained_count += len(self._buffer)
            log.info("feedback.batch_saved", count=len(self._buffer), file=fname)
        except Exception as e:
            log.error("feedback.flush_failed", error=str(e))
        else:
            # Clear only on success: a failed export keeps the entries so the
            # next flush can retry instead of silently dropping the batch.
            self._buffer.clear()

    async def force_flush(self):
        """Manually flush the buffer."""
        await self._flush_buffer()

    async def get_stats(self, db: AsyncSession) -> dict:
        """Global statistics for the learning system."""
        # NOTE(review): loads every feedback row into memory; fine at small
        # scale, but switch to SQL COUNT aggregates if the table grows.
        result = await db.execute(select(LearningFeedback))
        feedbacks = result.scalars().all()
        total = len(feedbacks)
        good = sum(1 for f in feedbacks if f.rating == "good")
        bad = sum(1 for f in feedbacks if f.rating == "bad")
        neutral = total - good - bad
        return {
            "total": total,
            "good": good,
            "bad": bad,
            "neutral": neutral,
            "good_rate": round(good / total * 100, 1) if total > 0 else 0.0,
            "trained": self._trained_count,
            "buffer_pending": len(self._buffer),
            "learning_enabled": self.settings.learning_enabled,
        }