Initial commit
This commit is contained in:
0
hsbg_ai/backend/ai/__init__.py
Normal file
0
hsbg_ai/backend/ai/__init__.py
Normal file
0
hsbg_ai/backend/ai/engine/__init__.py
Normal file
0
hsbg_ai/backend/ai/engine/__init__.py
Normal file
196
hsbg_ai/backend/ai/engine/decision_engine.py
Normal file
196
hsbg_ai/backend/ai/engine/decision_engine.py
Normal file
@@ -0,0 +1,196 @@
|
||||
"""
|
||||
Moteur de décision IA principal - Architecture hybride.
|
||||
Flux: StateAnalyzer → HeuristicEngine + LLMAdvisor → Fusion → FullAdvice
|
||||
"""
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
import structlog
|
||||
|
||||
log = structlog.get_logger()
|
||||
|
||||
|
||||
@dataclass
class GameState:
    """Complete state of one HSBG turn.

    Defaults correspond to a fresh/unknown game, so a partially parsed
    state (e.g. from OCR) is still usable. Minion entries are plain dicts
    with keys like "name", "cost", "tier", "race", "attack", "health"
    (see HeuristicEngine for the consumers).
    """

    turn: int = 0                  # current game turn
    tavern_tier: int = 1           # current tavern tier (1-6)
    gold: int = 3                  # gold available this turn
    hero_id: str = ""              # hero identifier
    hero_hp: int = 40              # remaining hero health
    tavern_minions: list = field(default_factory=list)  # minions offered in the tavern
    board_minions: list = field(default_factory=list)   # minions on our board (max 7)
    hand_minions: list = field(default_factory=list)    # minions held in hand
    freeze: bool = False           # is the tavern currently frozen?
    can_upgrade: bool = True       # is a tier upgrade possible this turn?
    upgrade_cost: int = 5          # gold cost of the next tier upgrade
    available_spells: list = field(default_factory=list)  # tavern spells on offer (shape set by caller)
    opponent_boards: list = field(default_factory=list)   # last known opponent boards (shape set by caller)
    current_placement: int = 5     # lobby standing; higher numbers are worse positions
    player_count: int = 8          # players in the lobby
    phase: str = "recruit"         # current phase label, e.g. "recruit"
|
||||
|
||||
|
||||
@dataclass
class Decision:
    """A single AI decision with its justification."""

    action: str                       # buy|sell|freeze|upgrade|reposition|hero_power|wait
    target: dict | None = None        # targeted card or minion, if any
    priority: int = 5                 # 1-10, higher means more important
    confidence: float = 0.5           # 0.0-1.0
    reasoning: str = ""               # human-readable explanation
    alternatives: list = field(default_factory=list)           # other candidate actions
    synergies_highlighted: list = field(default_factory=list)  # race/synergy names involved
    warnings: list = field(default_factory=list)               # caveats (e.g. not enough gold)
|
||||
|
||||
|
||||
@dataclass
class FullAdvice:
    """Complete advice for an entire turn."""

    main_decision: Decision                                  # the single best action
    secondary_decisions: list = field(default_factory=list)  # lower-ranked alternative Decisions
    board_analysis: str = ""          # short textual board summary
    strategy_long_term: str = ""      # long-term strategy (filled in by the LLM when available)
    threat_assessment: str = ""       # main opponent threat (filled in by the LLM when available)
    processing_ms: int = 0            # total advice-generation time, in milliseconds
    model_used: str = "heuristic"     # which engine(s) produced this advice
    confidence_overall: float = 0.5   # 0.0-1.0 overall confidence
|
||||
|
||||
|
||||
class DecisionEngine:
    """Hybrid HSBG decision engine.

    Architecture:
        1. StateAnalyzer   -> parses the raw state into a typed GameState
        2. HeuristicEngine -> fast, deterministic business rules
        3. LLMAdvisor      -> LLM reasoning to enrich (when available)
        4. Fusion          -> combines both, weighted by confidence
    """

    def __init__(self, settings):
        self.settings = settings
        self._initialized = False
        # Lazy imports to avoid circular imports.
        from backend.ai.engine.heuristics import HeuristicEngine
        from backend.ai.engine.llm_advisor import LLMAdvisor
        from backend.ai.engine.state_analyzer import StateAnalyzer

        self.heuristic = HeuristicEngine()
        self.llm = LLMAdvisor(settings)
        self.analyzer = StateAnalyzer()

    async def initialize(self):
        """Initialise the asynchronous components (LLM)."""
        await self.llm.initialize()
        self._initialized = True
        log.info("decision_engine.ready", model=self.settings.llm_model)

    async def get_advice(self, raw_state: dict) -> FullAdvice:
        """Main entry point.

        Args:
            raw_state: raw dict (from the API or OCR)
        Returns:
            FullAdvice with the main recommendation plus alternatives.
        """
        started_at = time.perf_counter()

        # 1. Parse the raw state.
        game_state = await self.analyzer.parse(raw_state)

        # 2. Heuristics (always available, < 5ms).
        candidates = self.heuristic.evaluate(game_state)

        # 3. LLM (when available, ~500-2000ms). Failures degrade gracefully
        #    to heuristics-only advice instead of propagating.
        llm_advice = None
        if self._initialized:
            try:
                llm_advice = await self.llm.advise(game_state, candidates)
            except Exception as e:
                log.warning("llm.failed_gracefully", error=str(e))

        # 4. Confidence-weighted fusion.
        advice = self._fuse(game_state, candidates, llm_advice)
        advice.processing_ms = int((time.perf_counter() - started_at) * 1000)

        log.info(
            "advice.generated",
            action=advice.main_decision.action,
            confidence=round(advice.main_decision.confidence, 2),
            ms=advice.processing_ms,
            model=advice.model_used,
        )
        return advice

    def _fuse(self, state: GameState, heuristics: list, llm: "FullAdvice | None") -> FullAdvice:
        """Fuse heuristic decisions with the LLM advice.

        Rule: the LLM wins outright above 0.7 confidence; otherwise the
        heuristics drive and the LLM only enriches the result.
        """
        if not heuristics:
            # No candidate at all: safe no-op fallback.
            return FullAdvice(
                main_decision=Decision(action="wait", reasoning="Aucune action identifiée"),
                model_used="fallback",
            )

        # High-confidence LLM -> it takes full priority.
        if llm and llm.confidence_overall > 0.7:
            llm.model_used = f"{self.settings.llm_model}+heuristic"
            return llm

        # Heuristics as the base: pick the best by priority x confidence.
        def combined_score(d):
            return d.priority * d.confidence

        main = max(heuristics, key=combined_score)
        runners_up = [d for d in heuristics if d != main]
        runners_up.sort(key=lambda d: d.priority, reverse=True)

        result = FullAdvice(
            main_decision=main,
            secondary_decisions=runners_up[:3],
            board_analysis=self._analyze_board(state),
            model_used="heuristic",
            confidence_overall=main.confidence,
        )

        # Partial LLM enrichment (strategy + threats even at low confidence).
        if llm:
            result.strategy_long_term = llm.strategy_long_term
            result.threat_assessment = llm.threat_assessment
            result.model_used = f"heuristic+{self.settings.llm_model}"

        return result

    def _analyze_board(self, state: GameState) -> str:
        """Produce a quick textual analysis of the board."""
        from collections import Counter

        notes = []

        if len(state.board_minions) == 7:
            notes.append("⚠️ Board plein — vendre avant d'acheter")

        if state.gold >= 10 and state.tavern_tier < 6:
            notes.append(f"💰 Or abondant — envisager tier {state.tavern_tier + 1}")

        if state.current_placement > 5 and state.turn > 8:
            notes.append("🚨 Position critique — trouver synergie forte rapidement")

        # Detect the dominant race synergy; "race" may be a str or a list.
        races = []
        for minion in state.board_minions:
            race = minion.get("race", [])
            races.extend(race if isinstance(race, list) else [race])
        if races:
            top_race, top_count = Counter(races).most_common(1)[0]
            if top_count >= 3:
                notes.append(f"✨ Synergie {top_race} détectée ({top_count}/7)")

        if state.freeze:
            notes.append("❄️ Taverne gelée — les cartes sont réservées")

        if not notes:
            return "Board standard — continuer normalement"
        return " | ".join(notes)

    async def shutdown(self):
        """Shut the components down cleanly."""
        await self.llm.shutdown()
|
||||
254
hsbg_ai/backend/ai/engine/heuristics.py
Normal file
254
hsbg_ai/backend/ai/engine/heuristics.py
Normal file
@@ -0,0 +1,254 @@
|
||||
"""
|
||||
Moteur heuristique HSBG.
|
||||
Règles métier codées en dur — rapides, déterministes, toujours disponibles.
|
||||
"""
|
||||
from collections import Counter
|
||||
from backend.ai.engine.decision_engine import GameState, Decision
|
||||
import structlog
|
||||
|
||||
log = structlog.get_logger()
|
||||
|
||||
|
||||
class HeuristicEngine:
    """
    Evaluates the game state with hard-coded HSBG business rules.

    Each rule inspects the state and returns a Decision or None.
    Candidates are sorted by priority x confidence, descending.
    """

    def evaluate(self, state: GameState) -> list[Decision]:
        """Run every rule and return the candidate decisions.

        A failing rule is logged and skipped, so one broken rule never
        takes the whole engine down.
        """
        rules = [
            self._rule_triple,       # top priority: triple = golden version
            self._rule_upgrade,      # tavern-tier upgrade
            self._rule_freeze,       # freeze good cards
            self._rule_buy_synergy,  # buy into the dominant synergy
            self._rule_sell,         # sell the weak minions
            self._rule_economy,      # gold management
            self._rule_reposition,   # board positioning
        ]
        decisions = []
        for rule in rules:
            try:
                result = rule(state)
                if result:
                    decisions.append(result)
            except Exception as e:
                log.warning("heuristic.error", rule=rule.__name__, error=str(e))

        return sorted(decisions, key=lambda d: d.priority * d.confidence, reverse=True)

    # ─── Rules ────────────────────────────────────────────────────────────────

    def _rule_triple(self, state: GameState) -> Decision | None:
        """Detect and complete triples — maximum priority.

        A pair on board+hand plus a matching tavern minion means buying
        it creates a golden (tripled) minion.
        """
        all_minions = state.board_minions + state.hand_minions
        counts = Counter(m.get("name") for m in all_minions if m.get("name"))

        for name, count in counts.items():
            if count >= 2:
                # Look for the third copy in the tavern.
                for m in state.tavern_minions:
                    if m.get("name") == name:
                        can_afford = m.get("cost", 3) <= state.gold
                        # Still recommend the buy when gold is short — the
                        # warning tells the player how to free up the gold.
                        return Decision(
                            action="buy",
                            target=m,
                            priority=9,
                            confidence=0.90,
                            reasoning=f"🏆 TRIPLE en vue! Acheter {name} → version dorée!",
                            warnings=[] if can_afford else [
                                f"Manque {m.get('cost',3) - state.gold}g — vendre un serviteur si nécessaire"
                            ],
                        )
        return None

    def _rule_upgrade(self, state: GameState) -> Decision | None:
        """Upgrade the tavern tier at the right moment."""
        if not state.can_upgrade or state.gold < state.upgrade_cost:
            return None

        # Emergency upgrade (low HP + bad placement, and the upgrade is cheap).
        if state.current_placement >= 6 and state.hero_hp < 20 and state.upgrade_cost <= 4:
            return Decision(
                action="upgrade",
                priority=8,
                confidence=0.80,
                reasoning=f"🚨 Urgence: HP={state.hero_hp}, position={state.current_placement} → tier {state.tavern_tier + 1}",
            )

        # Optimal upgrade schedule: tier -> earliest turn to move up.
        optimal = {1: 3, 2: 5, 3: 7, 4: 10, 5: 14}
        target_turn = optimal.get(state.tavern_tier, 99)
        if state.turn >= target_turn:
            return Decision(
                action="upgrade",
                priority=6,
                confidence=0.65,
                reasoning=f"📈 Tour {state.turn}: montée optimale vers tier {state.tavern_tier + 1} ({state.upgrade_cost}g)",
            )

        # Accelerated upgrade when gold is plentiful and the schedule is near.
        if state.gold >= state.upgrade_cost + 4 and state.turn >= target_turn - 2:
            return Decision(
                action="upgrade",
                priority=5,
                confidence=0.55,
                reasoning=f"💰 Or abondant ({state.gold}g) — montée anticipée tier {state.tavern_tier + 1}",
            )
        return None

    def _rule_freeze(self, state: GameState) -> Decision | None:
        """Freeze when good cards cannot all be bought this turn."""
        if state.freeze:
            return None  # Already frozen

        strong = [m for m in state.tavern_minions if self._is_strong(m, state)]
        if len(strong) < 2:
            return None

        # Only the strong cards we cannot afford this turn justify freezing.
        unaffordable = [m for m in strong if m.get("cost", 3) > state.gold]

        if unaffordable:
            return Decision(
                action="freeze",
                priority=7,
                confidence=0.68,
                reasoning=f"❄️ Geler: {len(strong)} bonne(s) carte(s), {len(unaffordable)} non achetable(s) ce tour",
                synergies_highlighted=[m.get("name", "") for m in strong],
            )
        return None

    def _rule_buy_synergy(self, state: GameState) -> Decision | None:
        """Buy a card that reinforces the main race synergy."""
        if state.gold < 3 or not state.tavern_minions:
            return None

        # Compute the dominant race on the board ("race" may be str or list).
        races = []
        for m in state.board_minions:
            r = m.get("race", [])
            races.extend(r if isinstance(r, list) else [r])

        if not races:
            return None

        race_counts = Counter(races)
        top_race, top_count = race_counts.most_common(1)[0]

        # Need at least a pair, and a race that actually identifies a tribe.
        if top_count < 2 or top_race in ("none", "", "all"):
            return None

        # Look in the tavern for affordable cards of that race.
        synergy_targets = []
        for m in state.tavern_minions:
            m_races = m.get("race", [])
            if isinstance(m_races, str):
                m_races = [m_races]
            if top_race in m_races and m.get("cost", 3) <= state.gold:
                synergy_targets.append(m)

        if synergy_targets:
            # Prefer cards with effects (divine shield, battlecry, deathrattle),
            # then raw stats, via a weighted score.
            best = max(synergy_targets, key=lambda m: (
                int(m.get("has_divine", 0)) * 3 +
                int(bool(m.get("battlecry"))) * 2 +
                int(bool(m.get("deathrattle"))) * 2 +
                m.get("attack", 0) + m.get("health", 0)
            ))
            return Decision(
                action="buy",
                target=best,
                priority=7,
                confidence=0.72,
                reasoning=f"🔗 Renforcer synergie {top_race} ({top_count} sur board): acheter {best.get('name', '?')}",
                synergies_highlighted=[top_race],
            )
        return None

    def _rule_sell(self, state: GameState) -> Decision | None:
        """Sell minions that are too weak, to free up board space."""
        if len(state.board_minions) < 6:
            return None

        weak = [m for m in state.board_minions if self._is_weak(m, state)]
        if not weak:
            return None

        worst = min(weak, key=lambda m: m.get("attack", 0) + m.get("health", 0))
        return Decision(
            action="sell",
            target=worst,
            priority=5,
            confidence=0.62,
            reasoning=f"🗑️ Vendre {worst.get('name', '?')} ({worst.get('attack',0)}/{worst.get('health',0)}) — trop faible en tier {state.tavern_tier}",
        )

    def _rule_economy(self, state: GameState) -> Decision | None:
        """Manage gold cautiously in the early game."""
        if state.gold <= 2 and state.turn < 4:
            return Decision(
                action="wait",
                priority=3,
                confidence=0.50,
                reasoning=f"💸 Or limité ({state.gold}g) en tour {state.turn} — économiser pour la suite",
                warnings=["Éviter les gels coûteux en early game"],
            )
        return None

    def _rule_reposition(self, state: GameState) -> Decision | None:
        """Suggest repositioning when key cards benefit from placement."""
        if len(state.board_minions) < 3:
            return None

        has_taunt = any(m.get("has_taunt") for m in state.board_minions)
        has_divine = any(m.get("has_divine") for m in state.board_minions)
        # Cleave is detected from the on_attack text mentioning "adjacent".
        has_cleave = any(m.get("on_attack") and "adjacent" in m.get("on_attack","").lower()
                         for m in state.board_minions)

        if has_taunt or has_divine or has_cleave:
            tips = []
            if has_taunt:
                tips.append("Taunt à gauche (absorbe les attaques)")
            if has_divine:
                tips.append("Divine Shield au centre ou protégé")
            if has_cleave:
                tips.append("Cleave en position 1 ou 3")
            return Decision(
                action="reposition",
                priority=4,
                confidence=0.58,
                reasoning=f"🗺️ Optimiser le board: {' | '.join(tips)}",
            )
        return None

    # ─── Helpers ──────────────────────────────────────────────────────────────

    def _is_strong(self, minion: dict, state: GameState) -> bool:
        """A minion is strong if its stats beat the current tier's threshold,
        or if it carries any ability (divine shield, taunt, battlecry,
        deathrattle, passive)."""
        stat_thresholds = {1: 4, 2: 7, 3: 10, 4: 14, 5: 18, 6: 22}
        min_stats = stat_thresholds.get(state.tavern_tier, 8)
        stats = minion.get("attack", 0) + minion.get("health", 0)
        return (
            stats >= min_stats
            or minion.get("has_divine", False)
            or minion.get("has_taunt", False)
            or bool(minion.get("battlecry", ""))
            or bool(minion.get("deathrattle", ""))
            or bool(minion.get("passive", ""))
        )

    def _is_weak(self, minion: dict, state: GameState) -> bool:
        """A tier 1-2 minion without any ability is weak in mid/late game
        (turn 8+). Earlier than that, nothing is considered weak."""
        if state.turn < 8:
            return False
        tier = int(minion.get("tier", 1))
        if tier > 2:
            return False
        stats = minion.get("attack", 0) + minion.get("health", 0)
        has_ability = (
            minion.get("has_divine") or minion.get("has_taunt") or
            minion.get("battlecry") or minion.get("deathrattle") or minion.get("passive")
        )
        return stats < 6 and not has_ability
|
||||
134
hsbg_ai/backend/ai/engine/llm_advisor.py
Normal file
134
hsbg_ai/backend/ai/engine/llm_advisor.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""Conseiller LLM local via Ollama — enrichit les décisions heuristiques."""
|
||||
import json
|
||||
import httpx
|
||||
import structlog
|
||||
from backend.ai.engine.decision_engine import GameState, Decision, FullAdvice
|
||||
|
||||
log = structlog.get_logger()
|
||||
|
||||
SYSTEM_PROMPT = """Tu es un expert Hearthstone Battlegrounds rang Légende.
|
||||
Analyse l'état de jeu et donne un conseil tactique optimal.
|
||||
Réponds UNIQUEMENT avec un objet JSON valide, sans texte avant/après, sans markdown:
|
||||
{"main_action":"buy|sell|freeze|upgrade|reposition|hero_power|wait","target_card":null,"priority":7,"confidence":0.8,"reasoning":"explication courte","strategy":"stratégie long terme 1 phrase","threats":"principale menace adversaire","warnings":[]}"""
|
||||
|
||||
|
||||
class LLMAdvisor:
    """Interface to Ollama for LLM-generated advice.

    The advisor is strictly optional: when Ollama or the configured model
    is unreachable, `advise()` returns None and the caller falls back to
    heuristics only.
    """

    def __init__(self, settings):
        self.settings = settings
        self.base_url = settings.llm_base_url  # e.g. Ollama HTTP endpoint
        self.model = settings.llm_model        # model name to query
        self._client: httpx.AsyncClient | None = None  # created in initialize()
        self._available = False  # True only when Ollama answers AND the model exists

    async def initialize(self):
        """Probe Ollama availability and check that the model is pulled.

        Never raises: any failure just leaves `_available` False and logs
        a hint for the operator.
        """
        self._client = httpx.AsyncClient(timeout=10)
        try:
            r = await self._client.get(f"{self.base_url}/api/tags")
            if r.status_code == 200:
                models = r.json().get("models", [])
                model_names = [m.get("name", "") for m in models]
                # Substring match so "mistral" also matches "mistral:latest".
                self._available = any(self.model in name for name in model_names)
                if self._available:
                    log.info("llm.ready", model=self.model)
                else:
                    log.warning("llm.model_not_found", model=self.model,
                                available=model_names,
                                hint=f"Exécutez: ollama pull {self.model}")
        except Exception as e:
            log.warning("llm.ollama_unreachable", url=self.base_url, error=str(e),
                        hint="Installez Ollama: curl -fsSL https://ollama.ai/install.sh | sh")
            self._available = False

    async def advise(self, state: GameState, heuristics: list[Decision]) -> FullAdvice | None:
        """Ask the local LLM for advice.

        Args:
            state: parsed game state.
            heuristics: heuristic decisions (best one is quoted in the prompt).
        Returns:
            Parsed FullAdvice, or None when the LLM is unavailable,
            the request fails, or the response cannot be parsed.
        """
        if not self._available or not self._client:
            return None

        prompt = self._build_prompt(state, heuristics)

        try:
            # Non-streaming /api/generate call; per-request timeout overrides
            # the client default set in initialize().
            r = await self._client.post(
                f"{self.base_url}/api/generate",
                json={
                    "model": self.model,
                    "prompt": prompt,
                    "system": SYSTEM_PROMPT,
                    "stream": False,
                    "options": {
                        "temperature": self.settings.llm_temperature,
                        "num_predict": self.settings.llm_max_tokens,
                    },
                },
                timeout=self.settings.llm_timeout,
            )
            if r.status_code == 200:
                raw = r.json().get("response", "")
                return self._parse(raw)
        except Exception as e:
            log.warning("llm.request_failed", error=str(e))
        return None

    def _build_prompt(self, state: GameState, decisions: list[Decision]) -> str:
        """Build the user prompt from the game state.

        The prompt body is French and must stay consistent with
        SYSTEM_PROMPT; empty collections render as "vide".
        """
        board = ", ".join(
            f"{m.get('name','?')}({m.get('attack',0)}/{m.get('health',0)})"
            + (" [DIVINE]" if m.get("has_divine") else "")
            + (" [TAUNT]" if m.get("has_taunt") else "")
            for m in state.board_minions
        ) or "vide"

        tavern = ", ".join(
            f"{m.get('name','?')}[{m.get('cost',3)}g T{m.get('tier',1)}]"
            for m in state.tavern_minions
        ) or "vide"

        # Quote the top heuristic so the LLM can agree with or override it.
        top_h = decisions[0].reasoning if decisions else "aucune heuristique"

        return f"""=== ÉTAT DU JEU ===
Tour: {state.turn} | Tier taverne: {state.tavern_tier} | Or: {state.gold}g
Héros: {state.hero_id} | HP: {state.hero_hp} | Position: {state.current_placement}/{state.player_count}
Board ({len(state.board_minions)}/7): {board}
Taverne: {tavern}
Gel: {'OUI' if state.freeze else 'NON'} | Upgrade possible: {'OUI' if state.can_upgrade else 'NON'} ({state.upgrade_cost}g)
Phase: {state.phase}

=== MEILLEURE HEURISTIQUE ===
{top_h}

Analyse et donne ta recommandation JSON."""

    def _parse(self, raw: str) -> FullAdvice | None:
        """Parse the LLM's JSON response into a FullAdvice.

        Tolerates extra text around the JSON by extracting the span from
        the first '{' to the last '}'. Returns None on any parse failure.
        """
        try:
            start = raw.find("{")
            end = raw.rfind("}") + 1
            if start == -1 or end == 0:
                return None

            data = json.loads(raw[start:end])

            # Clamp priority to 1-10 and confidence to 0.0-1.0 so a
            # hallucinated value cannot skew the fusion step.
            main = Decision(
                action=data.get("main_action", "wait"),
                priority=min(10, max(1, int(data.get("priority", 5)))),
                confidence=min(1.0, max(0.0, float(data.get("confidence", 0.5)))),
                reasoning=data.get("reasoning", ""),
                warnings=data.get("warnings", []) or [],
            )

            return FullAdvice(
                main_decision=main,
                strategy_long_term=data.get("strategy", ""),
                threat_assessment=data.get("threats", ""),
                confidence_overall=main.confidence,
                model_used=self.model,
            )
        except (json.JSONDecodeError, ValueError, TypeError) as e:
            log.warning("llm.parse_failed", error=str(e), raw_preview=raw[:300])
            return None

    async def shutdown(self):
        """Close the HTTP client, if it was ever created."""
        if self._client:
            await self._client.aclose()
|
||||
26
hsbg_ai/backend/ai/engine/state_analyzer.py
Normal file
26
hsbg_ai/backend/ai/engine/state_analyzer.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""Parse et normalise l'état de jeu brut en GameState typé."""
|
||||
from backend.ai.engine.decision_engine import GameState
|
||||
|
||||
|
||||
class StateAnalyzer:
    """Converts raw data (API, OCR, manual input) into a GameState."""

    async def parse(self, raw: dict) -> GameState:
        """Coerce every known field of *raw* to its expected type.

        Missing keys fall back to the same start-of-game defaults that
        GameState declares, so an empty dict yields a valid state.
        """
        # (field name, coercion function, default when the key is absent)
        field_spec = (
            ("turn", int, 0),
            ("tavern_tier", int, 1),
            ("gold", int, 3),
            ("hero_id", str, ""),
            ("hero_hp", int, 40),
            ("tavern_minions", list, []),
            ("board_minions", list, []),
            ("hand_minions", list, []),
            ("freeze", bool, False),
            ("can_upgrade", bool, True),
            ("upgrade_cost", int, 5),
            ("available_spells", list, []),
            ("opponent_boards", list, []),
            ("current_placement", int, 5),
            ("player_count", int, 8),
            ("phase", str, "recruit"),
        )
        coerced = {
            name: convert(raw.get(name, default))
            for name, convert, default in field_spec
        }
        return GameState(**coerced)
|
||||
0
hsbg_ai/backend/ai/learning/__init__.py
Normal file
0
hsbg_ai/backend/ai/learning/__init__.py
Normal file
117
hsbg_ai/backend/ai/learning/feedback_processor.py
Normal file
117
hsbg_ai/backend/ai/learning/feedback_processor.py
Normal file
@@ -0,0 +1,117 @@
|
||||
"""Système d'apprentissage par feedback utilisateur."""
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from backend.database.models import AIDecision, LearningFeedback
|
||||
import structlog
|
||||
|
||||
log = structlog.get_logger()
|
||||
|
||||
|
||||
class FeedbackProcessor:
    """
    Processes user feedback to improve the AI.

    Workflow:
        1. The user rates a decision (good / bad / neutral)
        2. The feedback is persisted to the DB
        3. An in-memory buffer accumulates feedback entries
        4. When the buffer is full -> JSON export for future training
    """

    def __init__(self, settings):
        self.settings = settings
        # Pending feedback entries awaiting export (see _flush_buffer).
        self._buffer: list[dict] = []
        # Number of entries exported to training batches so far.
        self._trained_count = 0

    async def record_feedback(
        self,
        db: AsyncSession,
        decision_id: int,
        rating: str,
        better_action: dict | None = None,
        comment: str | None = None,
    ) -> LearningFeedback:
        """Persist a feedback entry and update the associated decision.

        Args:
            db: active async session (caller owns commit/rollback; only
                flush() is called here).
            decision_id: primary key of the rated AIDecision.
            rating: "good", "neutral" or "bad" (anything else maps to 0).
            better_action: optional user-suggested replacement action.
            comment: optional free-text comment.
        Returns:
            The persisted LearningFeedback row.
        Raises:
            ValueError: if the decision does not exist.
        """
        decision = await db.get(AIDecision, decision_id)
        if not decision:
            raise ValueError(f"Décision {decision_id} introuvable")

        # Create the feedback row.
        fb = LearningFeedback(
            decision_id=decision_id,
            rating=rating,
            better_action=better_action,
            comment=comment,
        )
        db.add(fb)

        # Mirror the outcome onto the decision itself.
        decision.outcome_rating = {"good": 1, "neutral": 0, "bad": -1}.get(rating, 0)
        decision.user_feedback = comment
        if better_action:
            decision.better_decision = better_action

        await db.flush()

        # Queue the entry for a future training export.
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12 —
        # consider datetime.now(timezone.utc).
        self._buffer.append({
            "decision_id": decision_id,
            "game_state": decision.game_state,
            "recommendation": decision.recommendation,
            "rating": rating,
            "better_action": better_action,
            "ts": datetime.utcnow().isoformat(),
        })

        log.info("feedback.recorded", id=decision_id, rating=rating,
                 buffer=len(self._buffer))

        # Auto-flush once the buffer reaches the configured batch size.
        if (self.settings.learning_auto_save
                and len(self._buffer) >= self.settings.learning_batch_size):
            await self._flush_buffer()

        return fb

    async def _flush_buffer(self):
        """Export the buffer as a timestamped JSON batch file for training.

        The buffer is always cleared (finally), even when the write fails,
        so a persistent disk error cannot grow it unboundedly — failed
        batches are dropped, not retried.
        """
        if not self._buffer:
            return
        os.makedirs("data/learning/feedback", exist_ok=True)
        fname = f"data/learning/feedback/batch_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}.json"
        try:
            # Lazy import: aiofiles is only needed when learning is active.
            import aiofiles
            async with aiofiles.open(fname, "w") as f:
                await f.write(json.dumps(self._buffer, indent=2, ensure_ascii=False))
            self._trained_count += len(self._buffer)
            log.info("feedback.batch_saved", count=len(self._buffer), file=fname)
        except Exception as e:
            log.error("feedback.flush_failed", error=str(e))
        finally:
            self._buffer.clear()

    async def force_flush(self):
        """Manually flush the pending buffer."""
        await self._flush_buffer()

    async def get_stats(self, db: AsyncSession) -> dict:
        """Aggregate global statistics for the learning system.

        Loads all feedback rows; fine at current scale, but consider a
        COUNT/GROUP BY query if the table grows large.
        """
        result = await db.execute(select(LearningFeedback))
        feedbacks = result.scalars().all()
        total = len(feedbacks)
        good = sum(1 for f in feedbacks if f.rating == "good")
        bad = sum(1 for f in feedbacks if f.rating == "bad")
        neutral = total - good - bad
        return {
            "total": total,
            "good": good,
            "bad": bad,
            "neutral": neutral,
            "good_rate": round(good / total * 100, 1) if total > 0 else 0.0,
            "trained": self._trained_count,
            "buffer_pending": len(self._buffer),
            "learning_enabled": self.settings.learning_enabled,
        }
|
||||
Reference in New Issue
Block a user