打通自动回复与表情语义库联动

- 新增表情语义解析与表情资产查询模块,支持从历史表情中提取可读中文语义
- 为 ai_auto_response 增加短回复表情匹配器,命中语义时优先发送表情,并支持发送失败时回退为文本
- 调整自动回复提示词与配置项,强化短情绪回复场景的表情替换能力

This commit is contained in:
liuwei
2026-04-27 11:40:44 +08:00
parent 884ffb81e8
commit 623ca505d4
6 changed files with 540 additions and 2 deletions

View File

@@ -71,6 +71,20 @@ qa_with_context_total_limit = 30
default_char_limit = 30
default_total_limit = 30
[emoji_reply]
# 自动回复和表情库的衔接策略:
# 1. 模型仍然只输出自然文本,本地只在“极短情绪回复”场景里尝试换成表情;
# 2. 这样不用把 md5 暴露给模型,也更方便后续继续扩展同义词和人工校准;
# 3. 第一版只做保守替换,避免把正常答疑文本误发成表情。
enable = true
asset_scan_limit = 800
cache_ttl_sec = 300
max_reply_chars = 8
max_alias_chars = 16
min_match_score = 75
min_semantic_length = 1
require_single_chunk = true
[prompt_compact]
# 这里改成“常驻轻背景 + 相关增强”后,群长期摘要和成员轻画像都会稳定带给模型:
# 1. group_profile 放宽,让群长期摘要不会总被前面的模式/知识域说明挤掉;

View File

@@ -0,0 +1,160 @@
from __future__ import annotations

import logging
import time
from typing import Any, Dict, List, Optional

from db.emoji_asset_db import EmojiAssetDB
from utils.wechat.emoji_semantic_parser import (
    dedupe_emoji_semantic_candidates,
    extract_emoji_meta,
    extract_emoji_semantic_info,
    normalize_emoji_match_text,
    safe_text,
)
class EmojiReplySelector:
    """Pick an emoji asset to send in place of a very short auto-reply.

    Design goals:
    1. The reply model keeps producing natural text; only very short,
       emotional replies are considered for replacement, and that decision
       is made locally.
    2. Selection relies purely on the Chinese semantics already attached to
       the stored emoji assets, so the model never needs to know an md5.
    3. Matching is best-effort: whenever no emoji qualifies, or the asset
       library cannot be loaded, ``match_reply_to_emoji`` returns ``None`` so
       the caller can fall back to plain text.
    """

    # Sentence-final modal particles removed before the looser comparisons.
    # NOTE(review): the particle literals were lost in the reviewed copy of
    # this file (the set contained only empty strings, so nothing was ever
    # stripped). This list is reconstructed from the docstring examples
    # ("哇啊", "害呀") plus common particles - confirm against the intended set.
    _MODAL_SUFFIX_CHARS = "啊呀呢吧啦嘛哦喔诶"

    # Score of an exact match; nothing can beat it, so scanning may stop.
    _PERFECT_SCORE = 100

    def __init__(self, db_manager, config: Dict[str, Any] | None = None):
        """Read the ``[emoji_reply]`` options and prepare an empty asset cache.

        Args:
            db_manager: Handle passed to ``EmojiAssetDB``. When ``None`` the
                selector is disabled and never touches the database.
            config: Option mapping; missing, falsy or malformed numeric
                values fall back to the built-in defaults.
        """
        self.db_manager = db_manager
        self.config = config or {}
        self.enabled = bool(self.config.get("enable", True)) and db_manager is not None
        self.asset_limit = self._int_option("asset_scan_limit", 800, 50)
        self.cache_ttl_sec = self._int_option("cache_ttl_sec", 300, 30)
        self.max_reply_chars = self._int_option("max_reply_chars", 8, 1)
        self.max_alias_chars = self._int_option("max_alias_chars", 16, 1)
        self.min_match_score = self._int_option("min_match_score", 75, 1)
        self.min_semantic_length = self._int_option("min_semantic_length", 1, 1)
        self.require_single_chunk = bool(self.config.get("require_single_chunk", True))
        self.asset_db = EmojiAssetDB(db_manager) if db_manager is not None else None
        self._cache_assets: List[Dict[str, Any]] = []
        self._cache_expires_at = 0.0

    def _int_option(self, key: str, default: int, minimum: int) -> int:
        """Return config ``key`` as an int, clamped to at least ``minimum``.

        Falsy values (``None``, ``0``, ``""``) and values ``int()`` cannot
        convert both resolve to ``default``, so a typo in the config file
        cannot prevent the plugin from initialising.
        """
        raw_value = self.config.get(key, default)
        try:
            value = int(raw_value or default)
        except (TypeError, ValueError, OverflowError):
            value = default
        return max(value, minimum)

    def match_reply_to_emoji(self, reply_text: str, reply_chunks: List[str] | None = None) -> Optional[Dict[str, Any]]:
        """Choose the best emoji asset for the final reply text.

        Notes:
        1. Only very short replies are handled, so ordinary answers are not
           replaced by an emoji by mistake.
        2. Match priority: exact equality > equality after dropping trailing
           modal particles > one text containing the other.
        3. The result carries ``md5`` and ``total_length`` so the caller can
           send the emoji immediately.

        Args:
            reply_text: The final reply text produced by the model.
            reply_chunks: The chunks the reply would be sent as; with
                ``require_single_chunk`` more than one non-blank chunk
                disables replacement.

        Returns:
            A dict with ``md5``, ``total_length``, ``semantic_text``,
            ``semantic_aliases`` and ``match_score``, or ``None`` when no
            asset reaches ``min_match_score`` or the library fails to load.
        """
        if not self.enabled:
            return None

        chunks = [chunk for chunk in (reply_chunks or []) if safe_text(chunk).strip()]
        if self.require_single_chunk and len(chunks) > 1:
            return None

        raw_text = safe_text(reply_text).strip()
        if not raw_text or len(raw_text) > self.max_reply_chars:
            return None

        normalized = normalize_emoji_match_text(raw_text)
        if not normalized or len(normalized) < self.min_semantic_length:
            return None

        # The caller invokes this method outside of its own try/except, so a
        # database or parsing error here must not break the text reply.
        try:
            assets = self._load_assets()
        except Exception:
            logging.getLogger(__name__).exception(
                "emoji reply: failed to load emoji assets, falling back to text"
            )
            return None

        best_asset = None
        best_score = -1
        for asset in assets:
            for alias in asset.get("semantic_aliases") or []:
                score = self._score_alias_match(normalized, alias)
                if score > best_score:
                    best_score = score
                    best_asset = asset
            # Only a strictly higher score replaces the current best, so once
            # an exact match is found no later asset can win.
            if best_score >= self._PERFECT_SCORE:
                break

        if not best_asset or best_score < self.min_match_score:
            return None

        return {
            "md5": best_asset.get("md5", ""),
            "total_length": int(best_asset.get("total_length") or 0),
            "semantic_text": best_asset.get("semantic_text", ""),
            "semantic_aliases": best_asset.get("semantic_aliases", []) or [],
            "match_score": best_score,
        }

    def _load_assets(self) -> List[Dict[str, Any]]:
        """Load, merge by md5 and cache the emoji assets usable for replies.

        The result is cached for ``cache_ttl_sec`` seconds. An empty result
        is cached as well, so an empty or unusable library does not cause a
        database scan on every short reply.

        Returns:
            One dict per md5 with ``md5``, ``total_length``,
            ``semantic_text`` and the de-duplicated ``semantic_aliases``.
        """
        if not self.enabled or self.asset_db is None:
            return []

        now = time.time()
        if now < self._cache_expires_at:
            return self._cache_assets

        rows = self.asset_db.get_recent_emoji_assets(limit=self.asset_limit)
        assets: Dict[str, Dict[str, Any]] = {}
        for row in rows:
            attachment_url = safe_text(row.get("attachment_url"))
            md5, total_length = extract_emoji_meta(attachment_url)
            if not md5 or total_length <= 0:
                continue

            semantic_info = extract_emoji_semantic_info(attachment_url)
            semantic_aliases = [
                alias
                for alias in (semantic_info.get("semantic_aliases") or [])
                if len(alias) <= self.max_alias_chars
            ]
            if not semantic_aliases:
                continue

            # Rows sharing an md5 are merged; the first row seen provides
            # total_length, which is always positive at this point.
            target = assets.setdefault(md5, {
                "md5": md5,
                "total_length": total_length,
                "semantic_text": "",
                "semantic_aliases": [],
            })
            if not target.get("semantic_text") and semantic_info.get("semantic_text"):
                target["semantic_text"] = semantic_info.get("semantic_text")
            target["semantic_aliases"] = dedupe_emoji_semantic_candidates(
                list(target.get("semantic_aliases") or []) + semantic_aliases
            )

        self._cache_assets = [asset for asset in assets.values() if asset.get("semantic_aliases")]
        self._cache_expires_at = now + self.cache_ttl_sec
        return self._cache_assets

    def _score_alias_match(self, normalized_reply: str, alias: str) -> int:
        """Score how well a normalized reply matches one emoji alias.

        Scoring:
        1. Exact equality scores 100, e.g. "哈哈哈" against an alias "哈哈哈".
        2. Equality after removing trailing modal particles scores 96 (only
           the reply stripped) or 94 (both stripped), covering colloquial
           forms such as "哇啊" or "害呀".
        3. One text containing the other scores 82 plus the length of the
           shorter text (capped at 10), e.g. "哈哈" vs "哈哈哈".
        4. Anything else, or an empty side, scores 0.
        """
        normalized_alias = normalize_emoji_match_text(alias)
        if not normalized_reply or not normalized_alias:
            return 0
        if normalized_reply == normalized_alias:
            return self._PERFECT_SCORE

        stripped_reply = self._strip_modal_suffix(normalized_reply)
        stripped_alias = self._strip_modal_suffix(normalized_alias)
        # A reply made only of particles strips to "", which must never count
        # as a match, hence the truthiness guards.
        if stripped_reply and stripped_reply == normalized_alias:
            return 96
        if stripped_reply and stripped_reply == stripped_alias:
            return 94

        if normalized_reply in normalized_alias or normalized_alias in normalized_reply:
            overlap = min(len(normalized_reply), len(normalized_alias))
            return 82 + min(overlap, 10)
        return 0

    @staticmethod
    def _strip_modal_suffix(text: str) -> str:
        """Remove trailing modal particles to reduce colloquial noise."""
        return safe_text(text).rstrip(EmojiReplySelector._MODAL_SUFFIX_CHARS)

View File

@@ -28,6 +28,7 @@ def build_user_prompt(context: Dict, memory_hints: Dict) -> str:
"规则优先级:当前发言可验证信息 > 群场景约束 > 人设措辞润色。",
"如果是明确问题,先给结论;只给第一层答案,不主动展开第二层解释。",
length_rule,
"如果最自然的回复只是短情绪词或短语气词,比如“哈哈”“哇”“害”“难道”,就只回那个短词,不要为了凑完整句硬补解释。",
"能少说就少说,优先像群友随口接一句,不要写成说明文。",
"回复总长度尽量控制在30字内确实需要补充时最多2句且总长度不超过55字。",
"禁止大段铺垫、总结腔、条目化回答。",

View File

@@ -38,6 +38,7 @@ from .memory.social_memory import SocialMemoryService
from .profile.group_profile import GroupProfileResolver
from .context.conversation_hints import build_conversation_hints
from .core.decision_flow import DecisionFlow
from .core.emoji_reply import EmojiReplySelector
from .core.triggers import TriggerRouter
from .core.llm_result_parser import LLMResultParser
from .core.reply_formatter import finalize_reply, preview_text
@@ -101,6 +102,7 @@ class AIAutoResponsePlugin(MessagePluginInterface):
self.queue_maxsize = 200
self.queue_workers: List[asyncio.Task] = []
self.reply_limits: Dict[str, Any] = {}
self.emoji_reply_config: Dict[str, Any] = {}
self.prompt_compact_config: Dict[str, Any] = {}
self.message_expire_sec = 0.0
self.room_message_seq_counter = 0
@@ -142,8 +144,10 @@ class AIAutoResponsePlugin(MessagePluginInterface):
self.mode_config = self._config.get("mode", {}) or {}
self.cooldown_config = self._config.get("cooldown", {}) or {}
self.reply_limits = self._config.get("reply", {}) or {}
self.emoji_reply_config = self._config.get("emoji_reply", {}) or {}
self.prompt_compact_config = self._config.get("prompt_compact", {}) or {}
self.cooldown = CooldownManager(self.cooldown_config)
self.emoji_reply_selector = EmojiReplySelector(self.db_manager, self.emoji_reply_config)
self.image_config = self._config.get("image", {}) or {}
self.spam_config = self._config.get("spam_guard", {}) or {}
runtime_config = self._config.get("runtime", {}) or {}
@@ -681,8 +685,37 @@ class AIAutoResponsePlugin(MessagePluginInterface):
)
return False, "duplicate_reply"
for chunk in reply_chunks:
await bot.send_text_message(room_id, chunk, sender)
# 这里让“自动回复文本”先经过一次本地表情匹配:
# 1. 模型仍然只负责输出自然语言,不需要知道 md5
# 2. 只有命中中文语义库且回复足够短时,才会切换成表情发送;
# 3. 若表情发送失败,立刻回退到原始文本,避免因为表情链路影响主回复成功率。
sent_as_emoji = False
emoji_asset = self.emoji_reply_selector.match_reply_to_emoji(final_response_text, reply_chunks)
if emoji_asset and emoji_asset.get("md5") and int(emoji_asset.get("total_length") or 0) > 0:
try:
await bot.send_emoji_message(
room_id,
str(emoji_asset.get("md5")),
int(emoji_asset.get("total_length") or 0),
)
sent_as_emoji = True
except Exception as emoji_error:
self._log_event(
"emoji_fallback",
room_id=room_id,
sender=sender,
trigger_type=trigger.trigger_type,
reply_mode=reply_mode,
topic=selected_topic,
response_preview=preview_text(final_response_text),
emoji_semantic=emoji_asset.get("semantic_text", ""),
emoji_match_score=emoji_asset.get("match_score", 0),
error=str(emoji_error),
)
if not sent_as_emoji:
for chunk in reply_chunks:
await bot.send_text_message(room_id, chunk, sender)
self.cooldown.note_reply(room_id)
self.flow_manager.note_bot_reply(room_id)
self.memory_store.note_bot_reply(room_id, sender, selected_topic)
@@ -698,6 +731,9 @@ class AIAutoResponsePlugin(MessagePluginInterface):
response_preview=preview_text(final_response_text),
response_len=len(final_response_text),
chunk_count=len(reply_chunks),
sent_as_emoji=yn(sent_as_emoji),
emoji_semantic=(emoji_asset or {}).get("semantic_text", ""),
emoji_match_score=(emoji_asset or {}).get("match_score", 0),
)
return False, "replied"
finally: