持久化表情中文语义资产

- 新增 t_emoji_assets 表及迁移脚本,持久化保存表情发送参数、中文语义与预览图路径
- 在消息归档与媒体补偿流程中自动回填表情资产,实现收到表情即落语义、补图后回填预览
- 后台表情库与自动回复优先读取持久化表情资产,仅在空表场景下小范围回补历史数据

This commit is contained in:
liuwei
2026-04-27 11:52:31 +08:00
parent 623ca505d4
commit 62e6f67836
7 changed files with 334 additions and 172 deletions

View File

@@ -6,8 +6,6 @@ from typing import Any, Dict, List, Optional
from db.emoji_asset_db import EmojiAssetDB
from utils.wechat.emoji_semantic_parser import (
dedupe_emoji_semantic_candidates,
extract_emoji_meta,
extract_emoji_semantic_info,
normalize_emoji_match_text,
safe_text,
)
@@ -88,38 +86,29 @@ class EmojiReplySelector:
if self._cache_assets and now < self._cache_expires_at:
return self._cache_assets
rows = self.asset_db.get_recent_emoji_assets(limit=self.asset_limit)
assets: Dict[str, Dict[str, Any]] = {}
for row in rows:
attachment_url = safe_text(row.get("attachment_url"))
md5, total_length = extract_emoji_meta(attachment_url)
if not md5 or total_length <= 0:
continue
semantic_info = extract_emoji_semantic_info(attachment_url)
assets = self.asset_db.list_emoji_assets(limit=self.asset_limit, require_preview=False)
if not assets:
# 只有持久化资产表还是空时,才回补一小批最近历史;
# 正常情况下,归档和媒体补偿流程会自动维护这张表,自动回复不该在每次加载时去扫原始消息。
self.asset_db.sync_recent_emoji_assets(limit=min(self.asset_limit, 120))
assets = self.asset_db.list_emoji_assets(limit=self.asset_limit, require_preview=False)
normalized_assets: List[Dict[str, Any]] = []
for asset in assets:
semantic_aliases = [
alias
for alias in (semantic_info.get("semantic_aliases") or [])
for alias in (asset.get("semantic_aliases") or [])
if len(alias) <= self.max_alias_chars
]
if not semantic_aliases:
continue
target = assets.setdefault(md5, {
"md5": md5,
"total_length": total_length,
"semantic_text": "",
"semantic_aliases": [],
normalized_assets.append({
"md5": asset.get("md5", ""),
"total_length": int(asset.get("total_length") or 0),
"semantic_text": asset.get("semantic_text", ""),
"semantic_aliases": dedupe_emoji_semantic_candidates(semantic_aliases),
})
if not target.get("total_length") and total_length > 0:
target["total_length"] = total_length
if not target.get("semantic_text") and semantic_info.get("semantic_text"):
target["semantic_text"] = semantic_info.get("semantic_text")
target["semantic_aliases"] = dedupe_emoji_semantic_candidates(
list(target.get("semantic_aliases") or []) + semantic_aliases
)
self._cache_assets = [asset for asset in assets.values() if asset.get("semantic_aliases")]
self._cache_assets = [asset for asset in normalized_assets if asset.get("semantic_aliases")]
self._cache_expires_at = now + self.cache_ttl_sec
return self._cache_assets