持久化表情中文语义资产

- 新增 t_emoji_assets 表及迁移脚本，持久化保存表情发送参数、中文语义与预览图路径
- 在消息归档与媒体补偿流程中自动回填表情资产，实现收到表情即落语义、补图后回填预览
- 后台表情库与自动回复优先读取持久化表情资产，仅在空表场景下小范围回补历史数据
This commit is contained in:
@@ -6,8 +6,6 @@ from typing import Any, Dict, List, Optional
|
||||
from db.emoji_asset_db import EmojiAssetDB
|
||||
from utils.wechat.emoji_semantic_parser import (
|
||||
dedupe_emoji_semantic_candidates,
|
||||
extract_emoji_meta,
|
||||
extract_emoji_semantic_info,
|
||||
normalize_emoji_match_text,
|
||||
safe_text,
|
||||
)
|
||||
@@ -88,38 +86,29 @@ class EmojiReplySelector:
|
||||
if self._cache_assets and now < self._cache_expires_at:
|
||||
return self._cache_assets
|
||||
|
||||
rows = self.asset_db.get_recent_emoji_assets(limit=self.asset_limit)
|
||||
assets: Dict[str, Dict[str, Any]] = {}
|
||||
for row in rows:
|
||||
attachment_url = safe_text(row.get("attachment_url"))
|
||||
md5, total_length = extract_emoji_meta(attachment_url)
|
||||
if not md5 or total_length <= 0:
|
||||
continue
|
||||
|
||||
semantic_info = extract_emoji_semantic_info(attachment_url)
|
||||
assets = self.asset_db.list_emoji_assets(limit=self.asset_limit, require_preview=False)
|
||||
if not assets:
|
||||
# 只有持久化资产表还是空时,才回补一小批最近历史;
|
||||
# 正常情况下,归档和媒体补偿流程会自动维护这张表,自动回复不该在每次加载时去扫原始消息。
|
||||
self.asset_db.sync_recent_emoji_assets(limit=min(self.asset_limit, 120))
|
||||
assets = self.asset_db.list_emoji_assets(limit=self.asset_limit, require_preview=False)
|
||||
normalized_assets: List[Dict[str, Any]] = []
|
||||
for asset in assets:
|
||||
semantic_aliases = [
|
||||
alias
|
||||
for alias in (semantic_info.get("semantic_aliases") or [])
|
||||
for alias in (asset.get("semantic_aliases") or [])
|
||||
if len(alias) <= self.max_alias_chars
|
||||
]
|
||||
if not semantic_aliases:
|
||||
continue
|
||||
|
||||
target = assets.setdefault(md5, {
|
||||
"md5": md5,
|
||||
"total_length": total_length,
|
||||
"semantic_text": "",
|
||||
"semantic_aliases": [],
|
||||
normalized_assets.append({
|
||||
"md5": asset.get("md5", ""),
|
||||
"total_length": int(asset.get("total_length") or 0),
|
||||
"semantic_text": asset.get("semantic_text", ""),
|
||||
"semantic_aliases": dedupe_emoji_semantic_candidates(semantic_aliases),
|
||||
})
|
||||
if not target.get("total_length") and total_length > 0:
|
||||
target["total_length"] = total_length
|
||||
if not target.get("semantic_text") and semantic_info.get("semantic_text"):
|
||||
target["semantic_text"] = semantic_info.get("semantic_text")
|
||||
target["semantic_aliases"] = dedupe_emoji_semantic_candidates(
|
||||
list(target.get("semantic_aliases") or []) + semantic_aliases
|
||||
)
|
||||
|
||||
self._cache_assets = [asset for asset in assets.values() if asset.get("semantic_aliases")]
|
||||
self._cache_assets = [asset for asset in normalized_assets if asset.get("semantic_aliases")]
|
||||
self._cache_expires_at = now + self.cache_ttl_sec
|
||||
return self._cache_assets
|
||||
|
||||
|
||||
Reference in New Issue
Block a user