abot/db/member_digest_db.py

# -*- coding: utf-8 -*-
import json
from datetime import datetime
from typing import Dict, List, Optional

from db.base import BaseDBOperator
from db.connection import DBConnectionManager


class MemberDigestDBOperator(BaseDBOperator):
    """成员分层摘要数据库操作"""

    def __init__(self, db_manager: DBConnectionManager):
        super().__init__(db_manager)
        self._create_tables()

    def _create_tables(self):
        try:
            self.execute_update("""
            CREATE TABLE IF NOT EXISTS t_member_digest (
                id INT AUTO_INCREMENT PRIMARY KEY,
                chatroom_id VARCHAR(64) NOT NULL COMMENT '群聊ID',
                wxid VARCHAR(64) NOT NULL COMMENT '成员微信ID',
                digest_type VARCHAR(16) NOT NULL COMMENT '摘要类型 daily|weekly|monthly',
                period_key VARCHAR(32) NOT NULL COMMENT '周期主键',
                period_start DATETIME NULL COMMENT '周期开始时间',
                period_end DATETIME NULL COMMENT '周期结束时间',
                display_name VARCHAR(128) COMMENT '成员展示名',
                source_count INT DEFAULT 0 COMMENT '源数据条数',
                summary_text TEXT COMMENT '摘要说明',
                structured_json LONGTEXT COMMENT '结构化摘要JSON',
                meta_json LONGTEXT COMMENT '附加元数据JSON',
                last_generated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '最后生成时间',
                create_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
                update_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
                UNIQUE KEY idx_member_digest (chatroom_id, wxid, digest_type, period_key),
                KEY idx_digest_lookup (chatroom_id, wxid, digest_type, period_end)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='成员分层摘要表';
            """)
            self.execute_update("""
            CREATE TABLE IF NOT EXISTS t_member_digest_group_state (
                id INT AUTO_INCREMENT PRIMARY KEY,
                chatroom_id VARCHAR(64) NOT NULL COMMENT '群聊ID',
                bootstrap_status VARCHAR(16) NOT NULL DEFAULT 'pending' COMMENT '初始化状态 pending|done|empty',
                last_bootstrap_at DATETIME NULL COMMENT '最近一次初始化尝试时间',
                bootstrap_days INT NOT NULL DEFAULT 0 COMMENT '最近一次初始化窗口天数',
                built_daily_count INT NOT NULL DEFAULT 0 COMMENT '最近一次初始化生成的日摘要数量',
                touched_member_count INT NOT NULL DEFAULT 0 COMMENT '最近一次初始化触达的成员数量',
                extra_json LONGTEXT NULL COMMENT '附加状态信息',
                create_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
                update_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
                UNIQUE KEY idx_digest_group_state (chatroom_id)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='成员分层摘要群级状态表';
            """)
        except Exception as e:
            self.LOG.error(f"创建成员分层摘要表失败: {e}")

    def save_digest(self, digest: Dict) -> bool:
        try:
            data = {
                "chatroom_id": digest.get("chatroom_id", ""),
                "wxid": digest.get("wxid", ""),
                "digest_type": digest.get("digest_type", ""),
                "period_key": digest.get("period_key", ""),
                "period_start": digest.get("period_start"),
                "period_end": digest.get("period_end"),
                "display_name": digest.get("display_name", ""),
                "source_count": digest.get("source_count", 0),
                "summary_text": digest.get("summary_text", ""),
                "structured_json": json.dumps(digest.get("structured", {}), ensure_ascii=False),
                "meta_json": json.dumps(digest.get("meta", {}), ensure_ascii=False),
                "last_generated_at": digest.get("last_generated_at", datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
            }
            fields = ", ".join(data.keys())
            placeholders = ", ".join(["%s"] * len(data))
            update_clause = ", ".join(
                [f"{key}=VALUES({key})" for key in data.keys() if key not in ("chatroom_id", "wxid", "digest_type", "period_key")]
            )
            sql = f"""
            INSERT INTO t_member_digest ({fields})
            VALUES ({placeholders})
            ON DUPLICATE KEY UPDATE {update_clause}
            """
            return self.execute_update(sql, tuple(data.values()))
        except Exception as e:
            self.LOG.error(f"保存成员分层摘要失败: {e}")
            return False

    def get_digest(self, chatroom_id: str, wxid: str, digest_type: str, period_key: str) -> Optional[Dict]:
        try:
            sql = """
            SELECT *
            FROM t_member_digest
            WHERE chatroom_id = %s AND wxid = %s AND digest_type = %s AND period_key = %s
            LIMIT 1
            """
            row = self.execute_query(sql, (chatroom_id, wxid, digest_type, period_key), fetch_one=True)
            return self._deserialize_row(row)
        except Exception as e:
            self.LOG.error(f"获取成员分层摘要失败: {e}")
            return None

    def list_digests(self, chatroom_id: str, wxid: str, digest_type: str, limit: int = 20) -> List[Dict]:
        try:
            sql = """
            SELECT *
            FROM t_member_digest
            WHERE chatroom_id = %s AND wxid = %s AND digest_type = %s
            ORDER BY period_end DESC, period_key DESC
            LIMIT %s
            """
            rows = self.execute_query(sql, (chatroom_id, wxid, digest_type, limit)) or []
            return [self._deserialize_row(row) for row in rows]
        except Exception as e:
            self.LOG.error(f"获取成员分层摘要列表失败: {e}")
            return []

    def list_digest_keys(self, chatroom_id: str, wxid: str, digest_type: str) -> List[str]:
        try:
            sql = """
            SELECT period_key
            FROM t_member_digest
            WHERE chatroom_id = %s AND wxid = %s AND digest_type = %s
            """
            rows = self.execute_query(sql, (chatroom_id, wxid, digest_type)) or []
            return [str(row.get("period_key")) for row in rows if row.get("period_key")]
        except Exception as e:
            self.LOG.error(f"获取成员摘要key失败: {e}")
            return []

    def list_period_digests(self, chatroom_id: str, wxid: str, digest_type: str,
                            period_keys: List[str]) -> List[Dict]:
        try:
            if not period_keys:
                return []
            placeholders = ", ".join(["%s"] * len(period_keys))
            sql = f"""
            SELECT *
            FROM t_member_digest
            WHERE chatroom_id = %s AND wxid = %s AND digest_type = %s AND period_key IN ({placeholders})
            ORDER BY period_end ASC, period_key ASC
            """
            params = (chatroom_id, wxid, digest_type, *period_keys)
            rows = self.execute_query(sql, params) or []
            return [self._deserialize_row(row) for row in rows]
        except Exception as e:
            self.LOG.error(f"批量获取成员分层摘要失败: {e}")
            return []

    def get_group_state(self, chatroom_id: str) -> Optional[Dict]:
        try:
            sql = """
            SELECT *
            FROM t_member_digest_group_state
            WHERE chatroom_id = %s
            LIMIT 1
            """
            row = self.execute_query(sql, (chatroom_id,), fetch_one=True)
            if not row:
                return None
            extra_json = row.get("extra_json")
            if extra_json:
                try:
                    row["extra"] = json.loads(extra_json)
                except Exception:
                    row["extra"] = {}
            else:
                row["extra"] = {}
            for key in ("last_bootstrap_at", "create_time", "update_time"):
                value = row.get(key)
                if isinstance(value, datetime):
                    row[key] = value.strftime("%Y-%m-%d %H:%M:%S")
            return row
        except Exception as e:
            self.LOG.error(f"获取成员分层摘要群状态失败: {e}")
            return None

    def save_group_state(self, chatroom_id: str, bootstrap_status: str,
                         bootstrap_days: int = 0, built_daily_count: int = 0,
                         touched_member_count: int = 0, extra: Optional[Dict] = None) -> bool:
        try:
            data = {
                "chatroom_id": chatroom_id,
                "bootstrap_status": bootstrap_status,
                "last_bootstrap_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                "bootstrap_days": int(bootstrap_days or 0),
                "built_daily_count": int(built_daily_count or 0),
                "touched_member_count": int(touched_member_count or 0),
                "extra_json": json.dumps(extra or {}, ensure_ascii=False),
            }
            fields = ", ".join(data.keys())
            placeholders = ", ".join(["%s"] * len(data))
            update_clause = ", ".join(
                [f"{key}=VALUES({key})" for key in data.keys() if key != "chatroom_id"]
            )
            sql = f"""
            INSERT INTO t_member_digest_group_state ({fields})
            VALUES ({placeholders})
            ON DUPLICATE KEY UPDATE {update_clause}
            """
            return self.execute_update(sql, tuple(data.values()))
        except Exception as e:
            self.LOG.error(f"保存成员分层摘要群状态失败: {e}")
            return False

    @staticmethod
    def _deserialize_row(row: Optional[Dict]) -> Optional[Dict]:
        if not row:
            return row

        for key in ("structured_json", "meta_json"):
            value = row.get(key)
            if not value:
                row[key] = {}
                continue
            try:
                row[key] = json.loads(value)
            except Exception:
                row[key] = {}

        for key in ("period_start", "period_end", "last_generated_at"):
            value = row.get(key)
            if isinstance(value, datetime):
                row[key] = value.strftime("%Y-%m-%d %H:%M:%S")

        row["structured"] = row.get("structured_json", {})
        row["meta"] = row.get("meta_json", {})
        return row