放宽最近上下文到30条并取消中途截断
This commit is contained in:
@@ -70,10 +70,14 @@ default_total_limit = 12
|
|||||||
[prompt_compact]
|
[prompt_compact]
|
||||||
group_profile_max_chars = 220
|
group_profile_max_chars = 220
|
||||||
group_profile_max_lines = 6
|
group_profile_max_lines = 6
|
||||||
context_max_chars = 360
|
# 最近上下文现在要真实交给模型 30 条,因此这里同步放宽整体上下文裁剪阈值:
|
||||||
context_max_lines = 10
|
# 1. recent_message_max_lines 提到 30,避免“窗口明明有 30,提示词里只留下 4 条”;
|
||||||
recent_message_max_lines = 4
|
# 2. context_max_lines/context_max_chars 一起抬高,避免最近消息刚拼进去又被整体截断;
|
||||||
recent_message_line_max_chars = 60
|
# 3. recent_message_line_max_chars 从 60 放宽到 100,让模型能看到每条消息更多细节,但仍避免单条刷屏。
|
||||||
|
context_max_chars = 4200
|
||||||
|
context_max_lines = 40
|
||||||
|
recent_message_max_lines = 30
|
||||||
|
recent_message_line_max_chars = 100
|
||||||
at_member_profile_max_chars = 160
|
at_member_profile_max_chars = 160
|
||||||
at_member_profile_max_lines = 5
|
at_member_profile_max_lines = 5
|
||||||
member_memory_max_chars = 180
|
member_memory_max_chars = 180
|
||||||
|
|||||||
@@ -94,38 +94,14 @@ class ContextBuilder:
|
|||||||
) -> List[Dict]:
|
) -> List[Dict]:
|
||||||
if not recent_messages:
|
if not recent_messages:
|
||||||
return []
|
return []
|
||||||
|
# 这里直接把“最近 N 条”原样交给后续提示词层,而不是再做一次相关性裁剪:
|
||||||
|
# 1. 用户明确要求给模型 30 条最近消息,方便推断群里正在讨论的上下文;
|
||||||
|
# 2. 之前的“相关性筛选 + 尾部保留”虽然更省 token,但会打断对话连续性;
|
||||||
|
# 3. 对群聊场景来说,连续现场通常比少量高分片段更有利于模型判断谁在接谁的话。
|
||||||
|
#
|
||||||
|
# 这里仍保留签名参数不动(current_content、current_sender、quote_context 在本函数体内已不再使用),是为了兼容上层调用,避免后续改动牵连太多。
|
||||||
window = recent_messages[-self.recent_context_size:]
|
window = recent_messages[-self.recent_context_size:]
|
||||||
if len(window) <= 8:
|
return window
|
||||||
return window
|
|
||||||
|
|
||||||
current_tokens = self._extract_topic_tokens(current_content)
|
|
||||||
quote_tokens = self._extract_topic_tokens(
|
|
||||||
f"{quote_context.get('title', '')} {quote_context.get('quote_body', '')}"
|
|
||||||
)
|
|
||||||
focus_tokens = current_tokens | quote_tokens
|
|
||||||
quote_sender_name = str(quote_context.get("quote_sender_name", "") or "").strip().lower()
|
|
||||||
|
|
||||||
scored: List[tuple[int, int, Dict]] = []
|
|
||||||
for idx, item in enumerate(window):
|
|
||||||
score = self._message_relevance(
|
|
||||||
item,
|
|
||||||
current_sender=current_sender,
|
|
||||||
focus_tokens=focus_tokens,
|
|
||||||
quote_sender_name=quote_sender_name,
|
|
||||||
)
|
|
||||||
if score > 0:
|
|
||||||
scored.append((score, idx, item))
|
|
||||||
|
|
||||||
# 总是保留尾部几条,维持现场感;再拼上与当前话题最相关的消息。
|
|
||||||
tail_indexes = set(range(max(len(window) - 4, 0), len(window)))
|
|
||||||
keep_indexes = set(tail_indexes)
|
|
||||||
for _, idx, _ in sorted(scored, key=lambda x: (-x[0], -x[1]))[:10]:
|
|
||||||
keep_indexes.add(idx)
|
|
||||||
|
|
||||||
selected = [window[idx] for idx in sorted(keep_indexes)]
|
|
||||||
if len(selected) < 6:
|
|
||||||
return window[-6:]
|
|
||||||
return selected[-12:]
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _message_relevance(
|
def _message_relevance(
|
||||||
|
|||||||
@@ -852,7 +852,16 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
|||||||
"最近上下文",
|
"最近上下文",
|
||||||
self._join_recent_messages(
|
self._join_recent_messages(
|
||||||
context,
|
context,
|
||||||
max_lines=int(prompt_strategy.get("recent_message_max_lines", 4) or 4),
|
# 这里优先走 prompt_strategy,是为了让“给模型看多少条最近消息”由策略层统一控制;
|
||||||
|
# 如果策略层没有明确给值,再退回配置里的 recent_message_max_lines,
|
||||||
|
# 避免出现“配置已经改成 30,但这里还偷偷按 4 条截断”的问题。
|
||||||
|
max_lines=int(
|
||||||
|
prompt_strategy.get(
|
||||||
|
"recent_message_max_lines",
|
||||||
|
self.prompt_compact_config.get("recent_message_max_lines", 30),
|
||||||
|
)
|
||||||
|
or 30
|
||||||
|
),
|
||||||
max_line_chars=int(self.prompt_compact_config.get("recent_message_line_max_chars", 60) or 60),
|
max_line_chars=int(self.prompt_compact_config.get("recent_message_line_max_chars", 60) or 60),
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
@@ -1052,7 +1061,19 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
|||||||
# 3. 群事实和向量记忆只在问答场景打开,避免模型把记忆写进每句闲聊。
|
# 3. 群事实和向量记忆只在问答场景打开,避免模型把记忆写进每句闲聊。
|
||||||
target_reply_chars_map = {"social_short": 10, "qa_fast": 16, "qa_with_context": 24}
|
target_reply_chars_map = {"social_short": 10, "qa_fast": 16, "qa_with_context": 24}
|
||||||
hard_reply_cap_map = {"social_short": 12, "qa_fast": 18, "qa_with_context": 28}
|
hard_reply_cap_map = {"social_short": 12, "qa_fast": 18, "qa_with_context": 28}
|
||||||
recent_lines_map = {"social_short": 4, "qa_fast": 5, "qa_with_context": 6}
|
# 最近消息条数不再按模式缩到 4~6 条,而是统一交给模型看完整窗口:
|
||||||
|
# 1. 回复仍然走短句限制,避免“上下文多了,回复也跟着变长”;
|
||||||
|
# 2. 但模型理解当前讨论时,需要看到完整现场,尤其是多人连续接话场景;
|
||||||
|
# 3. 默认读取 prompt_compact.recent_message_max_lines,这样配置和策略不会打架。
|
||||||
|
configured_recent_lines = max(
|
||||||
|
int(self.prompt_compact_config.get("recent_message_max_lines", 30) or 30),
|
||||||
|
1,
|
||||||
|
)
|
||||||
|
recent_lines_map = {
|
||||||
|
"social_short": configured_recent_lines,
|
||||||
|
"qa_fast": configured_recent_lines,
|
||||||
|
"qa_with_context": configured_recent_lines,
|
||||||
|
}
|
||||||
|
|
||||||
allow_member_memory = strong_directed or is_followup or returning_state in {"returning_member", "long_absent_member"}
|
allow_member_memory = strong_directed or is_followup or returning_state in {"returning_member", "long_absent_member"}
|
||||||
allow_social_memory = is_question_like and strong_directed
|
allow_social_memory = is_question_like and strong_directed
|
||||||
|
|||||||
Reference in New Issue
Block a user