From 909018b3ec0bf66620cb1adbbf3759ab678d6ab0 Mon Sep 17 00:00:00 2001
From: liuwei <liuwei@wdtrgf.com.cn>
Date: Mon, 14 Apr 2025 12:09:10 +0800
Subject: [PATCH] =?UTF-8?q?feature=EF=BC=9A=E5=8A=A0=E5=85=A5=E5=8A=9F?=
 =?UTF-8?q?=E8=83=BD=EF=BC=8C=E5=B0=8F=E6=9C=8B=E5=8F=8B=E4=BA=BA=E8=84=B8?=
 =?UTF-8?q?=E8=AF=86=E5=88=AB=E4=B8=8E=E5=88=86=E7=B1=BB=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 plugins/kid_photo_extractor/main.py | 74 +++++++++++++++++++++++++----
 1 file changed, 64 insertions(+), 10 deletions(-)

diff --git a/plugins/kid_photo_extractor/main.py b/plugins/kid_photo_extractor/main.py
index bbfc1e0..7f6805d 100644
--- a/plugins/kid_photo_extractor/main.py
+++ b/plugins/kid_photo_extractor/main.py
@@ -101,13 +101,34 @@ class FaceAnalyzer:
                 return None
     
             # 提取人脸特征向量用于后续比对
-            embedding = DeepFace.represent(
+            embedding_result = DeepFace.represent(
                 img_path=image_path,
                 model_name='Facenet',
                 enforce_detection=False,
                 detector_backend='opencv'  # 使用更快的opencv检测器
             )
             
+            # 处理embedding结果，确保它是一个数值数组
+            embedding = None
+            if isinstance(embedding_result, list) and len(embedding_result) > 0:
+                if isinstance(embedding_result[0], dict) and 'embedding' in embedding_result[0]:
+                    embedding = embedding_result[0]['embedding']
+                else:
+                    embedding = embedding_result[0]
+            elif isinstance(embedding_result, dict) and 'embedding' in embedding_result:
+                embedding = embedding_result['embedding']
+            else:
+                embedding = embedding_result
+                
+            # 确保embedding是数值列表
+            if embedding is not None:
+                try:
+                    # 尝试转换为浮点数列表
+                    embedding = [float(x) for x in embedding]
+                except (TypeError, ValueError):
+                    self.logger.error(f"无法将嵌入向量转换为浮点数列表: {image_path}")
+                    return None
+            
             self.logger.info(f"成功提取人脸特征向量: {image_path}")
     
             # 如果使用了临时文件，删除它
@@ -142,7 +163,6 @@ class FaceGrouper:
         self.min_samples = min_samples  # 形成核心点所需的最小样本数
         self.logger = logging.getLogger("Plugin.KidPhotoExtractor.FaceGrouper")
 
-    # 在 FaceGrouper 类的 cluster_faces 方法中，优化性能和错误处理
     def cluster_faces(self, face_embeddings):
         """对人脸特征向量进行聚类"""
         if not face_embeddings:
@@ -153,14 +173,36 @@ class FaceGrouper:
             return [0] * len(face_embeddings)
     
         try:
-            # 将特征向量转换为numpy数组
-            embeddings_array = np.array(face_embeddings)
+            # 将特征向量转换为numpy数组，确保是浮点数类型
+            # 首先提取实际的嵌入向量数据
+            processed_embeddings = []
+            for emb in face_embeddings:
+                # DeepFace.represent() 可能返回字典或列表，需要提取实际的向量
+                if isinstance(emb, dict) and 'embedding' in emb:
+                    processed_embeddings.append(emb['embedding'])
+                elif isinstance(emb, list) and len(emb) > 0:
+                    # 如果是列表，取第一个元素
+                    if isinstance(emb[0], dict) and 'embedding' in emb[0]:
+                        processed_embeddings.append(emb[0]['embedding'])
+                    else:
+                        processed_embeddings.append(emb)
+                else:
+                    processed_embeddings.append(emb)
             
-            # 检查数据有效性
-            if np.isnan(embeddings_array).any() or np.isinf(embeddings_array).any():
-                self.logger.error("特征向量包含无效值(NaN或Inf)")
-                # 清理无效值
-                embeddings_array = np.nan_to_num(embeddings_array)
+            # 转换为numpy数组并确保是浮点数类型
+            embeddings_array = np.array(processed_embeddings, dtype=np.float64)
+            
+            # 安全地检查无效值
+            try:
+                has_nan = np.isnan(embeddings_array).any()
+                has_inf = np.isinf(embeddings_array).any()
+                if has_nan or has_inf:
+                    self.logger.error("特征向量包含无效值(NaN或Inf)")
+                    # 清理无效值
+                    embeddings_array = np.nan_to_num(embeddings_array)
+            except TypeError:
+                # 如果仍然无法检查NaN/Inf，记录警告并继续
+                self.logger.warning("无法检查特征向量中的无效值，将直接进行聚类")
     
             # 使用DBSCAN进行聚类
             clustering = DBSCAN(eps=self.eps, min_samples=self.min_samples, metric='euclidean').fit(embeddings_array)
@@ -654,9 +696,21 @@ class KidPhotoExtractorPlugin(MessagePluginInterface):
                 person_folder = os.path.join(output_dir, f"person_{person_id}")
                 os.makedirs(person_folder, exist_ok=True)
     
-                # 复制照片
+                # 复制照片 - 修改为只复制原始照片，不复制人脸区域
                 copied_photos = []
+                processed_paths = set()  # 用于跟踪已处理的照片路径，避免重复复制
+                
                 for image_path, _ in faces:
+                    # 跳过临时文件
+                    if ".temp." in image_path:
+                        continue
+                        
+                    # 避免重复复制同一张照片
+                    if image_path in processed_paths:
+                        continue
+                        
+                    processed_paths.add(image_path)
+                    
                     if self.photo_classifier.copy_photo(image_path, person_folder):
                         copied_photos.append(os.path.basename(image_path))
                         # 保存照片映射关系