在pyproject.toml中添加langchain依赖，并在strategies.py中引入PromptTemplate以支持数据清洗功能的实现。

xming521 · xming521 · commit fec538fea6c0 · 2025-05-03T22:57:36.000+08:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -16,6 +16,7 @@ dependencies = [
   "torch>=2.6.0",
   "transformers==4.49.0",
   "tomli; python_version < '3.11'",
+  "langchain",
 ]
 
 [tool.weclone]
diff --git a/weclone/data/clean/strategies.py b/weclone/data/clean/strategies.py
@@ -1,7 +1,8 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import Any, Dict
-# from ..models import ChatMessage # 如果需要操作特定模型，取消注释并调整
+from langchain_core.prompts import PromptTemplate
+from weclone.prompts.clean_data import CLEAN_PROMPT
 
 
 @dataclass
@@ -28,16 +29,9 @@ def clean(self, data: Any) -> Any:
 class LLMCleaningStrategy(CleaningStrategy):
     """使用大模型进行数据清洗的策略"""
 
-    # 这里可以添加LLM相关的配置，例如模型名称、API密钥等
-    # model_name: str = "your_llm_model"
-
     def clean(self, data: Any) -> Any:
-        """
-        使用大模型清洗数据。
-        具体的实现需要根据您选择的LLM API和清洗任务来定。
-        """
-        # 此处为调用LLM进行清洗的逻辑占位符
-        print(f"使用LLM清洗数据: {data}")
-        # 假设LLM返回了清洗后的数据
-        cleaned_data = f"LLM cleaned: {data}"  # 示例返回值
-        return cleaned_data
+        prompt_template = PromptTemplate.from_template(CLEAN_PROMPT)
+
+        prompt_template.invoke({"topic": "cats"})
+        # prompt_template.
+        # return cleaned_data
diff --git a/weclone/prompts/__init__.py b/weclone/prompts/__init__.py
diff --git a/weclone/prompts/clean_data.py b/weclone/prompts/clean_data.py
@@ -0,0 +1,5 @@
+CLEAN_PROMPT = """
+请根据以下文本生成一个简洁的摘要：
+{text_input}
+摘要应包含关键信息，长度不超过三句话。
+"""

Original file line number	Diff line number	Diff line change
`@@ -16,6 +16,7 @@ dependencies = [`
`16`	`16`	`"torch>=2.6.0",`
`17`	`17`	`"transformers==4.49.0",`
`18`	`18`	`"tomli; python_version < '3.11'",`
	`19`	`+ "langchain",`
`19`	`20`	`]`
`20`	`21`
`21`	`22`	`[tool.weclone]`