Skip to content

Commit fec538f

Browse files
committed
在pyproject.toml中添加langchain依赖,并在strategies.py中引入PromptTemplate以支持数据清洗功能的实现。
1 parent 810ca43 commit fec538f

File tree

4 files changed

+13
-13
lines changed

4 files changed

+13
-13
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ dependencies = [
1616
"torch>=2.6.0",
1717
"transformers==4.49.0",
1818
"tomli; python_version < '3.11'",
19+
"langchain",
1920
]
2021

2122
[tool.weclone]

weclone/data/clean/strategies.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from abc import ABC, abstractmethod
22
from dataclasses import dataclass
33
from typing import Any, Dict
4-
# from ..models import ChatMessage # 如果需要操作特定模型,取消注释并调整
4+
from langchain_core.prompts import PromptTemplate
5+
from weclone.prompts.clean_data import CLEAN_PROMPT
56

67

78
@dataclass
@@ -28,16 +29,9 @@ def clean(self, data: Any) -> Any:
2829
class LLMCleaningStrategy(CleaningStrategy):
    """Cleaning strategy that uses a large language model prompt."""

    def clean(self, data: Any) -> Any:
        """Render ``CLEAN_PROMPT`` for ``data`` and return the rendered prompt.

        Args:
            data: The content to clean. It is substituted into the
                ``text_input`` placeholder of ``CLEAN_PROMPT``.

        Returns:
            The value produced by ``PromptTemplate.invoke`` for the filled-in
            template.

        NOTE(review): no model call is wired up in this method yet, so the
        result is the prompt to send to a model, not model-cleaned data.
        TODO: pipe the rendered prompt into the chosen LLM and return its
        output once the model configuration is available.
        """
        prompt_template = PromptTemplate.from_template(CLEAN_PROMPT)

        # The key must match the only placeholder declared in CLEAN_PROMPT
        # (``text_input``); any other key makes ``invoke`` fail with a
        # missing-variable error.
        return prompt_template.invoke({"text_input": data})

weclone/prompts/__init__.py

Whitespace-only changes.

weclone/prompts/clean_data.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
CLEAN_PROMPT = """
2+
请根据以下文本生成一个简洁的摘要:
3+
{text_input}
4+
摘要应包含关键信息,长度不超过三句话。
5+
"""

0 commit comments

Comments
 (0)