import os import re import time import torch import torch.nn as nn import pandas as pd import numpy as np from datetime import datetime from torch.utils.data import Dataset, DataLoader, random_split from tqdm import tqdm import pickle import mysql.connector from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity from gensim.models import Word2Vec # SpaCy 用来取代 nltk 的低效文本处理 import spacy nlp = spacy.load("en_core_web_sm", disable=["parser", "ner"]) # 使用 gensim 的 Word2Vec 和 KeyedVectors from gensim.models import Word2Vec, TfidfModel from gensim.corpora import Dictionary # 自定义 Preprocess(快速版本) STOPWORDS = spacy.lang.en.stop_words.STOP_WORDS def clean_text(text): return re.sub(r'[^a-zA-Z0-9\s]', '', str(text)).strip().lower() def tokenize(text): doc = nlp(clean_text(text)) return [token.text for token in doc if token.text not in STOPWORDS and token.text.isalnum()] def preprocess(text): tokens = tokenize(text) return " ".join(tokens) class SemanticMatchModel(nn.Module): def __init__(self, input_dim): super().__init__() self.fc1 = nn.Linear(input_dim, 256) self.bn1 = nn.BatchNorm1d(256) self.fc2 = nn.Linear(256, 128) self.bn2 = nn.BatchNorm1d(128) self.fc3 = nn.Linear(128, 64) self.bn3 = nn.BatchNorm1d(64) self.fc4 = nn.Linear(64, 1) self.dropout = nn.Dropout(0.3) self.relu = nn.ReLU() self.sigmoid = nn.Sigmoid() def forward(self, x): x = self.relu(self.bn1(self.fc1(x))) x = self.dropout(x) x = self.relu(self.bn2(self.fc2(x))) x = self.dropout(x) x = self.relu(self.bn3(self.fc3(x))) x = self.dropout(x) x = self.sigmoid(self.fc4(x)) return x class QADataset(Dataset): """ 数据集:将正样本 (question, answer) 与随机负样本 (question, random_answer) 拼接在一起, 其中正样本 label=1,负样本 label=0。 """ def __init__(self, qa_pairs, tfidf_vectorizer, negative_ratio=1.0): """ :param qa_pairs: [(question_text, answer_text), ...] :param tfidf_vectorizer: 已经fit好的 TfidfVectorizer :param negative_ratio: 每个正样本对应的负样本倍数 """ self.qa_pairs = qa_pairs self.vectorizer = tfidf_vectorizer self.samples = [] # 构造正样本 for i, (q, a) in enumerate(self.qa_pairs): self.samples.append((q, a, 1)) # label=1 # 构建负样本:random替换answer if negative_ratio > 0: negative_samples = [] total_pairs = len(self.qa_pairs) for i, (q, a) in enumerate(self.qa_pairs): for _ in range(int(negative_ratio)): rand_idx = np.random.randint(total_pairs) # 若随机到同一个qa对,就重新随机 while rand_idx == i: rand_idx = np.random.randint(total_pairs) neg_q, neg_a = self.qa_pairs[rand_idx] # 保持question不变,随机替换答案 negative_samples.append((q, neg_a, 0)) self.samples.extend(negative_samples) def __len__(self): return len(self.samples) def __getitem__(self, idx): q, a, label = self.samples[idx] q_vec = self.vectorizer.transform([preprocess(q)]).toarray()[0] a_vec = self.vectorizer.transform([preprocess(a)]).toarray()[0] pair_vec = np.concatenate((q_vec, a_vec)) return torch.tensor(pair_vec, dtype=torch.float32), torch.tensor(label, dtype=torch.float32) class KnowledgeBase: def __init__(self, host='localhost', user='root', password='hy188747', database='ubuntu_qa', table='qa_pair', model_dir=r"D:\NLP-PT\PT4\model", negative_ratio=1.0): print("🔄 初始化知识库...") self.host = host self.user = user self.password = password self.database = database self.table = table self.model_dir = model_dir self.negative_ratio = negative_ratio # 确保模型目录存在 os.makedirs(self.model_dir, exist_ok=True) self.qa_pairs = [] self.q_texts = [] self.a_texts = [] self.semantic_model = None self.word2vec_model = None self.tfidf_vectorizer = None self.tfidf_matrix = None # 第一步:从数据库载入数据 self.load_data_from_mysql() # 第二步:加载或缓存预处理后的文本 self.load_or_cache_processed_questions() # 第三步:加载 TF-IDF + 向量化 self.load_cached_tfidf() # 第四步:加载 Word2Vec 或使用缓存 self.load_cached_word2vec_model() # 第五步:加载 PyTorch 模型 model_path = os.path.join(self.model_dir, 'semantic_match_model.pth') if os.path.exists(model_path): self.load_model() def load_data_from_mysql(self): print("🔄 正在连接 MySQL,加载问答数据...") conn = mysql.connector.connect( host=self.host, user=self.user, password=self.password, database=self.database ) cursor = conn.cursor() query = f"SELECT question_text, answer_text FROM {self.table}" cursor.execute(query) rows = cursor.fetchall() conn.close() self.qa_pairs = [(row[0], row[1]) for row in rows] self.q_texts = [pair[0] for pair in self.qa_pairs] self.a_texts = [pair[1] for pair in self.qa_pairs] print(f"✅ 成功从 MySQL 加载 {len(self.qa_pairs)} 条问答数据。") def load_or_cache_processed_questions(self): """使用本地缓存避免每次都预处理大量数据""" cache_path = os.path.join(self.model_dir, 'processed_questions.pkl') if os.path.exists(cache_path): print("🔄 使用缓存预处理后的分词文本。") with open(cache_path, 'rb') as f: self.processed_q_list = pickle.load(f) else: print("🔄 正在预处理问题文本(首次较慢)...") self.processed_q_list = [preprocess(q) for q in self.q_texts] with open(cache_path, 'wb') as f: pickle.dump(self.processed_q_list, f) print("✅ 预处理缓存已保存。") def load_cached_tfidf(self): """加载已存在的 TfidfVectorizer 或构建""" cache_tfidf_matrix = os.path.join(self.model_dir, 'tfidf_matrix.npz') cache_qa_list = os.path.join(self.model_dir, 'tfidf_qa.pkl') tfidf_path = os.path.join(self.model_dir, 'tfidf_vectorizer.pkl') if os.path.exists(tfidf_path) and os.path.exists(cache_tfidf_matrix) and os.path.exists(cache_qa_list): print("🔄 加载 TF-IDF 缓存版本。") import joblib self.tfidf_vectorizer = joblib.load(tfidf_path) self.tfidf_matrix = np.load(cache_tfidf_matrix)['tfidf'] with open(cache_qa_list, 'rb') as f: self.tfidf_qa = pickle.load(f) else: print("🔄 创建并构建 TF-IDF(首次较慢)...") self.tfidf_vectorizer = TfidfVectorizer( tokenizer=lambda x: x.split(), lowercase=False, max_features=10000 ) self.tfidf_qa = self.processed_q_list self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.tfidf_qa).toarray() print("✅ TF-IDF 构建完成。") import joblib joblib.dump(self.tfidf_vectorizer, tfidf_path) np.savez_compressed(cache_tfidf_matrix, tfidf=self.tfidf_matrix) with open(cache_qa_list, 'wb') as f: pickle.dump(self.tfidf_qa, f) def load_cached_word2vec_model(self): """加载已训练好的 Word2Vec 模型,没有就训练""" word2vec_path = os.path.join(self.model_dir, 'word2vec.model') if os.path.exists(word2vec_path): print("🔄 加载缓存中的 Word2Vec 模型...") self.word2vec_model = Word2Vec.load(word2vec_path) else: print("🔄 训练 Word2Vec 模型(首次较慢)...") tokenized_questions = [preprocess(q).split() for q in self.q_texts] self.word2vec_model = Word2Vec( sentences=tokenized_questions, vector_size=100, window=5, min_count=1, workers=4 ) self.word2vec_model.save(word2vec_path) print("✅ Word2Vec 模型训练完成并保存。") def sentence_to_vec(self, sentence): """将句子转换为向量表示""" tokens = preprocess(sentence).split() if self.word2vec_model: vecs = [self.word2vec_model.wv[w] for w in tokens if w in self.word2vec_model.wv] return np.mean(vecs, axis=0) if vecs else np.zeros(self.word2vec_model.vector_size) else: # 没有 Word2Vec 模型时,使用 TF-IDF 向量 return self.tfidf_vectorizer.transform([preprocess(sentence)]).toarray()[0] def build_model(self, epochs=10, batch_size=128, lr=1e-3): """ 构建并训练语义匹配模型,包含训练集/验证集拆分与性能监控。 """ # 创建数据集 full_dataset = QADataset(self.qa_pairs, self.tfidf_vectorizer, negative_ratio=self.negative_ratio) # 划分训练集/验证集 train_size = int(len(full_dataset) * 0.8) val_size = len(full_dataset) - train_size train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size]) # 创建数据加载器 train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2) val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2) # 初始化模型 sample_input, _ = full_dataset[0] input_dim = sample_input.shape[0] self.semantic_model = SemanticMatchModel(input_dim) criterion = nn.BCELoss() optimizer = optim.Adam(self.semantic_model.parameters(), lr=lr) scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9) # 训练模型 best_val_acc = 0.0 print("\n开始模型训练...") start_time = time.time() for epoch in range(epochs): self.semantic_model.train() total_loss, total_correct, total_samples = 0.0, 0, 0 for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs} - 训练中"): optimizer.zero_grad() outputs = self.semantic_model(X_batch).squeeze() loss = criterion(outputs, y_batch) loss.backward() optimizer.step() total_loss += loss.item() * len(y_batch) preds = (outputs >= 0.5).float() total_correct += (preds == y_batch).sum().item() total_samples += len(y_batch) train_loss = total_loss / total_samples train_acc = total_correct / total_samples # 验证阶段 self.semantic_model.eval() val_loss, val_correct, val_samples = 0.0, 0, 0 with torch.no_grad(): for X_val, y_val in val_loader: outputs_val = self.semantic_model(X_val).squeeze() loss_val = criterion(outputs_val, y_val) val_loss += loss_val.item() * len(y_val) preds_val = (outputs_val >= 0.5).float() val_correct += (preds_val == y_val).sum().item() val_samples += len(y_val) val_loss /= val_samples val_acc = val_correct / val_samples # 更新学习率 scheduler.step() print(f"Epoch [{epoch + 1}/{epochs}] | " f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | " f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}") # 保存最优模型 if val_acc > best_val_acc: best_val_acc = val_acc model_path = os.path.join(self.model_dir, 'semantic_match_model.pth') torch.save(self.semantic_model.state_dict(), model_path) print(f"✅ 新的最优模型已保存 (Val Acc: {best_val_acc:.4f})") end_time = time.time() print(f"\n训练完成,共耗时 {end_time - start_time:.2f} 秒。") # 加载最优模型权重 model_path = os.path.join(self.model_dir, 'semantic_match_model.pth') self.semantic_model.load_state_dict(torch.load(model_path)) self.semantic_model.eval() def load_model(self): """加载训练好的语义匹配 PyTorch 模型""" input_dim = self.tfidf_matrix.shape[1] * 2 model_path = os.path.join(self.model_dir, 'semantic_match_model.pth') self.semantic_model = SemanticMatchModel(input_dim) self.semantic_model.load_state_dict(torch.load(model_path, map_location='cpu')) self.semantic_model.eval() print("✅ 语义匹配模型加载完成。") def retrieve(self, query, semantic_topk=100): """ 检索接口:先通过 TF-IDF + 句向量评分做粗检,再对Top-K结果用语义模型做精检,返回最匹配的 QA。 """ # 粗检 query_tfidf = self.tfidf_vectorizer.transform([preprocess(query)]).toarray()[0] tfidf_scores = cosine_similarity([query_tfidf], self.tfidf_matrix).flatten() query_sent_vec = self.sentence_to_vec(query) sent_vecs = np.array([self.sentence_to_vec(q) for q in self.q_texts]) sent_scores = cosine_similarity([query_sent_vec], sent_vecs).flatten() sim_scores = tfidf_scores + sent_scores topk_indices = np.argpartition(sim_scores, -semantic_topk)[-semantic_topk:] topk_indices = topk_indices[np.argsort(sim_scores[topk_indices])[::-1]] # 精检 if self.semantic_model: with torch.no_grad(): batch_inputs = [] for i in topk_indices: q = preprocess(self.q_texts[i]) a = preprocess(self.a_texts[i]) q_vec = self.tfidf_vectorizer.transform([q]).toarray()[0] a_vec = self.tfidf_vectorizer.transform([a]).toarray()[0] pair_input = np.concatenate((q_vec, a_vec)) batch_inputs.append(pair_input) batch_inputs = torch.tensor(np.stack(batch_inputs), dtype=torch.float32) batch_scores = self.semantic_model(batch_inputs).squeeze().cpu().numpy() semantic_scores = batch_scores # 综合得分 final_scores = sim_scores[topk_indices] + semantic_scores best_idx = topk_indices[np.argmax(final_scores)] return self.qa_pairs[best_idx], final_scores.max() else: # 没有语义模型时,只使用粗检结果 best_idx = topk_indices[0] return self.qa_pairs[best_idx], sim_scores[best_idx] def recommend_similar(self, query, topk=3): """针对未命中答案的情况,推荐相似问题""" query_tfidf = self.tfidf_vectorizer.transform([preprocess(query)]).toarray()[0] scores = cosine_similarity([query_tfidf], self.tfidf_matrix).flatten() topk_idx = scores.argsort()[0][-topk:][::-1] return [(self.qa_pairs[i][0], self.qa_pairs[i][1]) for i in topk_idx] class FeedbackRecorder: """记录未回答问题""" def __init__(self, file_path='unanswered_questions.csv'): self.file_path = file_path if not os.path.exists(self.file_path): with open(self.file_path, 'w', newline='', encoding='utf-8') as f: import csv csv.writer(f).writerow(['time', 'question']) def record_question(self, question): with open(self.file_path, 'a', newline='', encoding='utf-8') as f: import csv writer = csv.writer(f) writer.writerow([datetime.now().isoformat(), question]) def main(): kb = KnowledgeBase( host='localhost', user='root', password='hy188747', database='ubuntu_qa', table='qa_pair', model_dir=r"D:\NLP-PT\PT4\model", negative_ratio=1.0 ) # 是否重新训练语义匹配模型 if input("是否重新训练语义匹配模型?(y/n): ").strip().lower() == 'y': kb.build_model( epochs=5, # 训练轮数 batch_size=128, # 批大小 lr=1e-3 # 学习率 ) recorder = FeedbackRecorder() print("\n🎯 智能知识问答系统已启动(输入'q'退出聊天)\n") while True: query = input("🧐 问题:") if query.strip().lower() == 'q': break result, score = kb.retrieve(query) if result: print("💡 回答:", result[1]) print(f"📊 匹配信心分数: {score:.4f}\n") else: print("⚠ 没有找到合适的答案,已将你的问题记录下来。") recorder.record_question(query) print("🔥 相似问题推荐:") for q, a in kb.recommend_similar(query): print(f"Q: {q}\nA: {a}\n") if __name__ == "__main__": main()

时间: 2025-07-25 21:32:50 AIGC 浏览: 90
<think>我们正在构建一个智能问答系统,其中包含以下主要组件: 1. 知识库(KnowledgeBase):负责从MySQL数据库加载问答对,预处理文本,构建TF-IDF和Word2Vec模型,以及训练一个语义匹配模型(基于PyTorch的神经网络)来评估问题-答案对的匹配程度。 2. 数据集(QADataset):用于构建训练语义匹配模型的数据集,包含正样本(问题与正确答案)和负样本(问题与随机答案)。 3. 语义匹配模型(SemanticMatchModel):一个简单的全连接神经网络,用于判断问题与答案的匹配程度。 4. 反馈记录器(FeedbackRecorder):记录未回答的问题。 5. 主函数(main):初始化知识库,训练模型(如果需要),并启动一个交互式问答系统。 代码中已经实现了大部分功能,但存在一些需要修复和优化的地方。以下是一些问题及解决方案: 问题1:在`KnowledgeBase`的`__init__`方法中,当加载模型时,需要知道输入维度(input_dim),但此时`tfidf_matrix`可能尚未初始化(在`load_cached_tfidf`中初始化)。因此,在加载模型之前,必须先确保`tfidf_vectorizer`和`tfidf_matrix`已经加载。 问题2:在`load_cached_tfidf`方法中,我们使用了`joblib`来保存和加载`tfidf_vectorizer`,但是代码中没有导入`joblib`。此外,保存TF-IDF矩阵时使用了`np.savez_compressed`,但加载时使用了`np.load`,这是正确的。 问题3:在`build_model`方法中,使用了`optim`,但代码中没有导入`torch.optim`。 问题4:在`retrieve`方法中,当没有语义模型时,直接返回粗检结果,但粗检结果中我们使用了两个分数(TF-IDF和句向量)的和,然后取topk。然而,在没有语义模型的情况下,我们只返回了粗检中分数最高的一个,但这里应该注意,粗检分数是TF-IDF和句向量的和,所以没有问题。 问题5:在`recommend_similar`方法中,我们使用了`tfidf_matrix`来计算余弦相似度,但这里返回的是问题和答案对,而实际上我们只需要问题文本的相似度即可。但是,我们存储的是每个问题的TF-IDF向量,所以这里计算的是问题之间的相似度,符合预期。 问题6:在`QADataset`中,我们使用了`tfidf_vectorizer`来转换问题和答案,但是注意,答案文本也应该被预处理和向量化。这里我们使用了同样的向量化器,这是合理的,因为问题和答案都是文本。 问题7:在`load_cached_tfidf`方法中,我们保存了`tfidf_qa`(即预处理后的问题文本列表),但实际上在后续使用中,我们并没有使用这个列表,而是使用了`processed_q_list`(在`load_or_cache_processed_questions`中生成)。这里可以统一。 问题8:在`load_cached_word2vec_model`中,我们训练Word2Vec模型时使用了`preprocess(q).split()`,这已经将问题分词,符合要求。 问题9:在`retrieve`方法中,精检部分我们使用了问题文本和答案文本的TF-IDF向量拼接作为输入。注意,在数据集中,我们也是这样构建的,所以一致。 问题10:在`main`函数中,我们初始化`KnowledgeBase`后,询问是否重新训练模型。如果选择重新训练,则调用`build_model`,然后进行问答。但是,如果选择不训练,那么`kb`中的语义模型是通过`load_model`加载的(在初始化时已经调用)。但是,在初始化时,如果模型文件不存在,则不会加载。因此,我们需要确保在训练后保存模型,并在需要时加载。 此外,还有一些代码优化和错误修复: - 在`load_cached_tfidf`中,如果缓存不存在,我们构建TF-IDF矩阵后,需要保存向量化器和矩阵。这里我们使用了`joblib`,所以需要导入`joblib`。 - 在`build_model`中,我们使用了`optim`,所以需要导入:`import torch.optim as optim`。 - 在`KnowledgeBase`的`__init__`中,加载模型的代码(`if os.path.exists(model_path): self.load_model()`)在`load_cached_tfidf`之前,此时`tfidf_matrix`尚未初始化,导致`input_dim`未知。因此,我们需要调整顺序:先加载TF-IDF,再尝试加载模型。 调整顺序: 1. 加载数据(`load_data_from_mysql`) 2. 加载或缓存预处理文本(`load_or_cache_processed_questions`) 3. 加载TF-IDF(`load_cached_tfidf`) 4. 加载Word2Vec(`load_cached_word2vec_model`) 5. 加载模型(此时`input_dim`可以计算:`self.tfidf_matrix.shape[1] * 2`) 因此,我们将`__init__`中加载模型的步骤移到`load_cached_tfidf`之后。 修改后的`__init__`中加载模型的部分: ```python # 第五步:加载 PyTorch 模型(必须在TF-IDF之后,因为需要input_dim) model_path = os.path.join(self.model_dir, 'semantic_match_model.pth') if os.path.exists(model_path): self.load_model() # 在load_model中需要input_dim,所以必须确保tfidf_matrix已经存在 else: print("⚠ 语义匹配模型未训练,请先训练模型。") ``` 另外,在`load_model`方法中,我们使用`self.tfidf_matrix.shape[1] * 2`作为输入维度,这要求`self.tfidf_matrix`已经被初始化。因此,我们在`load_cached_tfidf`之后调用`load_model`。 我们还需要在`load_model`方法中确保`self.tfidf_matrix`已经存在。因为`load_model`是在`__init__`中调用的,而我们在`__init__`中先调用了`load_cached_tfidf`,所以`self.tfidf_matrix`应该存在。 现在,我们进行代码修改: 步骤: 1. 在文件开头导入`torch.optim`和`joblib`(用于保存和加载TF-IDF向量化器)。 2. 调整`KnowledgeBase.__init__`中加载模型的顺序(在TF-IDF之后)。 3. 修复`load_cached_tfidf`中的`joblib`导入问题(如果没有导入,则导入)。 4. 在`build_model`中,确保使用`torch.optim`。 注意:由于代码较长,我们只修改关键部分。 修改后的代码:</think>我将分析您提供的代码并解决存在的问题。代码实现了一个智能问答系统,但存在一些需要修复的问题。以下是完整解决方案: ### 主要问题及修复方案: 1. **缺少导入优化器**:`build_model`方法中使用了`optim`但未导入 2. **负样本构建逻辑问题**:`QADataset`中负样本构建可能导致无效样本 3. **Word2Vec向量化问题**:`sentence_to_vec`方法未处理空向量情况 4. **推荐相似问题实现问题**:`recommend_similar`方法索引使用错误 5. **模型初始化顺序问题**:`KnowledgeBase`中模型加载顺序需要调整 ### 修复后的完整代码: ```python import os import re import time import torch import torch.nn as nn import torch.optim as optim # 添加缺失的优化器导入 import pandas as pd import numpy as np from datetime import datetime from torch.utils.data import Dataset, DataLoader, random_split from tqdm import tqdm import pickle import mysql.connector from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity from gensim.models import Word2Vec import spacy nlp = spacy.load("en_core_web_sm", disable=["parser", "ner"]) STOPWORDS = spacy.lang.en.stop_words.STOP_WORDS def clean_text(text): return re.sub(r'[^a-zA-Z0-9\s]', '', str(text)).strip().lower() def tokenize(text): doc = nlp(clean_text(text)) return [token.text for token in doc if token.text not in STOPWORDS and token.text.isalnum()] def preprocess(text): tokens = tokenize(text) return " ".join(tokens) class SemanticMatchModel(nn.Module): def __init__(self, input_dim): super().__init__() self.fc1 = nn.Linear(input_dim, 256) self.bn1 = nn.BatchNorm1d(256) self.fc2 = nn.Linear(256, 128) self.bn2 = nn.BatchNorm1d(128) self.fc3 = nn.Linear(128, 64) self.bn3 = nn.BatchNorm1d(64) self.fc4 = nn.Linear(64, 1) self.dropout = nn.Dropout(0.3) self.relu = nn.ReLU() self.sigmoid = nn.Sigmoid() def forward(self, x): x = self.relu(self.bn1(self.fc1(x))) x = self.dropout(x) x = self.relu(self.bn2(self.fc2(x))) x = self.dropout(x) x = self.relu(self.bn3(self.fc3(x))) x = self.dropout(x) x = self.sigmoid(self.fc4(x)) return x class QADataset(Dataset): def __init__(self, qa_pairs, tfidf_vectorizer, negative_ratio=1.0): self.qa_pairs = qa_pairs self.vectorizer = tfidf_vectorizer self.samples = [] # 构建正样本 for i, (q, a) in enumerate(self.qa_pairs): self.samples.append((q, a, 1)) # 修复负样本构建逻辑 if negative_ratio > 0: total_pairs = len(self.qa_pairs) # 预先收集所有答案用于负采样 all_answers = [a for _, a in self.qa_pairs] for i, (q, a) in enumerate(self.qa_pairs): for _ in range(int(negative_ratio)): # 随机选择不同于当前答案的负样本 neg_a = np.random.choice(all_answers) while neg_a == a: # 确保不是当前答案 neg_a = np.random.choice(all_answers) self.samples.append((q, neg_a, 0)) def __len__(self): return len(self.samples) def __getitem__(self, idx): q, a, label = self.samples[idx] q_vec = self.vectorizer.transform([preprocess(q)]).toarray()[0] a_vec = self.vectorizer.transform([preprocess(a)]).toarray()[0] pair_vec = np.concatenate((q_vec, a_vec)) return torch.tensor(pair_vec, dtype=torch.float32), torch.tensor(label, dtype=torch.float32) class KnowledgeBase: def __init__(self, host='localhost', user='root', password='hy188747', database='ubuntu_qa', table='qa_pair', model_dir=r"D:\NLP-PT\PT4\model", negative_ratio=1.0): print("🔄 初始化知识库...") self.host = host self.user = user self.password = password self.database = database self.table = table self.model_dir = model_dir self.negative_ratio = negative_ratio os.makedirs(self.model_dir, exist_ok=True) self.qa_pairs = [] self.q_texts = [] self.a_texts = [] self.semantic_model = None self.word2vec_model = None self.tfidf_vectorizer = None self.tfidf_matrix = None # 调整初始化顺序 self.load_data_from_mysql() self.load_or_cache_processed_questions() self.load_cached_tfidf() self.load_cached_word2vec_model() # 最后加载模型(确保依赖项已初始化) model_path = os.path.join(self.model_dir, 'semantic_match_model.pth') if os.path.exists(model_path): self.load_model() else: print("⚠ 语义匹配模型未训练,请先训练模型。") def load_data_from_mysql(self): print("🔄 正在连接 MySQL,加载问答数据...") try: conn = mysql.connector.connect( host=self.host, user=self.user, password=self.password, database=self.database ) cursor = conn.cursor() query = f"SELECT question_text, answer_text FROM {self.table}" cursor.execute(query) rows = cursor.fetchall() self.qa_pairs = [(row[0], row[1]) for row in rows] self.q_texts = [pair[0] for pair in self.qa_pairs] self.a_texts = [pair[1] for pair in self.qa_pairs] print(f"✅ 成功从 MySQL 加载 {len(self.qa_pairs)} 条问答数据。") except Exception as e: print(f"❌ 数据库连接失败: {e}") self.qa_pairs = [] finally: if conn.is_connected(): conn.close() def load_or_cache_processed_questions(self): cache_path = os.path.join(self.model_dir, 'processed_questions.pkl') if os.path.exists(cache_path): print("🔄 使用缓存预处理后的分词文本。") with open(cache_path, 'rb') as f: self.processed_q_list = pickle.load(f) else: print("🔄 正在预处理问题文本(首次较慢)...") self.processed_q_list = [preprocess(q) for q in tqdm(self.q_texts)] with open(cache_path, 'wb') as f: pickle.dump(self.processed_q_list, f) print("✅ 预处理缓存已保存。") def load_cached_tfidf(self): cache_tfidf_matrix = os.path.join(self.model_dir, 'tfidf_matrix.npz') cache_qa_list = os.path.join(self.model_dir, 'tfidf_qa.pkl') tfidf_path = os.path.join(self.model_dir, 'tfidf_vectorizer.pkl') if os.path.exists(tfidf_path) and os.path.exists(cache_tfidf_matrix) and os.path.exists(cache_qa_list): print("🔄 加载 TF-IDF 缓存版本。") import joblib self.tfidf_vectorizer = joblib.load(tfidf_path) self.tfidf_matrix = np.load(cache_tfidf_matrix)['tfidf'] with open(cache_qa_list, 'rb') as f: self.tfidf_qa = pickle.load(f) else: print("🔄 创建并构建 TF-IDF(首次较慢)...") self.tfidf_vectorizer = TfidfVectorizer( tokenizer=lambda x: x.split(), lowercase=False, max_features=10000 ) self.tfidf_qa = self.processed_q_list self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.tfidf_qa).toarray() print("✅ TF-IDF 构建完成。") import joblib joblib.dump(self.tfidf_vectorizer, tfidf_path) np.savez_compressed(cache_tfidf_matrix, tfidf=self.tfidf_matrix) with open(cache_qa_list, 'wb') as f: pickle.dump(self.tfidf_qa, f) def load_cached_word2vec_model(self): word2vec_path = os.path.join(self.model_dir, 'word2vec.model') if os.path.exists(word2vec_path): print("🔄 加载缓存中的 Word2Vec 模型...") self.word2vec_model = Word2Vec.load(word2vec_path) else: print("🔄 训练 Word2Vec 模型(首次较慢)...") tokenized_questions = [preprocess(q).split() for q in self.q_texts] self.word2vec_model = Word2Vec( sentences=tokenized_questions, vector_size=100, window=5, min_count=1, workers=4, epochs=10 ) self.word2vec_model.save(word2vec_path) print("✅ Word2Vec 模型训练完成并保存。") def sentence_to_vec(self, sentence): """修复空向量问题""" tokens = preprocess(sentence).split() if not tokens: return np.zeros(100) # 默认向量大小 if self.word2vec_model: vecs = [self.word2vec_model.wv[w] for w in tokens if w in self.word2vec_model.wv] return np.mean(vecs, axis=0) if vecs else np.zeros(self.word2vec_model.vector_size) else: vec = self.tfidf_vectorizer.transform([preprocess(sentence)]).toarray()[0] return vec def build_model(self, epochs=10, batch_size=128, lr=1e-3): # 创建数据集 full_dataset = QADataset(self.qa_pairs, self.tfidf_vectorizer, negative_ratio=self.negative_ratio) # 划分训练集/验证集 train_size = int(len(full_dataset) * 0.8) val_size = len(full_dataset) - train_size train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size]) # 创建数据加载器 train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2) val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2) # 初始化模型 sample_input, _ = full_dataset[0] input_dim = sample_input.shape[0] self.semantic_model = SemanticMatchModel(input_dim) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.semantic_model.to(device) criterion = nn.BCELoss() optimizer = optim.Adam(self.semantic_model.parameters(), lr=lr) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2, factor=0.5) # 训练模型 best_val_acc = 0.0 print("\n开始模型训练...") start_time = time.time() for epoch in range(epochs): self.semantic_model.train() total_loss, total_correct, total_samples = 0.0, 0, 0 for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs} - 训练中"): X_batch, y_batch = X_batch.to(device), y_batch.to(device) optimizer.zero_grad() outputs = self.semantic_model(X_batch).squeeze() loss = criterion(outputs, y_batch) loss.backward() optimizer.step() total_loss += loss.item() * len(y_batch) preds = (outputs >= 0.5).float() total_correct += (preds == y_batch).sum().item() total_samples += len(y_batch) train_loss = total_loss / total_samples train_acc = total_correct / total_samples # 验证阶段 self.semantic_model.eval() val_loss, val_correct, val_samples = 0.0, 0, 0 with torch.no_grad(): for X_val, y_val in val_loader: X_val, y_val = X_val.to(device), y_val.to(device) outputs_val = self.semantic_model(X_val).squeeze() loss_val = criterion(outputs_val, y_val) val_loss += loss_val.item() * len(y_val) preds_val = (outputs_val >= 0.5).float() val_correct += (preds_val == y_val).sum().item() val_samples += len(y_val) val_loss /= val_samples val_acc = val_correct / val_samples # 更新学习率 scheduler.step(val_acc) print(f"Epoch [{epoch + 1}/{epochs}] | " f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | " f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}") # 保存最优模型 if val_acc > best_val_acc: best_val_acc = val_acc model_path = os.path.join(self.model_dir, 'semantic_match_model.pth') torch.save(self.semantic_model.state_dict(), model_path) print(f"✅ 新的最优模型已保存 (Val Acc: {best_val_acc:.4f})") end_time = time.time() print(f"\n训练完成,共耗时 {end_time - start_time:.2f} 秒。") # 加载最优模型权重 model_path = os.path.join(self.model_dir, 'semantic_match_model.pth') self.semantic_model.load_state_dict(torch.load(model_path, map_location=device)) self.semantic_model.eval() def load_model(self): """加载训练好的语义匹配模型""" input_dim = self.tfidf_matrix.shape[1] * 2 model_path = os.path.join(self.model_dir, 'semantic_match_model.pth') self.semantic_model = SemanticMatchModel(input_dim) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.semantic_model.load_state_dict(torch.load(model_path, map_location=device)) self.semantic_model.to(device) self.semantic_model.eval() print("✅ 语义匹配模型加载完成。") def retrieve(self, query, semantic_topk=100): # 粗检 query_tfidf = self.tfidf_vectorizer.transform([preprocess(query)]).toarray()[0] tfidf_scores = cosine_similarity([query_tfidf], self.tfidf_matrix).flatten() query_sent_vec = self.sentence_to_vec(query) sent_vecs = np.array([self.sentence_to_vec(q) for q in self.q_texts]) sent_scores = cosine_similarity([query_sent_vec], sent_vecs).flatten() sim_scores = tfidf_scores + sent_scores topk_indices = np.argpartition(sim_scores, -semantic_topk)[-semantic_topk:] topk_indices = topk_indices[np.argsort(sim_scores[topk_indices])[::-1]] # 精检 if self.semantic_model: device = next(self.semantic_model.parameters()).device with torch.no_grad(): batch_inputs = [] for i in topk_indices: q = preprocess(self.q_texts[i]) a = preprocess(self.a_texts[i]) q_vec = self.tfidf_vectorizer.transform([q]).toarray()[0] a_vec = self.tfidf_vectorizer.transform([a]).toarray()[0] pair_input = np.concatenate((q_vec, a_vec)) batch_inputs.append(pair_input) batch_inputs = torch.tensor(np.stack(batch_inputs), dtype=torch.float32).to(device) batch_scores = self.semantic_model(batch_inputs).squeeze().cpu().numpy() semantic_scores = batch_scores # 综合得分 final_scores = sim_scores[topk_indices] + semantic_scores best_idx = topk_indices[np.argmax(final_scores)] return self.qa_pairs[best_idx], final_scores.max() else: best_idx = topk_indices[0] return self.qa_pairs[best_idx], sim_scores[best_idx] def recommend_similar(self, query, topk=3): """修复索引使用问题""" query_tfidf = self.tfidf_vectorizer.transform([preprocess(query)]).toarray()[0] scores = cosine_similarity([query_tfidf], self.tfidf_matrix).flatten() topk_idx = scores.argsort()[-topk:][::-1] # 修复索引获取方式 return [(self.q_texts[i], self.a_texts[i]) for i in topk_idx] class FeedbackRecorder: def __init__(self, file_path='unanswered_questions.csv'): self.file_path = file_path if not os.path.exists(self.file_path): with open(self.file_path, 'w', newline='', encoding='utf-8') as f: import csv csv.writer(f).writerow(['time', 'question']) def record_question(self, question): with open(self.file_path, 'a', newline='', encoding='utf-8') as f: import csv writer = csv.writer(f) writer.writerow([datetime.now().isoformat(), question]) def main(): kb = KnowledgeBase( host='localhost', user='root', password='hy188747', database='ubuntu_qa', table='qa_pair', model_dir=r"D:\NLP-PT\PT4\model", negative_ratio=1.0 ) if input("是否重新训练语义匹配模型?(y/n): ").strip().lower() == 'y': kb.build_model( epochs=5, batch_size=128, lr=1e-3 ) recorder = FeedbackRecorder() print("\n🎯 智能知识问答系统已启动(输入'q'退出聊天)\n") while True: query = input("🧐 问题:") if query.strip().lower() == 'q': break try: result, score = kb.retrieve(query) if result: print(f"💡 回答:{result[1]}") print(f"📊 匹配信心分数: {score:.4f}\n") else: print("⚠ 没有找到合适的答案,已将你的问题记录下来。") recorder.record_question(query) print("🔥 相似问题推荐:") for q, a in kb.recommend_similar(query): print(f"Q: {q}\nA: {a}\n") except Exception as e: print(f"❌ 检索过程中发生错误: {e}") if __name__ == "__main__": main() ``` ### 关键修复点解释: 1. **优化器导入**: ```python import torch.optim as optim # 添加缺失的优化器导入 ``` 2. **负样本构建逻辑**: ```python # 修复负样本构建逻辑 if negative_ratio > 0: total_pairs = len(self.qa_pairs) # 预先收集所有答案用于负采样 all_answers = [a for _, a in self.qa_pairs] for i, (q, a) in enumerate(self.qa_pairs): for _ in range(int(negative_ratio)): # 随机选择不同于当前答案的负样本 neg_a = np.random.choice(all_answers) while neg_a == a: # 确保不是当前答案 neg_a = np.random.choice(all_answers) self.samples.append((q, neg_a, 0)) ``` 3. **空向量处理**: ```python def sentence_to_vec(self, sentence): """修复空向量问题""" tokens = preprocess(sentence).split() if not tokens: return np.zeros(100) # 默认向量大小 ... ``` 4. **推荐相似问题索引**: ```python def recommend_similar(self, query, topk=3): query_tfidf = self.tfidf_vectorizer.transform([preprocess(query)]).toarray()[0] scores = cosine_similarity([query_tfidf], self.tfidf_matrix).flatten() topk_idx = scores.argsort()[-topk:][::-1] # 修复索引获取方式 ... ``` 5. **模型初始化顺序**: ```python # 调整初始化顺序 self.load_data_from_mysql() self.load_or_cache_processed_questions() self.load_cached_tfidf() self.load_cached_word2vec_model() ``` 6. **GPU支持**: ```python device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.semantic_model.to(device) ``` ### 系统优化点: 1. **错误处理**:添加数据库连接异常处理 2. **进度显示**:在预处理阶段添加tqdm进度条 3. **学习率调度**:使用ReduceLROnPlateau替代StepLR 4. **GPU支持**:自动检测并使用可用GPU 5. **空输入处理**:确保空查询不会导致崩溃 ### 相关问题建议:
阅读全文

相关推荐

#%% # 导入必要的库 import pandas as pd import numpy as np import torch from transformers import BertTokenizer, BertForSequenceClassification from sklearn.model_selection import train_test_split import jieba.analyse import matplotlib.pyplot as plt import seaborn as sns from datetime import datetime from sklearn.metrics import classification_report import snownlp from wordcloud import WordCloud import re from transformers import BertModel, BertTokenizer # 加载预处理后的数据 # clean_df = pd.read_csv('cleaned_mooncake_comments.csv') # 假设数据已经预处理完成 # 检查数据结构 print(clean_df.head()) print(clean_df.info()) # 使用BERT进行情感分析 # 加载预训练的BERT模型和分词器 # model = BertModel.from_pretrained('bert-base-uncased') # tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') tokenizer = BertTokenizer.from_pretrained('bert-base-chinese') model = BertForSequenceClassification.from_pretrained('bert-base-chinese', num_labels=3) # 假设分为积极、中性、消极三类 # 准备训练数据和测试数据 train_texts, test_texts, train_labels, test_labels = train_test_split( clean_df['内容'], clean_df['评分'], test_size=0.2, random_state=42 ) # 将文本转换为BERT输入格式 def encode_texts(texts): return tokenizer( texts.tolist(), padding=True, truncation=True, max_length=128, return_tensors='pt' ) train_encodings = encode_texts(train_texts) test_encodings = encode_texts(test_texts) # 定义训练和评估函数 def train_model(model, train_encodings, train_labels, epochs=3, batch_size=16): optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5) model.train() for epoch in range(epochs): for i in range(0, len(train_encodings['input_ids']), batch_size): batch = {k: v[i:i+batch_size] for k, v in train_encodings.items()} outputs = model(**batch, labels=torch.tensor(train_labels[i:i+batch_size])) loss = outputs.loss loss.backward() optimizer.step() optimizer.zero_grad() if i % 100 == 0: print(f'Epoch {epoch+1}, Bat

import gc import time from pathlib import Path import matplotlib.pyplot as plt import numpy as np import pandas as pd import torch import torch.nn as nn import torch.optim as optim from torch.utils.data import Dataset, DataLoader, TensorDataset from sklearn.ensemble import RandomForestRegressor from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error from sklearn.preprocessing import StandardScaler from skopt import gp_minimize from skopt.space import Real, Categorical, Integer import warnings import seaborn as sns from sklearn.preprocessing import RobustScaler from sklearn.model_selection import TimeSeriesSplit from scipy.stats import boxcox # 设置中文显示 plt.rcParams['font.sans-serif'] = ['SimHei'] plt.rcParams['axes.unicode_minus'] = False plt.switch_backend('TkAgg') # 设置路径 Path(r"D:\result2").mkdir(parents=True, exist_ok=True) Path("model_results/").mkdir(parents=True, exist_ok=True) # 检查GPU可用性 device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print(f"使用 {device} 进行训练") # 设置随机种子保证可重复性 torch.manual_seed(42) np.random.seed(42) # 1. 数据预处理模块 def load_and_preprocess_data(): """加载并预处理数据(内存安全版)""" chunksize = 10000 # 每次处理1万行 dfs = [] datetimes_list = [] location_codes_list = [] # 指定列数据类型以减少内存使用 dtype_dict = { 'damage_count': 'float32', 'damage_depth': 'float32', } for chunk in pd.read_csv( r"D:\my_data\clean\locationTransfer.csv", chunksize=chunksize, dtype=dtype_dict ): # 保存非数值列 datetimes_list.append(chunk['datetime'].copy()) location_codes_list.append(chunk['locationCode'].copy()) # 只处理数值列 numeric_cols = chunk.select_dtypes(include=[np.number]).columns chunk = chunk[numeric_cols] chunk = chunk.dropna(subset=['damage_count']) chunk = chunk[pd.to_numeric(chunk['damage_count'], errors='coerce').notna()] chunk = chunk.fillna(method='ffill').fillna(method='bfill') dfs.append(chunk) if len(dfs) > 10: # 测试时限制块数 break # 合并数据块 df = pd.concat(dfs, ignore_index=True) def create_lag_features(df, lags=3): for lag in range(1, lags + 1): df[f'damage_count_lag_{lag}'] = df['damage_count'].shift(lag) return df.dropna() df = create_lag_features(df) # 在合并df之后,填充na之前 df = df.dropna(subset=['damage_count']) datetimes = pd.concat(datetimes_list, ignore_index=True) location_codes = pd.concat(location_codes_list, ignore_index=True) # 确保长度一致 min_length = min(len(df), len(datetimes), len(location_codes)) df = df.iloc[:min_length] datetimes = datetimes.iloc[:min_length] location_codes = location_codes.iloc[:min_length] # 检查是否存在 NaN 值 nan_check = df.isnull().sum().sum() inf_check = df.isin([np.Inf, -np.Inf]).sum().sum() if nan_check > 0 or inf_check > 0: # 处理 NaN 值或者无穷大值 # 填充缺失值为均值 df = df.fillna(df.mean()) # 删除包含 NaN 值或者无穷大值的行 df = df.dropna() # 结构化特征 X_structured = df.drop(columns=['damage_count', 'damage_depth', 'damage_db', 'asset_code_mapping', 'pile_longitude', 'pile_latitude', 'locationCode', 'datetime', 'locationCode_encoded','damage_count_lag_1','damage_count_lag_2','damage_count_lag_3'], errors='ignore') # 填充缺失值 numeric_cols = X_structured.select_dtypes(include=[np.number]).columns for col in numeric_cols: X_structured[col] = X_structured[col].fillna(X_structured[col].mean()) # 标准化数据 scaler = RobustScaler() # 替换StandardScaler,更抗异常值 X_structured = pd.DataFrame(scaler.fit_transform(X_structured), columns=X_structured.columns) # 确保X_structured是DataFrame if not isinstance(X_structured, pd.DataFrame): X_structured = pd.DataFrame(X_structured, columns=[f"feature_{i}" for i in range(X_structured.shape[1])]) # X_structured = X_structured.values # 将DataFrame转换为NumPy数组 # 修改后的目标变量处理部分 y = df[['damage_count']].values.astype(np.float32) # 添加数据缩放 y_scaler = RobustScaler() y = y_scaler.fit_transform(y) # 使用标准化代替log变换 y = np.clip(y, -1e6, 1e6) # 设置合理的上下界 # 添加数据检查 assert not np.any(np.isinf(y)), "y中包含无限值" assert not np.any(np.isnan(y)), "y中包含NaN值" # 数据检查 print("原始数据统计:") print(f"最小值: {y.min()}, 最大值: {y.max()}, NaN数量: {np.isnan(y).sum()}") print("处理后y值范围:", np.min(y), np.max(y)) print("无限值数量:", np.isinf(y).sum()) # 清理内存 del df, chunk, dfs gc.collect() torch.cuda.empty_cache() return datetimes, X_structured, y, location_codes, scaler, y_scaler # 2. 时间序列数据集类 class TimeSeriesDataset(Dataset): """自定义时间序列数据集类""" def __init__(self, X, y, timesteps): # 确保输入是NumPy数组 if isinstance(X, pd.DataFrame): X = X.values if isinstance(y, pd.DataFrame) or isinstance(y, pd.Series): y = y.values assert X.ndim == 2, f"X应为2维,实际为{X.ndim}维" assert y.ndim == 2, f"y应为2维,实际为{y.ndim}维" # 添加维度调试信息 print(f"数据形状 - X: {X.shape}, y: {y.shape}") self.X = torch.FloatTensor(X).unsqueeze(-1) # [samples, timesteps, 1] self.y = torch.FloatTensor(y) self.timesteps = timesteps # 验证形状 if len(self.X) != len(self.y): raise ValueError("X和y的长度不匹配") def __len__(self): return len(self.X) - self.timesteps def __getitem__(self, idx): # [seq_len, num_features] x_window = self.X[idx:idx + self.timesteps] y_target = self.y[idx + self.timesteps - 1] return x_window.permute(1, 0), y_target # 调整维度顺序 def select_features_by_importance(X, y, n_features, feature_names=None): """使用随机森林选择特征(支持NumPy数组和DataFrame)""" # 确保X是二维数组 if isinstance(X, pd.DataFrame): feature_names = X.columns.tolist() X = X.values elif feature_names is None: feature_names = [f"feature_{i}" for i in range(X.shape[1])] # 处理y的维度 y = np.ravel(np.asarray(y)) y = np.nan_to_num(y, nan=np.nanmean(y)) # 检查特征数 if X.shape[1] < n_features: n_features = X.shape[1] print(f"警告: 特征数少于请求数,使用所有 {n_features} 个特征") # 训练随机森林 rf = RandomForestRegressor(n_estimators=250, random_state=42, n_jobs=-1) rf.fit(X, y) # 获取特征重要性 feature_importances = rf.feature_importances_ indices = np.argsort(feature_importances)[::-1][:n_features] # 返回选中的特征数据和重要性 return X[:, indices], feature_importances[indices], [feature_names[i] for i in indices] # 3. LSTM模型定义 class LSTMModel(nn.Module): """LSTM回归模型""" def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.4): super().__init__() # 确保hidden_size*2能被num_heads整除 if (hidden_size * 2) % 4 != 0: hidden_size = ((hidden_size * 2) // 4) * 4 // 2 # 调整到最近的合规值 print(f"调整hidden_size为{hidden_size}以满足整除条件") self.lstm = nn.LSTM( input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout if num_layers > 1 else 0, bidirectional=True # 添加双向结构 ) # 添加维度检查的初始化 def weights_init(m): if isinstance(m, nn.Linear): if m.weight.dim() < 2: m.weight.data = m.weight.data.unsqueeze(0) # 确保至少2维 nn.init.xavier_uniform_(m.weight) if m.bias is not None: nn.init.constant_(m.bias, 0.0) self.bn = nn.BatchNorm1d(hidden_size * 2) # # 注意力机制层 self.attention = nn.MultiheadAttention(embed_dim=hidden_size * 2, num_heads=4) # 改用多头注意力 # 更深D输出层 self.fc = nn.Sequential( nn.Linear(hidden_size*2, hidden_size), nn.ReLU(), nn.Dropout(dropout), nn.Linear(hidden_size, 1) ) # 应用初始化 self.apply(weights_init) def forward(self, x): lstm_out, _ = self.lstm(x) # [batch, seq_len, hidden*2] lstm_out = lstm_out.permute(1, 0, 2) # [seq_len, batch, features] attn_out, _ = self.attention(lstm_out, lstm_out, lstm_out) attn_out = attn_out.permute(1, 0, 2) # 恢复为[batch, seq_len, features] return self.fc(attn_out[:, -1, :]).squeeze() def plot_feature_importance(feature_names, importance_values, save_path): """绘制特征重要性图""" # 验证输入 if len(feature_names) == 0 or len(importance_values) == 0: print("警告: 无特征重要性数据可绘制") return if len(feature_names) != len(importance_values): print(f"警告: 特征名数量({len(feature_names)})与重要性值数量({len(importance_values)})不匹配") # 取较小值 min_len = min(len(feature_names), len(importance_values)) feature_names = feature_names[:min_len] importance_values = importance_values[:min_len] # 按重要性排序 indices = np.argsort(importance_values)[::-1] sorted_features = [feature_names[i] for i in indices] sorted_importance = importance_values[indices] plt.figure(figsize=(12, 8)) plt.bar(range(len(sorted_features)), sorted_importance, align="center") plt.xticks(range(len(sorted_features)), sorted_features, rotation=90) plt.xlabel("特征") plt.ylabel("重要性得分") plt.title("特征重要性排序") plt.tight_layout() # 确保保存路径存在 save_path.parent.mkdir(parents=True, exist_ok=True) plt.savefig(save_path, dpi=300) plt.close() def evaluate(model, val_loader, criterion): model.eval() val_loss = 0.0 with torch.no_grad(): for inputs, targets in val_loader: inputs, targets = inputs.to(device), targets.to(device) outputs = model(inputs) loss = criterion(outputs, targets.squeeze()) val_loss += loss.item() return val_loss / len(val_loader) # 4. 模型训练函数 def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler=None, epochs=100, patience=30): """训练模型并返回最佳模型和训练历史""" best_loss = float('inf') history = {'train_loss': [], 'val_loss': []} # 添加梯度累积 accumulation_steps = 5 # 每4个batch更新一次参数 for epoch in range(epochs): # 训练阶段 model.train() train_loss = 0.0 optimizer.zero_grad() for inputs, targets in train_loader: inputs, targets = inputs.to(device), targets.to(device) scaler = torch.cuda.amp.GradScaler() # 在训练循环中添加 with torch.cuda.amp.autocast(): outputs = model(inputs) loss = criterion(outputs, targets.squeeze()) scaler.scale(loss).backward() for batch_idx, (inputs, targets) in enumerate(train_loader): inputs, targets = inputs.to(device), targets.to(device) # 前向传播 outputs = model(inputs) loss = criterion(outputs, targets.squeeze()) # 梯度累积 loss = loss / accumulation_steps scaler.scale(loss).backward() if (batch_idx + 1) % accumulation_steps == 0: scaler.unscale_(optimizer) torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) scaler.step(optimizer) scaler.update() optimizer.zero_grad() train_loss += loss.item() * accumulation_steps # 验证 val_loss = evaluate(model, val_loader, criterion) if scheduler: scheduler.step(val_loss) # 根据验证损失调整学习率 # 记录历史 avg_train_loss = train_loss / len(train_loader) history['train_loss'].append(avg_train_loss) history['val_loss'].append(val_loss) # 早停逻辑 if val_loss < best_loss * 0.99:# 相对改进阈值 best_loss = val_loss best_epoch = epoch torch.save(model.state_dict(), 'best_model.pth') print(f"Epoch {epoch + 1}/{epochs} - 训练损失: {avg_train_loss :.4f} - 验证损失: {val_loss:.4f}") # 早停判断 if epoch - best_epoch >= patience: print(f"早停触发,最佳epoch: {best_epoch+1}") break # 加载最佳模型 model.load_state_dict(torch.load('best_model.pth')) return model, history # 5. 贝叶斯优化函数 def optimize_hyperparameters(X_train, y_train, input_size): """使用贝叶斯优化寻找最佳超参数""" # 自定义评分函数 def score_fn(params): """内部评分函数""" try: params = adjust_hidden_size(params) # 调整参数 hidden_size, num_layers, dropout, lr, batch_size, timesteps = params # 确保参数有效 batch_size = max(32, min(256, int(batch_size))) timesteps = max(3, min(10, int(timesteps))) dropout = min(0.5, max(0.1, float(dropout))) lr = min(0.01, max(1e-5, float(lr))) # 检查数据是否足够 if len(X_train) < 2 * timesteps+1: # 至少需要2倍时间步长的数据 return float('inf') # 创建模型 model = LSTMModel( input_size=input_size, hidden_size=int(hidden_size), num_layers=min(3, int(num_layers)), dropout=min(0.5, float(dropout)) ).to(device) # 初始化权重 # for name, param in model.named_parameters(): # if 'weight' in name: # nn.init.xavier_normal_(param) # elif 'bias' in name: # nn.init.constant_(param, 0.1) # 损失函数和优化器 criterion = nn.HuberLoss(delta=1.0) optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-3) # 创建数据加载器 dataset = TimeSeriesDataset(X_train, y_train, timesteps=int(timesteps)) # 简化验证流程 train_size = int(0.8 * len(dataset)) train_dataset = torch.utils.data.Subset(dataset, range(train_size)) val_dataset = torch.utils.data.Subset(dataset, range(train_size, len(dataset))) train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False) # 简单训练和验证 model.train() for epoch in range(15): # 减少epoch数以加快评估 for inputs, targets in train_loader: inputs, targets = inputs.to(device), targets.to(device) optimizer.zero_grad() outputs = model(inputs) loss = criterion(outputs, targets.squeeze()) loss.backward() torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) optimizer.step() # 验证 model.eval() val_loss = 0.0 with torch.no_grad(): for inputs, targets in val_loader: outputs = model(inputs.to(device)) loss = criterion(outputs, targets.squeeze().to(device)) if torch.isnan(loss) or torch.isinf(loss): return float('inf') val_loss += loss.item() return val_loss / len(val_loader) except Exception as e: print(f"参数评估失败: {str(e)}") return float('inf') # 定义搜索空间 search_spaces = [ Integer(32, 128, name='hidden_size'), Integer(1, 3, name='num_layers'), Real(0.2, 0.5, name='dropout'), Real(5e-4, 1e-3, prior='log-uniform', name='lr'), Categorical([64, 128, 256], name='batch_size'), Integer(3, 10, name='timesteps') # 优化时间步长 ] def adjust_hidden_size(params): """确保hidden_size*2能被4整除""" hs = params[0] params[0] = ((hs * 2) // 4) * 4 // 2 return params result = gp_minimize( score_fn, search_spaces, n_calls=50, random_state=42, verbose=True, n_jobs=1 # 并行执行 ) # 提取最佳参数 best_params = { 'hidden_size': result.x[0], 'num_layers': result.x[1], 'dropout': result.x[2], 'lr': result.x[3], 'batch_size': result.x[4], 'timesteps': result.x[5] } print("优化完成,最佳参数:", best_params) return best_params def collate_fn(batch): """增强型数据批处理函数""" # 解包批次数据 inputs, targets = zip(*batch) # 维度转换 (已包含在数据集中) # inputs = [batch_size, features, seq_len] # 数据增强(可选) # 添加高斯噪声 noise = torch.randn_like(inputs) * 0.05 inputs = inputs + noise # 归一化处理(可选) # mean = inputs.mean(dim=(1,2), keepdim=True) # std = inputs.std(dim=(1,2), keepdim=True) # inputs = (inputs - mean) / (std + 1e-8) return inputs.permute(0, 2, 1), torch.stack(targets) # [batch, seq_len, features] # 6. 评估函数 def evaluate_model(model, test_loader, criterion, test_indices, y_scaler=None): """评估模型性能""" model.eval() test_loss = 0.0 y_true = [] y_pred = [] all_indices = [] with torch.no_grad(): for batch_idx, (inputs, targets) in enumerate(test_loader): inputs, targets = inputs.to(device), targets.to(device) outputs = model(inputs) if outputs.dim() == 1: outputs = outputs.unsqueeze(1) loss = criterion(outputs, targets) test_loss += loss.item() * inputs.size(0) # 收集预测结果 y_true.extend(targets.cpu().numpy()) y_pred.extend(outputs.cpu().numpy()) # 获取原始数据集中的索引 current_indices = test_indices[batch_idx * test_loader.batch_size: (batch_idx + 1) * test_loader.batch_size] all_indices.extend(current_indices) y_true = np.array(y_true).reshape(-1) y_pred = np.array(y_pred).reshape(-1) if y_scaler is not None: y_true = y_scaler.inverse_transform(y_true.reshape(-1, 1)).flatten() y_pred = y_scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten() # 基础指标 metrics = { 'MSE': mean_squared_error(y_true, y_pred), 'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)), 'MAE': mean_absolute_error(y_true, y_pred), 'R2': r2_score(y_true, y_pred), 'MAPE': np.mean(np.abs((y_true - y_pred) / (y_true + 1e-8))) * 100, # 避免除0 'indices': all_indices, # 添加原始索引 'y_true_original': y_true, 'y_pred_original': y_pred, 'test_loss': test_loss } # 可视化误差分布 errors = y_true - y_pred plt.figure(figsize=(12, 6)) sns.histplot(errors, kde=True, bins=50) plt.title('Error Distribution') plt.savefig('error_distribution.png') plt.close() return metrics, y_true, y_pred def collate_fn(batch): """增强型数据批处理函数""" # 解包批次数据 inputs, targets = zip(*batch) # 维度转换 (已包含在数据集中) # inputs = [batch_size, features, seq_len] # 数据增强(可选) # 添加高斯噪声 noise = torch.randn_like(inputs) * 0.05 inputs = inputs + noise # 归一化处理(可选) # mean = inputs.mean(dim=(1,2), keepdim=True) # std = inputs.std(dim=(1,2), keepdim=True) # inputs = (inputs - mean) / (std + 1e-8) return inputs.permute(0, 2, 1), torch.stack(targets) # [batch, seq_len, features] # 7. 主函数 def main(): # 1. 加载和预处理数据 print("正在加载和预处理数据...") datetimes, X_structured, y, location_codes, scaler , y_scaler= load_and_preprocess_data() # 2. 特征选择 print('正在进行特征选择') # 修改为选择前15%特征 n_features = int(X_structured.shape[1] * 0.15) X_selected, feature_importances, top_features = select_features_by_importance( X_structured, y, n_features ) X_selected = X_structured[top_features] print(f"选择后的特征及其重要性:") for feature, importance in zip(top_features, feature_importances): print(f"{feature}: {importance:.4f}") print(X_selected) # 绘制特征重要性图 plot_feature_importance(top_features, feature_importances, Path("feature_importance.png")) # 3. 创建时间序列数据集 print("正在创建时间序列数据集...") timesteps = 5 dataset = TimeSeriesDataset(X_selected, y, timesteps) # 4. 数据划分 train_size = int(0.8 * len(dataset)) train_indices = list(range(train_size)) test_indices = list(range(train_size, len(dataset))) train_dataset = torch.utils.data.Subset(dataset, train_indices) test_dataset = torch.utils.data.Subset(dataset, test_indices) # 5. 贝叶斯优化超参数 print("正在进行贝叶斯优化...") try: best_params = optimize_hyperparameters( X_selected.iloc[:train_size], y[:train_size].copy(), input_size=X_selected.shape[1] ) print("最佳参数:", best_params) except Exception as e: print(f"贝叶斯优化失败: {str(e)}") # 6. 使用最佳参数训练最终模型 torch.cuda.empty_cache() # 清理 GPU 缓存 print("\n使用最佳参数训练模型...") # 获取并验证batch_size batch_size = int(best_params.get('batch_size')) print(f"实际使用的batch_size类型: {type(batch_size)}, 值: {batch_size}") # 调试输出 model = LSTMModel( input_size=X_selected.shape[1], hidden_size=int(best_params['hidden_size']), num_layers=int(best_params['num_layers']), dropout=float(best_params['dropout']) ).to(device) # 数据加载器 train_loader = DataLoader( train_dataset, batch_size=int(batch_size), shuffle=True, # 训练集需要打乱 collate_fn=collate_fn, num_workers=4, # 多进程加载 pin_memory=True # 加速GPU传输 ) val_loader = DataLoader( test_dataset, batch_size=int(batch_size)*2, # 更大的批次提升验证效率 shuffle=False, # 验证集不需要打乱 collate_fn=lambda batch: ( torch.stack([x for x, y in batch]).permute(0, 2, 1), torch.stack([y for x, y in batch]) ), num_workers=2, pin_memory=True ) # 损失函数和优化器 criterion = nn.HuberLoss(delta=1.0) optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50) # 训练模型 model, history = train_model( model, train_loader, val_loader, criterion, optimizer, scheduler=scheduler, epochs=200, patience=15) torch.cuda.empty_cache() # 清理 GPU 缓存 # 7. 评估模型 print("\n评估模型性能...") metrics, y_true, y_pred = evaluate_model(model, val_loader, criterion, test_indices, y_scaler) print(f"测试集 MSE: {metrics['MSE']:.4f}, MAE: {metrics['MAE']:.4f}, R2: {metrics['R2']:.4f}") # 8. 保存所有结果 print("\n保存所有结果...") output_dir = Path(r"D:\result2") output_dir.mkdir(parents=True, exist_ok=True) # 保存评估指标 metrics_df = pd.DataFrame({ 'Metric': ['MSE', 'MAE', 'R2', 'MAPE', 'Test Loss'], 'Value': [metrics['MSE'], metrics['MAE'], metrics['R2'], metrics['MAPE'], metrics['test_loss']] }) metrics_df.to_csv(output_dir / 'evaluation_metrics.csv', index=False) # 保存训练历史 history_df = pd.DataFrame(history) history_df.to_csv(output_dir / 'training_history.csv', index=False) # 保存预测结果与原始数据 pred_indices = [i + timesteps - 1 for i in metrics['indices']] # 调整索引以匹配原始数据 # 确保我们有足够的datetime和locationCode数据 if len(datetimes) > max(pred_indices) and len(location_codes) > max(pred_indices): y_true = y_true.flatten() # 确保是一维 y_pred = y_pred.flatten() # 确保是一维 result_df = pd.DataFrame({ 'datetime': datetimes.iloc[pred_indices].values, 'locationCode': location_codes.iloc[pred_indices].values, 'true_value': y_true, 'predicted_value': y_pred }) # 有条件地添加分位数 if y_pred.shape[1] > 2: result_df['predicted_lower'] = y_pred[:, 0] # 10%分位数 result_df['predicted_upper'] = y_pred[:, 2] # 90%分位数 # 添加其他特征 for i, feature in enumerate(X_selected.columns): result_df[feature] = X_selected.iloc[pred_indices, i].values result_df.to_csv(output_dir / 'predictions_with_metadata.csv', index=False) else: print("警告: datetime或locationCode数据不足,无法完全匹配预测结果") # 保存基础预测结果 pd.DataFrame({ 'true_value': y_true.flatten(), 'predicted_value': y_pred.flatten() }).to_csv(output_dir / 'predictions.csv', index=False) # 9. 可视化结果 plt.figure(figsize=(12, 6)) plt.plot(history['train_loss'], label='训练损失') plt.plot(history['val_loss'], label='验证损失') plt.xlabel('Epoch') plt.ylabel('Loss') plt.title('训练过程') plt.legend() plt.savefig(output_dir / 'training_process.png', dpi=300) plt.close() # 添加预测结果可视化 plt.figure(figsize=(15, 6)) plt.plot(y_true[:200], label='真实值') plt.plot(y_pred[:200], label='预测值') # 只使用中位数预测 plt.title('预测结果对比') plt.legend() plt.savefig(output_dir / 'prediction_comparison.png', dpi=300) plt.show() # 误差分布图 errors = y_true - y_pred[:, 1] plt.hist(errors, bins=50) plt.title('预测误差分布') plt.savefig(output_dir / 'error_distribution.png', dpi=300) # 保存图像 plt.close() # # 添加分位数预测可视化 # plt.figure(figsize=(15, 6)) # plt.plot(y_true[:100], label='真实值') # plt.plot(y_pred[:100, 0], label='10%分位数') # plt.plot(y_pred[:100, 1], label='中位数') # plt.plot(y_pred[:100, 2], label='90%分位数') # plt.legend() # plt.savefig(output_dir / 'quantile_predictions.png', dpi=300) # 保存图像 # plt.close() # 9. 保存模型 if metrics['r2'] > 0.8: model_path = output_dir / 'best_model.pth' torch.save(model.state_dict(), model_path) print(f"模型保存成功: {model_path}") print(f"所有结果已保存到 {output_dir}") if __name__ == "__main__": warnings.filterwarnings('ignore') start_time = time.time() main() print(f"总运行时间: {(time.time() - start_time) / 60:.2f}分钟")import gc import time from pathlib import Path import matplotlib.pyplot as plt import numpy as np import pandas as pd import torch import torch.nn as nn import torch.optim as optim from torch.utils.data import Dataset, DataLoader, TensorDataset from sklearn.ensemble import RandomForestRegressor from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error from sklearn.preprocessing import StandardScaler from skopt import gp_minimize from skopt.space import Real, Categorical, Integer import warnings import seaborn as sns from sklearn.preprocessing import RobustScaler from sklearn.model_selection import TimeSeriesSplit from scipy.stats import boxcox # 设置中文显示 plt.rcParams['font.sans-serif'] = ['SimHei'] plt.rcParams['axes.unicode_minus'] = False plt.switch_backend('TkAgg') # 设置路径 Path(r"D:\result2").mkdir(parents=True, exist_ok=True) Path("model_results/").mkdir(parents=True, exist_ok=True) # 检查GPU可用性 device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print(f"使用 {device} 进行训练") # 设置随机种子保证可重复性 torch.manual_seed(42) np.random.seed(42) # 1. 数据预处理模块 def load_and_preprocess_data(): """加载并预处理数据(内存安全版)""" chunksize = 10000 # 每次处理1万行 dfs = [] datetimes_list = [] location_codes_list = [] # 指定列数据类型以减少内存使用 dtype_dict = { 'damage_count': 'float32', 'damage_depth': 'float32', } for chunk in pd.read_csv( r"D:\my_data\clean\locationTransfer.csv", chunksize=chunksize, dtype=dtype_dict ): # 保存非数值列 datetimes_list.append(chunk['datetime'].copy()) location_codes_list.append(chunk['locationCode'].copy()) # 只处理数值列 numeric_cols = chunk.select_dtypes(include=[np.number]).columns chunk = chunk[numeric_cols] chunk = chunk.dropna(subset=['damage_count']) chunk = chunk[pd.to_numeric(chunk['damage_count'], errors='coerce').notna()] chunk = chunk.fillna(method='ffill').fillna(method='bfill') dfs.append(chunk) if len(dfs) > 10: # 测试时限制块数 break # 合并数据块 df = pd.concat(dfs, ignore_index=True) def create_lag_features(df, lags=3): for lag in range(1, lags + 1): df[f'damage_count_lag_{lag}'] = df['damage_count'].shift(lag) return df.dropna() df = create_lag_features(df) # 在合并df之后,填充na之前 df = df.dropna(subset=['damage_count']) datetimes = pd.concat(datetimes_list, ignore_index=True) location_codes = pd.concat(location_codes_list, ignore_index=True) # 确保长度一致 min_length = min(len(df), len(datetimes), len(location_codes)) df = df.iloc[:min_length] datetimes = datetimes.iloc[:min_length] location_codes = location_codes.iloc[:min_length] # 检查是否存在 NaN 值 nan_check = df.isnull().sum().sum() inf_check = df.isin([np.Inf, -np.Inf]).sum().sum() if nan_check > 0 or inf_check > 0: # 处理 NaN 值或者无穷大值 # 填充缺失值为均值 df = df.fillna(df.mean()) # 删除包含 NaN 值或者无穷大值的行 df = df.dropna() # 结构化特征 X_structured = df.drop(columns=['damage_count', 'damage_depth', 'damage_db', 'asset_code_mapping', 'pile_longitude', 'pile_latitude', 'locationCode', 'datetime', 'locationCode_encoded','damage_count_lag_1','damage_count_lag_2','damage_count_lag_3'], errors='ignore') # 填充缺失值 numeric_cols = X_structured.select_dtypes(include=[np.number]).columns for col in numeric_cols: X_structured[col] = X_structured[col].fillna(X_structured[col].mean()) # 标准化数据 scaler = RobustScaler() # 替换StandardScaler,更抗异常值 X_structured = pd.DataFrame(scaler.fit_transform(X_structured), columns=X_structured.columns) # 确保X_structured是DataFrame if not isinstance(X_structured, pd.DataFrame): X_structured = pd.DataFrame(X_structured, columns=[f"feature_{i}" for i in range(X_structured.shape[1])]) # X_structured = X_structured.values # 将DataFrame转换为NumPy数组 # 修改后的目标变量处理部分 y = df[['damage_count']].values.astype(np.float32) # 添加数据缩放 y_scaler = RobustScaler() y = y_scaler.fit_transform(y) # 使用标准化代替log变换 y = np.clip(y, -1e6, 1e6) # 设置合理的上下界 # 添加数据检查 assert not np.any(np.isinf(y)), "y中包含无限值" assert not np.any(np.isnan(y)), "y中包含NaN值" # 数据检查 print("原始数据统计:") print(f"最小值: {y.min()}, 最大值: {y.max()}, NaN数量: {np.isnan(y).sum()}") print("处理后y值范围:", np.min(y), np.max(y)) print("无限值数量:", np.isinf(y).sum()) # 清理内存 del df, chunk, dfs gc.collect() torch.cuda.empty_cache() return datetimes, X_structured, y, location_codes, scaler, y_scaler # 2. 时间序列数据集类 class TimeSeriesDataset(Dataset): """自定义时间序列数据集类""" def __init__(self, X, y, timesteps): # 确保输入是NumPy数组 if isinstance(X, pd.DataFrame): X = X.values if isinstance(y, pd.DataFrame) or isinstance(y, pd.Series): y = y.values assert X.ndim == 2, f"X应为2维,实际为{X.ndim}维" assert y.ndim == 2, f"y应为2维,实际为{y.ndim}维" # 添加维度调试信息 print(f"数据形状 - X: {X.shape}, y: {y.shape}") self.X = torch.FloatTensor(X).unsqueeze(-1) # [samples, timesteps, 1] self.y = torch.FloatTensor(y) self.timesteps = timesteps # 验证形状 if len(self.X) != len(self.y): raise ValueError("X和y的长度不匹配") def __len__(self): return len(self.X) - self.timesteps def __getitem__(self, idx): # [seq_len, num_features] x_window = self.X[idx:idx + self.timesteps] y_target = self.y[idx + self.timesteps - 1] return x_window.permute(1, 0), y_target # 调整维度顺序 def select_features_by_importance(X, y, n_features, feature_names=None): """使用随机森林选择特征(支持NumPy数组和DataFrame)""" # 确保X是二维数组 if isinstance(X, pd.DataFrame): feature_names = X.columns.tolist() X = X.values elif feature_names is None: feature_names = [f"feature_{i}" for i in range(X.shape[1])] # 处理y的维度 y = np.ravel(np.asarray(y)) y = np.nan_to_num(y, nan=np.nanmean(y)) # 检查特征数 if X.shape[1] < n_features: n_features = X.shape[1] print(f"警告: 特征数少于请求数,使用所有 {n_features} 个特征") # 训练随机森林 rf = RandomForestRegressor(n_estimators=250, random_state=42, n_jobs=-1) rf.fit(X, y) # 获取特征重要性 feature_importances = rf.feature_importances_ indices = np.argsort(feature_importances)[::-1][:n_features] # 返回选中的特征数据和重要性 return X[:, indices], feature_importances[indices], [feature_names[i] for i in indices] # 3. LSTM模型定义 class LSTMModel(nn.Module): """LSTM回归模型""" def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.4): super().__init__() # 确保hidden_size*2能被num_heads整除 if (hidden_size * 2) % 4 != 0: hidden_size = ((hidden_size * 2) // 4) * 4 // 2 # 调整到最近的合规值 print(f"调整hidden_size为{hidden_size}以满足整除条件") self.lstm = nn.LSTM( input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout if num_layers > 1 else 0, bidirectional=True # 添加双向结构 ) # 添加维度检查的初始化 def weights_init(m): if isinstance(m, nn.Linear): if m.weight.dim() < 2: m.weight.data = m.weight.data.unsqueeze(0) # 确保至少2维 nn.init.xavier_uniform_(m.weight) if m.bias is not None: nn.init.constant_(m.bias, 0.0) self.bn = nn.BatchNorm1d(hidden_size * 2) # # 注意力机制层 self.attention = nn.MultiheadAttention(embed_dim=hidden_size * 2, num_heads=4) # 改用多头注意力 # 更深D输出层 self.fc = nn.Sequential( nn.Linear(hidden_size*2, hidden_size), nn.ReLU(), nn.Dropout(dropout), nn.Linear(hidden_size, 1) ) # 应用初始化 self.apply(weights_init) def forward(self, x): lstm_out, _ = self.lstm(x) # [batch, seq_len, hidden*2] lstm_out = lstm_out.permute(1, 0, 2) # [seq_len, batch, features] attn_out, _ = self.attention(lstm_out, lstm_out, lstm_out) attn_out = attn_out.permute(1, 0, 2) # 恢复为[batch, seq_len, features] return self.fc(attn_out[:, -1, :]).squeeze() def plot_feature_importance(feature_names, importance_values, save_path): """绘制特征重要性图""" # 验证输入 if len(feature_names) == 0 or len(importance_values) == 0: print("警告: 无特征重要性数据可绘制") return if len(feature_names) != len(importance_values): print(f"警告: 特征名数量({len(feature_names)})与重要性值数量({len(importance_values)})不匹配") # 取较小值 min_len = min(len(feature_names), len(importance_values)) feature_names = feature_names[:min_len] importance_values = importance_values[:min_len] # 按重要性排序 indices = np.argsort(importance_values)[::-1] sorted_features = [feature_names[i] for i in indices] sorted_importance = importance_values[indices] plt.figure(figsize=(12, 8)) plt.bar(range(len(sorted_features)), sorted_importance, align="center") plt.xticks(range(len(sorted_features)), sorted_features, rotation=90) plt.xlabel("特征") plt.ylabel("重要性得分") plt.title("特征重要性排序") plt.tight_layout() # 确保保存路径存在 save_path.parent.mkdir(parents=True, exist_ok=True) plt.savefig(save_path, dpi=300) plt.close() def evaluate(model, val_loader, criterion): model.eval() val_loss = 0.0 with torch.no_grad(): for inputs, targets in val_loader: inputs, targets = inputs.to(device), targets.to(device) outputs = model(inputs) loss = criterion(outputs, targets.squeeze()) val_loss += loss.item() return val_loss / len(val_loader) # 4. 模型训练函数 def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler=None, epochs=100, patience=30): """训练模型并返回最佳模型和训练历史""" best_loss = float('inf') history = {'train_loss': [], 'val_loss': []} # 添加梯度累积 accumulation_steps = 5 # 每4个batch更新一次参数 for epoch in range(epochs): # 训练阶段 model.train() train_loss = 0.0 optimizer.zero_grad() for inputs, targets in train_loader: inputs, targets = inputs.to(device), targets.to(device) scaler = torch.cuda.amp.GradScaler() # 在训练循环中添加 with torch.cuda.amp.autocast(): outputs = model(inputs) loss = criterion(outputs, targets.squeeze()) scaler.scale(loss).backward() for batch_idx, (inputs, targets) in enumerate(train_loader): inputs, targets = inputs.to(device), targets.to(device) # 前向传播 outputs = model(inputs) loss = criterion(outputs, targets.squeeze()) # 梯度累积 loss = loss / accumulation_steps scaler.scale(loss).backward() if (batch_idx + 1) % accumulation_steps == 0: scaler.unscale_(optimizer) torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) scaler.step(optimizer) scaler.update() optimizer.zero_grad() train_loss += loss.item() * accumulation_steps # 验证 val_loss = evaluate(model, val_loader, criterion) if scheduler: scheduler.step(val_loss) # 根据验证损失调整学习率 # 记录历史 avg_train_loss = train_loss / len(train_loader) history['train_loss'].append(avg_train_loss) history['val_loss'].append(val_loss) # 早停逻辑 if val_loss < best_loss * 0.99:# 相对改进阈值 best_loss = val_loss best_epoch = epoch torch.save(model.state_dict(), 'best_model.pth') print(f"Epoch {epoch + 1}/{epochs} - 训练损失: {avg_train_loss :.4f} - 验证损失: {val_loss:.4f}") # 早停判断 if epoch - best_epoch >= patience: print(f"早停触发,最佳epoch: {best_epoch+1}") break # 加载最佳模型 model.load_state_dict(torch.load('best_model.pth')) return model, history # 5. 贝叶斯优化函数 def optimize_hyperparameters(X_train, y_train, input_size): """使用贝叶斯优化寻找最佳超参数""" # 自定义评分函数 def score_fn(params): """内部评分函数""" try: params = adjust_hidden_size(params) # 调整参数 hidden_size, num_layers, dropout, lr, batch_size, timesteps = params # 确保参数有效 batch_size = max(32, min(256, int(batch_size))) timesteps = max(3, min(10, int(timesteps))) dropout = min(0.5, max(0.1, float(dropout))) lr = min(0.01, max(1e-5, float(lr))) # 检查数据是否足够 if len(X_train) < 2 * timesteps+1: # 至少需要2倍时间步长的数据 return float('inf') # 创建模型 model = LSTMModel( input_size=input_size, hidden_size=int(hidden_size), num_layers=min(3, int(num_layers)), dropout=min(0.5, float(dropout)) ).to(device) # 初始化权重 # for name, param in model.named_parameters(): # if 'weight' in name: # nn.init.xavier_normal_(param) # elif 'bias' in name: # nn.init.constant_(param, 0.1) # 损失函数和优化器 criterion = nn.HuberLoss(delta=1.0) optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-3) # 创建数据加载器 dataset = TimeSeriesDataset(X_train, y_train, timesteps=int(timesteps)) # 简化验证流程 train_size = int(0.8 * len(dataset)) train_dataset = torch.utils.data.Subset(dataset, range(train_size)) val_dataset = torch.utils.data.Subset(dataset, range(train_size, len(dataset))) train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False) # 简单训练和验证 model.train() for epoch in range(15): # 减少epoch数以加快评估 for inputs, targets in train_loader: inputs, targets = inputs.to(device), targets.to(device) optimizer.zero_grad() outputs = model(inputs) loss = criterion(outputs, targets.squeeze()) loss.backward() torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) optimizer.step() # 验证 model.eval() val_loss = 0.0 with torch.no_grad(): for inputs, targets in val_loader: outputs = model(inputs.to(device)) loss = criterion(outputs, targets.squeeze().to(device)) if torch.isnan(loss) or torch.isinf(loss): return float('inf') val_loss += loss.item() return val_loss / len(val_loader) except Exception as e: print(f"参数评估失败: {str(e)}") return float('inf') # 定义搜索空间 search_spaces = [ Integer(32, 128, name='hidden_size'), Integer(1, 3, name='num_layers'), Real(0.2, 0.5, name='dropout'), Real(5e-4, 1e-3, prior='log-uniform', name='lr'), Categorical([64, 128, 256], name='batch_size'), Integer(3, 10, name='timesteps') # 优化时间步长 ] def adjust_hidden_size(params): """确保hidden_size*2能被4整除""" hs = params[0] params[0] = ((hs * 2) // 4) * 4 // 2 return params result = gp_minimize( score_fn, search_spaces, n_calls=50, random_state=42, verbose=True, n_jobs=1 # 并行执行 ) # 提取最佳参数 best_params = { 'hidden_size': result.x[0], 'num_layers': result.x[1], 'dropout': result.x[2], 'lr': result.x[3], 'batch_size': result.x[4], 'timesteps': result.x[5] } print("优化完成,最佳参数:", best_params) return best_params def collate_fn(batch): """增强型数据批处理函数""" # 解包批次数据 inputs, targets = zip(*batch) # 维度转换 (已包含在数据集中) # inputs = [batch_size, features, seq_len] # 数据增强(可选) # 添加高斯噪声 noise = torch.randn_like(inputs) * 0.05 inputs = inputs + noise # 归一化处理(可选) # mean = inputs.mean(dim=(1,2), keepdim=True) # std = inputs.std(dim=(1,2), keepdim=True) # inputs = (inputs - mean) / (std + 1e-8) return inputs.permute(0, 2, 1), torch.stack(targets) # [batch, seq_len, features] # 6. 评估函数 def evaluate_model(model, test_loader, criterion, test_indices, y_scaler=None): """评估模型性能""" model.eval() test_loss = 0.0 y_true = [] y_pred = [] all_indices = [] with torch.no_grad(): for batch_idx, (inputs, targets) in enumerate(test_loader): inputs, targets = inputs.to(device), targets.to(device) outputs = model(inputs) if outputs.dim() == 1: outputs = outputs.unsqueeze(1) loss = criterion(outputs, targets) test_loss += loss.item() * inputs.size(0) # 收集预测结果 y_true.extend(targets.cpu().numpy()) y_pred.extend(outputs.cpu().numpy()) # 获取原始数据集中的索引 current_indices = test_indices[batch_idx * test_loader.batch_size: (batch_idx + 1) * test_loader.batch_size] all_indices.extend(current_indices) y_true = np.array(y_true).reshape(-1) y_pred = np.array(y_pred).reshape(-1) if y_scaler is not None: y_true = y_scaler.inverse_transform(y_true.reshape(-1, 1)).flatten() y_pred = y_scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten() # 基础指标 metrics = { 'MSE': mean_squared_error(y_true, y_pred), 'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)), 'MAE': mean_absolute_error(y_true, y_pred), 'R2': r2_score(y_true, y_pred), 'MAPE': np.mean(np.abs((y_true - y_pred) / (y_true + 1e-8))) * 100, # 避免除0 'indices': all_indices, # 添加原始索引 'y_true_original': y_true, 'y_pred_original': y_pred, 'test_loss': test_loss } # 可视化误差分布 errors = y_true - y_pred plt.figure(figsize=(12, 6)) sns.histplot(errors, kde=True, bins=50) plt.title('Error Distribution') plt.savefig('error_distribution.png') plt.close() return metrics, y_true, y_pred def collate_fn(batch): """增强型数据批处理函数""" # 解包批次数据 inputs, targets = zip(*batch) # 维度转换 (已包含在数据集中) # inputs = [batch_size, features, seq_len] # 数据增强(可选) # 添加高斯噪声 noise = torch.randn_like(inputs) * 0.05 inputs = inputs + noise # 归一化处理(可选) # mean = inputs.mean(dim=(1,2), keepdim=True) # std = inputs.std(dim=(1,2), keepdim=True) # inputs = (inputs - mean) / (std + 1e-8) return inputs.permute(0, 2, 1), torch.stack(targets) # [batch, seq_len, features] # 7. 主函数 def main(): # 1. 加载和预处理数据 print("正在加载和预处理数据...") datetimes, X_structured, y, location_codes, scaler , y_scaler= load_and_preprocess_data() # 2. 特征选择 print('正在进行特征选择') # 修改为选择前15%特征 n_features = int(X_structured.shape[1] * 0.15) X_selected, feature_importances, top_features = select_features_by_importance( X_structured, y, n_features ) X_selected = X_structured[top_features] print(f"选择后的特征及其重要性:") for feature, importance in zip(top_features, feature_importances): print(f"{feature}: {importance:.4f}") print(X_selected) # 绘制特征重要性图 plot_feature_importance(top_features, feature_importances, Path("feature_importance.png")) # 3. 创建时间序列数据集 print("正在创建时间序列数据集...") timesteps = 5 dataset = TimeSeriesDataset(X_selected, y, timesteps) # 4. 数据划分 train_size = int(0.8 * len(dataset)) train_indices = list(range(train_size)) test_indices = list(range(train_size, len(dataset))) train_dataset = torch.utils.data.Subset(dataset, train_indices) test_dataset = torch.utils.data.Subset(dataset, test_indices) # 5. 贝叶斯优化超参数 print("正在进行贝叶斯优化...") try: best_params = optimize_hyperparameters( X_selected.iloc[:train_size], y[:train_size].copy(), input_size=X_selected.shape[1] ) print("最佳参数:", best_params) except Exception as e: print(f"贝叶斯优化失败: {str(e)}") # 6. 使用最佳参数训练最终模型 torch.cuda.empty_cache() # 清理 GPU 缓存 print("\n使用最佳参数训练模型...") # 获取并验证batch_size batch_size = int(best_params.get('batch_size')) print(f"实际使用的batch_size类型: {type(batch_size)}, 值: {batch_size}") # 调试输出 model = LSTMModel( input_size=X_selected.shape[1], hidden_size=int(best_params['hidden_size']), num_layers=int(best_params['num_layers']), dropout=float(best_params['dropout']) ).to(device) # 数据加载器 train_loader = DataLoader( train_dataset, batch_size=int(batch_size), shuffle=True, # 训练集需要打乱 collate_fn=collate_fn, num_workers=4, # 多进程加载 pin_memory=True # 加速GPU传输 ) val_loader = DataLoader( test_dataset, batch_size=int(batch_size)*2, # 更大的批次提升验证效率 shuffle=False, # 验证集不需要打乱 collate_fn=lambda batch: ( torch.stack([x for x, y in batch]).permute(0, 2, 1), torch.stack([y for x, y in batch]) ), num_workers=2, pin_memory=True ) # 损失函数和优化器 criterion = nn.HuberLoss(delta=1.0) optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50) # 训练模型 model, history = train_model( model, train_loader, val_loader, criterion, optimizer, scheduler=scheduler, epochs=200, patience=15) torch.cuda.empty_cache() # 清理 GPU 缓存 # 7. 评估模型 print("\n评估模型性能...") metrics, y_true, y_pred = evaluate_model(model, val_loader, criterion, test_indices, y_scaler) print(f"测试集 MSE: {metrics['MSE']:.4f}, MAE: {metrics['MAE']:.4f}, R2: {metrics['R2']:.4f}") # 8. 保存所有结果 print("\n保存所有结果...") output_dir = Path(r"D:\result2") output_dir.mkdir(parents=True, exist_ok=True) # 保存评估指标 metrics_df = pd.DataFrame({ 'Metric': ['MSE', 'MAE', 'R2', 'MAPE', 'Test Loss'], 'Value': [metrics['MSE'], metrics['MAE'], metrics['R2'], metrics['MAPE'], metrics['test_loss']] }) metrics_df.to_csv(output_dir / 'evaluation_metrics.csv', index=False) # 保存训练历史 history_df = pd.DataFrame(history) history_df.to_csv(output_dir / 'training_history.csv', index=False) # 保存预测结果与原始数据 pred_indices = [i + timesteps - 1 for i in metrics['indices']] # 调整索引以匹配原始数据 # 确保我们有足够的datetime和locationCode数据 if len(datetimes) > max(pred_indices) and len(location_codes) > max(pred_indices): y_true = y_true.flatten() # 确保是一维 y_pred = y_pred.flatten() # 确保是一维 result_df = pd.DataFrame({ 'datetime': datetimes.iloc[pred_indices].values, 'locationCode': location_codes.iloc[pred_indices].values, 'true_value': y_true, 'predicted_value': y_pred }) # 有条件地添加分位数 if y_pred.shape[1] > 2: result_df['predicted_lower'] = y_pred[:, 0] # 10%分位数 result_df['predicted_upper'] = y_pred[:, 2] # 90%分位数 # 添加其他特征 for i, feature in enumerate(X_selected.columns): result_df[feature] = X_selected.iloc[pred_indices, i].values result_df.to_csv(output_dir / 'predictions_with_metadata.csv', index=False) else: print("警告: datetime或locationCode数据不足,无法完全匹配预测结果") # 保存基础预测结果 pd.DataFrame({ 'true_value': y_true.flatten(), 'predicted_value': y_pred.flatten() }).to_csv(output_dir / 'predictions.csv', index=False) # 9. 可视化结果 plt.figure(figsize=(12, 6)) plt.plot(history['train_loss'], label='训练损失') plt.plot(history['val_loss'], label='验证损失') plt.xlabel('Epoch') plt.ylabel('Loss') plt.title('训练过程') plt.legend() plt.savefig(output_dir / 'training_process.png', dpi=300) plt.close() # 添加预测结果可视化 plt.figure(figsize=(15, 6)) plt.plot(y_true[:200], label='真实值') plt.plot(y_pred[:200], label='预测值') # 只使用中位数预测 plt.title('预测结果对比') plt.legend() plt.savefig(output_dir / 'prediction_comparison.png', dpi=300) plt.show() # 误差分布图 errors = y_true - y_pred[:, 1] plt.hist(errors, bins=50) plt.title('预测误差分布') plt.savefig(output_dir / 'error_distribution.png', dpi=300) # 保存图像 plt.close() # # 添加分位数预测可视化 # plt.figure(figsize=(15, 6)) # plt.plot(y_true[:100], label='真实值') # plt.plot(y_pred[:100, 0], label='10%分位数') # plt.plot(y_pred[:100, 1], label='中位数') # plt.plot(y_pred[:100, 2], label='90%分位数') # plt.legend() # plt.savefig(output_dir / 'quantile_predictions.png', dpi=300) # 保存图像 # plt.close() # 9. 保存模型 if metrics['r2'] > 0.8: model_path = output_dir / 'best_model.pth' torch.save(model.state_dict(), model_path) print(f"模型保存成功: {model_path}") print(f"所有结果已保存到 {output_dir}") if __name__ == "__main__": warnings.filterwarnings('ignore') start_time = time.time() main() print(f"总运行时间: {(time.time() - start_time) / 60:.2f}分钟")参数评估失败: permute(sparse_coo): number of dimensions in the tensor input does not match the length of the desired ordering of dimensions i.e. input.dim() = 3 is not equal to len(dims) = 2 Iteration No: 5 ended. Evaluation done at random point. Time taken: 0.0120 Function value obtained: inf Current minimum: inf Iteration No: 6 started. Evaluating function at random point. 数据形状 - X: (21168, 3), y: (21168, 1) 参数评估失败: permute(sparse_coo): number of dimensions in the tensor input does not match the length of the desired ordering of dimensions i.e. input.dim() = 3 is not equal to len(dims) = 2 Iteration No: 6 ended. Evaluation done at random point. Time taken: 0.0170 Function value obtained: inf Current minimum: inf Iteration No: 7 started. Evaluating function at random point. 数据形状 - X: (21168, 3), y: (21168, 1) 参数评估失败: permute(sparse_coo): number of dimensions in the tensor input does not match the length of the desired ordering of dimensions i.e. input.dim() = 3 is not equal to len(dims) = 2 Iteration No: 7 ended. Evaluation done at random point. Time taken: 0.0126 Function value obtained: inf Current minimum: inf Iteration No: 8 started. Evaluating function at random point. 数据形状 - X: (21168, 3), y: (21168, 1) 参数评估失败: permute(sparse_coo): number of dimensions in the tensor input does not match the length of the desired ordering of dimensions i.e. input.dim() = 3 is not equal to len(dims) = 2 Iteration No: 8 ended. Evaluation done at random point. Time taken: 0.0126 Function value obtained: inf Current minimum: inf Iteration No: 9 started. Evaluating function at random point. 数据形状 - X: (21168, 3), y: (21168, 1) 参数评估失败: permute(sparse_coo): number of dimensions in the tensor input does not match the length of the desired ordering of dimensions i.e. input.dim() = 3 is not equal to len(dims) = 2 Iteration No: 9 ended. Evaluation done at random point. Time taken: 0.0085 Function value obtained: inf Current minimum: inf Iteration No: 10 started. Evaluating function at random point. 数据形状 - X: (21168, 3), y: (21168, 1) 参数评估失败: permute(sparse_coo): number of dimensions in the tensor input does not match the length of the desired ordering of dimensions i.e. input.dim() = 3 is not equal to len(dims) = 2 贝叶斯优化失败: Input y contains infinity or a value too large for dtype('float64').结合代码分析为啥优化失败 并且改造

import os import re import glob import numpy as np import pandas as pd import matplotlib.pyplot as plt from pyproj import Transformer from sklearn.preprocessing import StandardScaler import torch import torch.nn as nn import torch.optim as optim from torch.utils.data import Dataset, DataLoader from torch.utils.data import random_split plt.rcParams['font.sans-serif'] = ['SimHei'] plt.rcParams['axes.unicode_minus'] = False # ============================== # 1. 增强型文件加载器 # ============================== class EnhancedTropoLoader: def __init__(self, data_root): self.data_root = data_root self.transformer = Transformer.from_crs("EPSG:4978", "EPSG:4326") self.site_cache = {} self.feature_names = [ 'trotot', 'tgntot', 'tgetot', 'stddev', 'lat', 'lon', 'alt', 'hour' ] def _parse_site_code(self, filename): """改进版站点代码解析,支持多种格式""" patterns = [ r"_([A-Z]{4})\d{2}[A-Z]{3}_TRO\.TRO$", # ABMF00GLP → ABMF r"_([A-Z]{4}\d{2})[A-Z]{3}_TRO\.TRO$", # AC2300USA → AC23 r"_([A-Z]{4})00([A-Z]{3})_TRO\.TRO$", # ABPO00MDG → ABPO r"_([A-Z]{4})_TRO\.TRO$" # ABPO_TRO.TRO → ABPO ] for pattern in patterns: match = re.search(pattern, filename) if match: code = match.group(1) # 清理尾部数字(如果存在) return re.sub(r'\d{2}$', '', code) if len(code) > 4 else code return None def _parse_file(self, file_path): """解析单个文件""" try: # 获取站点代码 filename = os.path.basename(file_path) site_code = self._parse_site_code(filename) if not site_code: print(f"跳过无法解析站点的文件: {file_path}") return None # 读取坐标 coordinates = self._get_coordinates(file_path) if not coordinates: print(f"跳过无有效坐标的文件: {file_path}") return None # 坐标转换 lat, lon, alt = self.transformer.transform( coordinates['x'], coordinates['y'], coordinates['z'] ) if None in (lat, lon, alt): return None # 读取观测数据 records = self._read_observations(file_path, site_code) if len(records) < 10: print(f"跳过数据不足的文件: {file_path}") return None # 创建DataFrame df = pd.DataFrame(records) df['lat'] = lat df['lon'] = lon df['alt'] = alt return df except Exception as e: print(f"文件解析失败 [{file_path}]: {str(e)}") return None def _get_coordinates(self, file_path): """获取站点坐标""" if file_path in self.site_cache: return self.site_cache[file_path] coordinates = None try: with open(file_path, 'r') as f: current_section = None for line in f: line = line.strip() if line.startswith('+'): current_section = line[1:] elif line.startswith('-'): current_section = None elif current_section == 'TROP/STA_COORDINATES' and not line.startswith('*'): parts = line.split() if len(parts) >= 7: coordinates = { 'x': float(parts[4]), 'y': float(parts[5]), 'z': float(parts[6]) } break except Exception as e: print(f"坐标解析失败: {str(e)}") self.site_cache[file_path] = coordinates return coordinates def _read_observations(self, file_path, site_code): """读取观测数据""" records = [] try: with open(file_path, 'r') as f: current_section = None for line in f: line = line.strip() if line.startswith('+'): current_section = line[1:] elif line.startswith('-'): current_section = None elif current_section == 'TROP/SOLUTION' and not line.startswith('*'): parts = line.split() if len(parts) >= 7: records.append({ 'epoch': parts[1], 'trotot': float(parts[2]), 'stddev': float(parts[3]), 'tgntot': float(parts[4]), 'tgetot': float(parts[6]), 'site': site_code }) except Exception as e: print(f"观测数据读取失败: {str(e)}") return records def load_all_data(self): """加载所有数据""" all_dfs = [] for file_path in glob.glob(os.path.join(self.data_root, '**', '*.TRO'), recursive=True): df = self._parse_file(file_path) if df is not None: all_dfs.append(df) print(f"成功加载: {file_path} 记录数: {len(df)}") return pd.concat(all_dfs) if all_dfs else pd.DataFrame() # ============================== # 2. 时间序列数据集 # ============================== class TemporalDataset(Dataset): def __init__(self, data, window_size=6): self.window_size = window_size self.site_to_id = {site: idx for idx, site in enumerate(data['site'].unique())} # 按站点和时间排序 data = data.sort_values(['site', 'time']) # 生成序列 self.sequences = [] self.targets = [] self.site_labels = [] self.timestamps = [] for site, group in data.groupby('site'): values = group[self.feature_names].values times = group['time'].values unix_times = (times.astype(np.datetime64) - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's') for i in range(len(values) - self.window_size): self.sequences.append(values[i:i + self.window_size]) self.targets.append(values[i + self.window_size][0]) self.site_labels.append(self.site_to_id[site]) self.timestamps.append(unix_times[i + self.window_size]) self.num_samples = len(self.sequences) def __len__(self): return self.num_samples def __getitem__(self, idx): # 添加高斯噪声增强 noise = torch.randn(self.window_size, len(self.feature_names)) * 0.01 return ( torch.FloatTensor(self.sequences[idx]) + noise, torch.FloatTensor([self.targets[idx]]), torch.tensor(self.site_labels[idx], dtype=torch.long), torch.FloatTensor([self.timestamps[idx]]) ) # ============================== # 3. 改进的LSTM模型 # ============================== class EnhancedLSTM(nn.Module): def __init__(self, input_size, num_sites, hidden_size=128): super().__init__() self.embedding = nn.Embedding(num_sites, 16) self.lstm = nn.LSTM( input_size, hidden_size, num_layers=3, bidirectional=True, batch_first=True, dropout=0.4 ) self.attention = nn.Sequential( nn.Linear(hidden_size * 2, 32), nn.Tanh(), nn.Linear(32, 1), nn.Softmax(dim=1) ) self.regressor = nn.Sequential( nn.Linear(hidden_size * 2 + 16, 64), nn.LayerNorm(64), nn.ReLU(), nn.Dropout(0.3), nn.Linear(64, 32), nn.LayerNorm(32), nn.ReLU(), nn.Linear(32, 1) ) def forward(self, x, site_ids): lstm_out, _ = self.lstm(x) attn_weights = self.attention(lstm_out) context = torch.sum(attn_weights * lstm_out, dim=1) site_emb = self.embedding(site_ids) combined = torch.cat([context, site_emb], dim=1) return self.regressor(combined) # ============================== # 4. 训练管理器 # ============================== class TrainingManager: def __init__(self, data_root): self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') self.loader = EnhancedTropoLoader(data_root) self.scaler = StandardScaler() def _preprocess(self, raw_df): """数据预处理""" # 时间解析 raw_df['time'] = raw_df['epoch'].apply( lambda x: pd.to_datetime( f"20{x.split(':')[0]}-{x.split(':')[1]}", format='%Y-%j' ) + pd.to_timedelta(int(x.split(':')[2]), unit='s') ) raw_df = raw_df.dropna(subset=['time']) # 特征工程 raw_df['hour'] = raw_df['time'].dt.hour raw_df['doy_sin'] = np.sin(2 * np.pi * raw_df['time'].dt.dayofyear / 365) raw_df['doy_cos'] = np.cos(2 * np.pi * raw_df['time'].dt.dayofyear / 365) # 标准化 raw_df[self.loader.feature_names] = self.scaler.fit_transform( raw_df[self.loader.feature_names] ) return raw_df def train(self, window_size=6, epochs=200, batch_size=64): # 加载数据 raw_df = self.loader.load_all_data() if raw_df.empty: raise ValueError("未加载到有效数据") # 预处理 processed_df = self._preprocess(raw_df) # 创建数据集 full_dataset = TemporalDataset(processed_df, window_size) print(f"数据集样本数量: {len(full_dataset)}") # 划分数据集 train_size = int(0.8 * len(full_dataset)) test_size = len(full_dataset) - train_size train_dataset, test_dataset = random_split( full_dataset, [train_size, test_size], generator=torch.Generator().manual_seed(42) ) # 初始化模型 model = EnhancedLSTM( input_size=len(self.loader.feature_names), num_sites=len(full_dataset.site_to_id), hidden_size=128 ).to(self.device) # 训练配置 optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5) scheduler = optim.lr_scheduler.OneCycleLR( optimizer, max_lr=1e-3, steps_per_epoch=len(train_loader), epochs=epochs ) criterion = nn.MSELoss() # 训练循环 train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) best_loss = float('inf') history = {'train': [], 'val': []} for epoch in range(epochs): model.train() train_loss = 0 for seq, target, site, _ in train_loader: seq = seq.to(self.device) target = target.to(self.device) site = site.to(self.device) optimizer.zero_grad() pred = model(seq, site) loss = criterion(pred, target) loss.backward() nn.utils.clip_grad_norm_(model.parameters(), 1.0) optimizer.step() train_loss += loss.item() scheduler.step() # 验证 model.eval() val_loss = 0 with torch.no_grad(): val_loader = DataLoader(test_dataset, batch_size=256) for seq, target, site, _ in val_loader: seq = seq.to(self.device) target = target.to(self.device) site = site.to(self.device) pred = model(seq, site) val_loss += criterion(pred, target).item() avg_train = train_loss / len(train_loader) avg_val = val_loss / len(val_loader) history['train'].append(avg_train) history['val'].append(avg_val) # 保存最佳模型 if avg_val < best_loss: best_loss = avg_val torch.save(model.state_dict(), 'best_model.pth') print(f"Epoch {epoch + 1:03d} | Train Loss: {avg_train:.4f} | Val Loss: {avg_val:.4f}") return model, history def evaluate(self, model, output_dir='results'): """评估与结果保存""" os.makedirs(output_dir, exist_ok=True) # 重新加载数据 raw_df = self.loader.load_all_data() processed_df = self._preprocess(raw_df) full_dataset = TemporalDataset(processed_df, window_size=6) # 预测 model.eval() results = [] with torch.no_grad(): test_loader = DataLoader(full_dataset, batch_size=256) for seq, target, site, timestamp in test_loader: seq = seq.to(self.device) site = site.to(self.device) pred = model(seq, site).cpu().numpy().flatten() true = target.numpy().flatten() times = pd.to_datetime(timestamp.numpy().flatten(), unit='s') for p, t, s, ts in zip(pred, true, site, times): results.append({ 'site': list(full_dataset.site_to_id.keys())[s], 'timestamp': ts, 'true': t, 'pred': p }) # 反标准化 result_df = pd.DataFrame(results) dummy = np.zeros((len(result_df), len(self.loader.feature_names))) dummy[:, 0] = result_df['true'] result_df['true'] = self.scaler.inverse_transform(dummy)[:, 0] dummy[:, 0] = result_df['pred'] result_df['pred'] = self.scaler.inverse_transform(dummy)[:, 0] # 保存结果 self._save_results(result_df, output_dir) return result_df def _save_results(self, df, output_dir): """保存结果和可视化""" # 按站点保存 for site, group in df.groupby('site'): site_dir = os.path.join(output_dir, site) os.makedirs(site_dir, exist_ok=True) # 保存数据 csv_path = os.path.join(site_dir, f'{site}_predictions.csv') group.to_csv(csv_path, index=False) # 生成可视化 self._plot_predictions(group, site, site_dir) # 保存汇总 df.to_csv(os.path.join(output_dir, 'all_predictions.csv'), index=False) print(f"结果已保存至 {output_dir}") def _plot_predictions(self, data, site, save_dir): """生成可视化图表""" plt.figure(figsize=(16, 9)) plt.plot(data['timestamp'], data['true'], label='真实值', linewidth=1.5) plt.plot(data['timestamp'], data['pred'], label='预测值', linestyle='--', alpha=0.8) plt.title(f'站点 {site} 对流层延迟预测 (MAE: {np.mean(np.abs(data["true"] - data["pred"])):.2f}mm)') plt.xlabel('时间') plt.ylabel('延迟量 (mm)') plt.legend() plt.grid(True) plt.gcf().autofmt_xdate() plot_path = os.path.join(save_dir, f'{site}_comparison.png') plt.savefig(plot_path, dpi=300, bbox_inches='tight') plt.close() # ============================== # 主程序 # ============================== if __name__ == "__main__": try: trainer = TrainingManager(data_root='./data') model, history = trainer.train(epochs=200) results = trainer.evaluate(model) # 生成统计报告 report = results.groupby('site').apply(lambda x: pd.Series({ 'MAE(mm)': np.mean(np.abs(x['true'] - x['pred'])), 'Max_True': x['true'].max(), 'Min_True': x['true'].min(), 'Max_Pred': x['pred'].max(), 'Min_Pred': x['pred'].min(), 'Samples': len(x) })).reset_index() print("\n站点预测性能报告:") print(report.to_markdown(index=False)) # 绘制训练曲线 plt.figure(figsize=(12, 6)) plt.plot(history['train'], label='训练损失') plt.plot(history['val'], label='验证损失') plt.title('训练过程') plt.xlabel('Epoch') plt.ylabel('MSE Loss') plt.legend() plt.savefig('training_history.png', bbox_inches='tight') except Exception as e: print(f"运行出错: {str(e)}") "D:\Pycharm 2024\idle\pythonProject1\.venv\Scripts\python.exe" D:\idle\test-lstm\LSTM_TROP_TEST.py 成功加载: ./data\IGS0OPSFIN_20250010000_01D_05M_ABMF00GLP_TRO.TRO 记录数: 288 跳过无法解析站点的文件: ./data\IGS0OPSFIN_20250010000_01D_05M_AC2300USA_TRO.TRO 成功加载: ./data\IGS0OPSFIN_20250020000_01D_05M_ABMF00GLP_TRO.TRO 记录数: 288 成功加载: ./data\IGS0OPSFIN_20250020000_01D_05M_ABPO00MDG_TRO.TRO 记录数: 288 运行出错: 'TemporalDataset' object has no attribute 'feature_names' 进程已结束,退出代码为 0

重新发送给我这份代码更改后的完整代码,仔细检查不要有错误和遗漏,不要省略任何部分(逻辑相似的也不行): import os import re import glob import numpy as np import pandas as pd import matplotlib.pyplot as plt from pyproj import Transformer from sklearn.preprocessing import StandardScaler import torch import torch.nn as nn import torch.optim as optim from torch.utils.data import Dataset, DataLoader # 设置中文显示 plt.rcParams['font.sans-serif'] = ['SimHei'] plt.rcParams['axes.unicode_minus'] = False class DirectoryBasedLoader: def __init__(self, data_root): self.data_root = data_root self.transformer = Transformer.from_crs("EPSG:4978", "EPSG:4326") self.site_cache = {} self.feature_names = [ 'trotot', 'tgntot', 'tgetot', 'stddev', 'lat', 'lon', 'alt', 'hour', 'doy_sin', 'doy_cos' ] def _parse_coordinates(self, file_path): """解析站点坐标(带缓存)""" if file_path in self.site_cache: return self.site_cache[file_path] coordinates = None try: with open(file_path, 'r', encoding='utf-8') as f: current_section = None for line in f: line = line.strip() if line.startswith('+'): current_section = line[1:] elif line.startswith('-'): current_section = None elif current_section == 'TROP/STA_COORDINATES' and not line.startswith('*'): parts = line.split() if len(parts) >= 7: coordinates = { 'x': float(parts[4]), 'y': float(parts[5]), 'z': float(parts[6]) } break except UnicodeDecodeError: try: with open(file_path, 'r', encoding='latin-1') as f: # 相同的内容解析逻辑... current_section = None for line in f: line = line.strip() if line.startswith('+'): current_section = line[1:] elif line.startswith('-'): current_section = None elif current_section == 'TROP/STA_COORDINATES' and not line.startswith('*'): parts = line.split() if len(parts) >= 7: coordinates = { 'x': float(parts[4]), 'y': float(parts[5]), 'z': float(parts[6]) } break except Exception as e: print(f"坐标解析失败: {str(e)}") except Exception as e: print(f"坐标解析失败: {str(e)}") self.site_cache[file_path] = coordinates return coordinates def _convert_coords(self, coords): """坐标转换(带异常处理)""" try: lat, lon, alt = self.transformer.transform( coords['x'], coords['y'], coords['z'] ) return lat, lon, alt except Exception as e: print(f"坐标转换失败: {str(e)}") return None, None, None def _parse_observations(self, file_path, site_code): """解析观测数据""" records = [] try: with open(file_path, 'r', encoding='utf-8') as f: current_section = None for line in f: line = line.strip() if line.startswith('+'): current_section = line[1:] elif line.startswith('-'): current_section = None elif current_section == 'TROP/SOLUTION' and not line.startswith('*'): parts = line.split() if len(parts) >= 7: records.append({ 'epoch': parts[1], 'trotot': float(parts[2]), 'stddev': float(parts[3]), 'tgntot': float(parts[4]), 'tgetot': float(parts[6]), 'site': site_code # 使用目录名作为站点代码 }) except UnicodeDecodeError: try: with open(file_path, 'r', encoding='latin-1') as f: # 相同的内容解析逻辑... current_section = None for line in f: line = line.strip() if line.startswith('+'): current_section = line[1:] elif line.startswith('-'): current_section = None elif current_section == 'TROP/SOLUTION' and not line.startswith('*'): parts = line.split() if len(parts) >= 7: records.append({ 'epoch': parts[1], 'trotot': float(parts[2]), 'stddev': float(parts[3]), 'tgntot': float(parts[4]), 'tgetot': float(parts[6]), 'site': site_code # 使用目录名作为站点代码 }) except Exception as e: print(f"观测数据读取失败: {str(e)}") except Exception as e: print(f"观测数据读取失败: {str(e)}") return records def load_all_data(self): """加载目录结构数据""" all_dfs = [] # 获取所有站点目录 site_dirs = [d for d in glob.glob(os.path.join(self.data_root, '*')) if os.path.isdir(d)] for site_dir in site_dirs: site_code = os.path.basename(site_dir) print(f"正在加载站点: {site_code}") # 加载该站点所有数据文件 for file_path in glob.glob(os.path.join(site_dir, '*.TRO')): # 解析坐标 coords = self._parse_coordinates(file_path) if not coords: print(f"跳过无有效坐标的文件: {file_path}") continue # 坐标转换 lat, lon, alt = self._convert_coords(coords) if None in (lat, lon, alt): continue # 解析观测数据 records = self._parse_observations(file_path, site_code) if not records: print(f"跳过无有效数据的文件: {file_path}") continue # 创建DataFrame df = pd.DataFrame(records) df['lat'] = lat df['lon'] = lon df['alt'] = alt all_dfs.append(df) print(f"成功加载: {file_path} 记录数: {len(df)}") return pd.concat(all_dfs) if all_dfs else pd.DataFrame() # ============================== # 2. 时间序列数据集 # ============================== # ============================== # 2. 时间序列数据集(修正后) # ============================== class TemporalDataset(Dataset): def __init__(self, data, window_size=6, features=None): self.window_size = window_size self.feature_names = features self.site_to_id = {site: idx for idx, site in enumerate(data['site'].unique())} # 数据预处理 data = data.sort_values(['site', 'time']).dropna(subset=self.feature_names) if data.empty: raise ValueError("输入数据为空或包含缺失值") # 生成序列数据 self.sequences = [] self.targets = [] self.site_labels = [] self.timestamps = [] for site, group in data.groupby('site'): if len(group) < self.window_size + 1: continue values = group[self.feature_names].values times = group['time'].values unix_times = (times.astype(np.datetime64) - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's') for i in range(len(values) - self.window_size): self.sequences.append(values[i:i + self.window_size]) self.targets.append(values[i + self.window_size][0]) self.site_labels.append(self.site_to_id[site]) self.timestamps.append(unix_times[i + self.window_size]) self.num_samples = len(self.sequences) if self.num_samples == 0: raise ValueError("没有生成有效样本") def __len__(self): return self.num_samples def __getitem__(self, idx): noise = torch.randn(self.window_size, len(self.feature_names)) * 0.01 return ( torch.FloatTensor(self.sequences[idx]) + noise, torch.FloatTensor([self.targets[idx]]), torch.tensor(self.site_labels[idx], dtype=torch.long), torch.FloatTensor([self.timestamps[idx]]) ) # ============================== # 3. 站点感知LSTM模型 # ============================== class SiteLSTM(nn.Module): def __init__(self, input_size, num_sites, hidden_size=64): super().__init__() self.embedding = nn.Embedding(num_sites, 8) self.lstm = nn.LSTM( input_size=input_size, hidden_size=hidden_size, num_layers=2, batch_first=True, dropout=0.3 ) self.regressor = nn.Sequential( nn.Linear(hidden_size + 8, 32), nn.LayerNorm(32), nn.ReLU(), nn.Dropout(0.2), nn.Linear(32, 1)) def forward(self, x, site_ids): lstm_out, _ = self.lstm(x) site_emb = self.embedding(site_ids) combined = torch.cat([lstm_out[:, -1, :], site_emb], dim=1) return self.regressor(combined) # ============================== # 4. 训练和评估模块 # ============================== class TropoTrainer: def __init__(self, data_root='./data'): self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') self.loader = DirectoryBasedLoader(data_root) self.scaler = StandardScaler() def _preprocess(self, raw_df): # 空数据检查 if raw_df.empty: raise ValueError("原始数据为空,请检查数据加载逻辑") """数据预处理""" # 时间解析增强 try: raw_df['time'] = pd.to_datetime( raw_df['epoch'].str.replace(r'^(\d{2}):', r'20\1:', regex=True), format='%Y-%j:%H:%M:%S', errors='coerce' ) time_mask = raw_df['time'].isna() if time_mask.any(): print(f"发现{time_mask.sum()}条无效时间记录,示例无效数据:") print(raw_df[time_mask].head(2)) raw_df = raw_df[~time_mask].copy() except Exception as e: print(f"时间解析失败: {str(e)}") raise # 特征工程 if 'time' not in raw_df.columns: raise KeyError("时间列缺失,预处理失败") raw_df['hour'] = raw_df['time'].dt.hour raw_df['doy_sin'] = np.sin(2 * np.pi * raw_df['time'].dt.dayofyear / 365.25) raw_df['doy_cos'] = np.cos(2 * np.pi * raw_df['time'].dt.dayofyear / 365.25) # 特征列验证 required_features = ['trotot', 'tgntot', 'tgetot', 'stddev', 'lat', 'lon', 'alt', 'hour'] missing_features = [f for f in required_features if f not in raw_df.columns] if missing_features: raise KeyError(f"缺失关键特征列: {missing_features}") # 标准化 try: self.scaler.fit(raw_df[required_features]) raw_df[required_features] = self.scaler.transform(raw_df[required_features]) except ValueError as e: print(f"标准化失败: {str(e)}") print("数据统计信息:") print(raw_df[required_features].describe()) raise return raw_df def _inverse_transform(self, values): """反标准化""" dummy = np.zeros((len(values), len(self.scaler.feature_names_in_))) dummy[:, 0] = values return self.scaler.inverse_transform(dummy)[:, 0] def train(self, window_size=6, epochs=100, batch_size=32): try: # 加载数据 raw_df = self.loader.load_all_data() if raw_df.empty: raise ValueError("数据加载器返回空DataFrame") # 预处理 processed_df = self._preprocess(raw_df) print(f"预处理后数据量: {len(processed_df)}条") print("特征矩阵示例:") print(processed_df[['site', 'time', 'trotot']].head(3)) # 创建数据集 full_dataset = TemporalDataset(processed_df, window_size) print(f"生成有效样本数: {len(full_dataset)}") # 划分数据集(按时间顺序) train_size = int(0.8 * len(full_dataset)) train_dataset, test_dataset = torch.utils.data.random_split( full_dataset, [train_size, len(full_dataset) - train_size], generator=torch.Generator().manual_seed(42) ) # 初始化模型 model = SiteLSTM( input_size=len(full_dataset.feature_cols), num_sites=len(full_dataset.site_to_id) ).to(self.device) # 训练配置 optimizer = optim.AdamW(model.parameters(), lr=1e-4) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5) criterion = nn.MSELoss() # 训练循环 train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) best_loss = float('inf') history = {'train': [], 'val': []} for epoch in range(epochs): # 训练阶段 model.train() train_loss = 0 for seq, target, site, _ in train_loader: seq = seq.to(self.device) target = target.to(self.device) site = site.to(self.device) optimizer.zero_grad() pred = model(seq, site) loss = criterion(pred, target) loss.backward() nn.utils.clip_grad_norm_(model.parameters(), 1.0) optimizer.step() train_loss += loss.item() # 验证阶段 model.eval() val_loss = 0 predictions = [] with torch.no_grad(): val_loader = DataLoader(test_dataset, batch_size=128) for seq, target, site, _ in val_loader: seq = seq.to(self.device) target = target.to(self.device) site = site.to(self.device) pred = model(seq, site) val_loss += criterion(pred, target).item() predictions.append(pred.cpu().numpy()) # 记录历史 avg_train = train_loss / len(train_loader) avg_val = val_loss / len(val_loader) history['train'].append(avg_train) history['val'].append(avg_val) scheduler.step(avg_val) # 保存最佳模型 if avg_val < best_loss: best_loss = avg_val torch.save(model.state_dict(), 'best_model.pth') print(f"Epoch {epoch + 1:03d} | Train Loss: {avg_train:.4f} | Val Loss: {avg_val:.4f}") # 加载最佳模型 model.load_state_dict(torch.load('best_model.pth')) except Exception as e: print(f"\n{'*' * 40}") print(f"训练失败详细原因: {str(e)}") print(f"错误类型: {type(e).__name__}") import traceback traceback.print_exc() print(f"{'*' * 40}\n") raise return model, history def evaluate(self, model, output_dir='results'): """评估并保存结果""" os.makedirs(output_dir, exist_ok=True) # 重新加载完整数据 raw_df = self.loader.load_all_data() processed_df = self._preprocess(raw_df) full_dataset = TemporalDataset(processed_df, window_size=6) # 创建数据加载器 test_loader = DataLoader(full_dataset, batch_size=128) # 收集结果 results = [] model.eval() with torch.no_grad(): for seq, target, site, timestamp in test_loader: # ...获取预测值... timestamp = timestamp.numpy().flatten() datetime_objs = pd.to_datetime(timestamp, unit='s') seq = seq.to(self.device) site = site.to(self.device) pred = model(seq, site).cpu().numpy().flatten() true = target.numpy().flatten() # 反标准化 pred = self._inverse_transform(pred) true = self._inverse_transform(true) # 收集数据 for p, t, s, ts in zip(pred, true, site, datetime_objs): results.append({ 'site': list(full_dataset.site_to_id.keys())[s], 'timestamp': ts, 'true': t, 'pred': p }) # 转换为DataFrame result_df = pd.DataFrame(results) # 按站点保存结果 for site, group in result_df.groupby('site'): site_dir = os.path.join(output_dir, site) os.makedirs(site_dir, exist_ok=True) # CSV文件 csv_path = os.path.join(site_dir, f'{site}_predictions.csv') group.to_csv(csv_path, index=False) # Excel文件 excel_path = os.path.join(site_dir, f'{site}_predictions.xlsx') group.to_excel(excel_path, index=False) # 生成对比图 plt.figure(figsize=(12, 6)) plt.plot(group['timestamp'], group['true'], label='真实值') plt.plot(group['timestamp'], group['pred'], label='预测值', linestyle='--') plt.title(f'站点 {site} 对流层延迟预测对比') plt.xlabel('时间') plt.ylabel('延迟量 (mm)') plt.legend() plt.gcf().autofmt_xdate() plot_path = os.path.join(site_dir, f'{site}_comparison.png') plt.savefig(plot_path, bbox_inches='tight') plt.close() # 保存汇总文件 result_df.to_csv(os.path.join(output_dir, 'all_predictions.csv'), index=False) result_df.to_excel(os.path.join(output_dir, 'all_predictions.xlsx'), index=False) print(f"结果已保存至 {output_dir} 目录") return result_df # ============================== # 主程序 # ============================== if __name__ == "__main__": # 初始化训练器 trainer = TropoTrainer(data_root='./data') try: # 训练模型 model, history = trainer.train(epochs=100) # 可视化训练过程 plt.figure(figsize=(10, 5)) plt.plot(history['train'], label='训练损失') plt.plot(history['val'], label='验证损失') plt.title('训练过程') plt.xlabel('Epoch') plt.ylabel('MSE Loss') plt.legend() plt.savefig('training_history.png', bbox_inches='tight') # 评估并保存结果 results = trainer.evaluate(model) # 生成统计报告 report = results.groupby('site').apply(lambda x: pd.Series({ 'MAE(mm)': np.mean(np.abs(x['pred'] - x['true'])), 'Max_True': np.max(x['true']), 'Min_True': np.min(x['true']), 'Max_Pred': np.max(x['pred']), 'Min_Pred': np.min(x['pred']), 'Samples': len(x) })).reset_index() print("\n站点预测性能报告:") print(report.to_markdown(index=False)) except Exception as e: print(f"运行出错: {str(e)}")

import numpy as np import pandas as pd import torch import torch.nn as nn from sklearn.preprocessing import MinMaxScaler import matplotlib.pyplot as plt # -------------------- 配置参数 -------------------- window_size = 20 # 平滑窗口大小 time_step = 50 # 时间步长 pretrain_epochs = 400 # 预训练轮次 finetune_epochs = 100 # 微调轮次 # -------------------- 数据读取函数 -------------------- def load_and_process(file_path): """读取并处理单个CSV文件""" df = pd.read_csv(file_path) df['date/time'] = pd.to_datetime(df['date/time'], format='%Y/%m/%d %H:%M') df.set_index('date/time', inplace=True) series = df['act. fil. curr. end'].rolling(window=window_size).mean().dropna() return series # -------------------- 加载多源数据集 -------------------- source_files = [ r'D:\Pycharm_program\CT\CT-data\tube_history_614372271_data.csv', r'D:\Pycharm_program\CT\CT-data\tube_history_628132271.csv', r'D:\Pycharm_program\CT\CT-data\tube_history_679242371.csv' ] # 加载并预处理源数据 source_series = [] for file in source_files: s = load_and_process(file) source_series.append(s) # 合并所有源数据用于标准化 all_source_data = pd.concat(source_series) scaler = MinMaxScaler(feature_range=(0, 1)) scaler.fit(all_source_data.values.reshape(-1, 1)) # -------------------- 创建预训练数据集 -------------------- def create_dataset(data, time_step=50): """创建时间序列数据集""" X, y = [], [] for i in range(len(data)-time_step): X.append(data[i:i+time_step]) y.append(data[i+time_step]) return np.array(X), np.array(y) # 生成源数据训练集 X_pretrain, y_pretrain = [], [] for s in source_series: scaled = scaler.transform(s.values.reshape(-1, 1)) X, y = create_dataset(scaled.flatten(), time_step) X_pretrain.append(X) y_pretrain.append(y) X_pretrain = np.concatenate(X_pretrain) y_pretrain = np.concatenate(y_pretrain) # 转换为PyTorch Tensor X_pretrain_tensor = torch.Tensor(X_pretrain) y_pretrain_tensor = torch.Tensor(y_pretrain) # -------------------- 模型定义 -------------------- class LSTMModel(nn.Module): def __init__(self, input_size=50, hidden_size=50, output_size=1): super(LSTMModel, self)._

import numpy as np import pandas as pd import torch import torch.nn as nn import torch.nn.functional as F from torch.utils.data import TensorDataset, DataLoader from sklearn.preprocessing import MinMaxScaler from vmdpy import VMD from sklearn.cluster import OPTICS import math import matplotlib.pyplot as plt from torch.cuda import amp import time import logging # 设置日志 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) # -------------------- 配置参数 -------------------- CONFIG = { 'seq_length': 12, # 输入序列长度 'batch_size': 256, # 批大小 'epochs': 150, # 训练轮数 'd_model': 64, # 模型维度 'nhead': 4, # 注意力头数 'num_layers': 2, # Transformer层数 'dim_feedforward': 128, # 前馈网络维度 'dropout': 0.1, # Dropout率 'vmd_k': 8, # VMD分解模态数 'vmd_alpha': 2000, # VMD带宽约束 'vmd_tau': 0.1, # VMD噪声容忍度 'sru_hidden': 32, # SRU隐藏单元数 'sru_layers': 2, # SRU层数 'cluster_min_samples': 3, # 聚类最小样本数 'use_amp': True, # 启用混合精度训练 'device': 'cuda' if torch.cuda.is_available() else 'cpu' } # -------------------- 数据预处理模块 -------------------- def load_data(file_path): """加载并预处理风电数据""" logger.info(f"加载数据: {file_path}") try: data = pd.read_csv(file_path) logger.info(f"数据列: {data.columns.tolist()}") except Exception as e: logger.error(f"加载文件错误: {e}") raise time_col = 'Time' power_col = 'Power' # 检查必要列是否存在 required_cols = [time_col, power_col] missing_cols = [col for col in required_cols if col not in data.columns] if missing_cols: logger.warning(f"警告: 缺少必要列 {missing_cols}") # 如果缺少Time列,创建一个简单的时间索引 if time_col not in data.columns: logger.info("创建简单时间索引") data[time_col] = pd.date_range(start='2023-01-01', periods=len(data), freq='H') # 时间序列处理 data[time_col] = pd.to_datetime(data[time_col]) data = data.set_index(time_col).resample('1h').asfreq() data.index = data.index.fillna(pd.Timestamp.now()) # 功率处理 if power_col not in data.columns: logger.error("错误: 缺少功率列") raise ValueError("CSV文件必须包含功率列") data[power_col] = data[power_col].replace(0, np.nan) data[power_col] = data[power_col].interpolate(method='time') data[power_col] = data[power_col].fillna(0) # 添加风速特征 - 使用 windspeed_10m 和 windspeed_100m if 'windspeed_10m' in data.columns and 'windspeed_100m' in data.columns: logger.info("使用10m和100m风速数据计算有效风速") # 对于NaN值,进行插值 data['windspeed_10m'] = data['windspeed_10m'].interpolate(method='time').fillna(method='bfill') data['windspeed_100m'] = data['windspeed_100m'].interpolate(method='time').fillna(method='bfill') # 计算平均风速作为有效风速 data['EffectiveWind'] = (data['windspeed_10m'] + data['windspeed_100m']) / 2 else: logger.warning("风速数据缺失,生成模拟风速数据") # 基于功率生成合理的模拟风速 max_power = data[power_col].max() # 基本风速曲线:功率越高风速越高(但非线性) base_wind = np.clip(data[power_col] / max_power * 10, 2, 25) # 添加随机波动 random_fluctuation = np.random.normal(0, 1.5, len(data)) data['EffectiveWind'] = base_wind + random_fluctuation # 确保只返回必要的列 result_cols = ['Power'] if 'EffectiveWind' in data.columns: result_cols.append('EffectiveWind') logger.info(f"返回列: {result_cols}") return data[result_cols].values # -------------------- VMD分解模块(替代CEEMDAN)------------------- def vmd_decomposition(data, alpha=2000, tau=0.1, K=8): """执行VMD分解 - 比CEEMDAN更快更高效""" logger.info("开始VMD分解...") start_time = time.time() # 仅对功率数据进行分解 power_data = data[:, 0].flatten() # 执行VMD分解 u, u_hat, omega = VMD(power_data, alpha, tau, K, DC=0, init=1, tol=1e-7) # 保留分解结果和气象特征 imfs = u residual = power_data - np.sum(imfs, axis=0) # 添加残差作为最后一个分量 components = np.vstack([imfs, residual]) # 将气象特征附加到每个分量 components_with_features = [] for i in range(len(components)): comp = components[i] # 为每个分量添加气象特征 # 如果数据只有一列(功率),则复制该列作为特征 if data.shape[1] == 1: comp_with_features = np.column_stack((comp, np.zeros_like(comp))) else: comp_with_features = np.column_stack((comp, data[:, 1])) components_with_features.append(comp_with_features) logger.info(f"VMD分解完成,耗时: {time.time() - start_time:.2f}秒") return components_with_features # -------------------- 多维度特征提取 -------------------- def extract_features(component): """提取序列的多维度特征""" power_series = component[:, 0] # 1. 样本熵 def sample_entropy(series, m=2, alpha=0.2): n = len(series) if n < m + 1: return 0 std = np.std(series) r = alpha * std def _phi(_m): x = np.array([series[i:i + _m] for i in range(n - _m + 1)]) C = 0 for i in range(len(x)): dist = np.max(np.abs(x[i] - x), axis=1) C += np.sum((dist < r) & (dist > 0)) return C / ((n - _m) * (n - _m + 1)) return -np.log(_phi(m + 1) / _phi(m)) if _phi(m) != 0 else 0 # 2. 排列熵 def permutation_entropy(series, d=3, tau=1): n = len(series) if n < d * tau: return 0 # 创建符号序列 permutations = [] for i in range(n - d * tau + 1): segment = series[i:i + d * tau:tau] permutations.append(tuple(np.argsort(segment))) # 计算概率分布 unique, counts = np.unique(permutations, return_counts=True) probs = counts / len(permutations) # 计算熵 return -np.sum(probs * np.log(probs)) # 3. 频域能量 fft_vals = np.abs(np.fft.rfft(power_series)) spectral_energy = np.sum(fft_vals[:len(fft_vals) // 2]) / np.sum(fft_vals) return np.array([ sample_entropy(power_series), permutation_entropy(power_series), spectral_energy ]) # -------------------- 轻量化Transformer模型(高频序列)------------------- class ProbSparseAttention(nn.Module): """概率稀疏注意力机制 - 降低计算复杂度""" def __init__(self, d_model, n_heads, factor=5): super().__init__() self.d_model = d_model self.n_heads = n_heads self.factor = factor self.head_dim = d_model // n_heads def forward(self, Q, K, V): batch_size, seq_len, _ = Q.size() # 采样关键点 M = self.factor * int(np.ceil(np.log(seq_len))) sample_indices = torch.randperm(seq_len)[:M] K_sampled = K[:, sample_indices, :] V_sampled = V[:, sample_indices, :] # 计算稀疏注意力 Q = Q.view(batch_size, seq_len, self.n_heads, self.head_dim).transpose(1, 2) K_sampled = K_sampled.view(batch_size, M, self.n_heads, self.head_dim).transpose(1, 2) V_sampled = V_sampled.view(batch_size, M, self.n_heads, self.head_dim).transpose(1, 2) attn_scores = torch.matmul(Q, K_sampled.transpose(-2, -1)) / np.sqrt(self.head_dim) attn_weights = F.softmax(attn_scores, dim=-1) output = torch.matmul(attn_weights, V_sampled) output = output.transpose(1, 2).contiguous().view(batch_size, seq_len, self.d_model) return output class PositionalEncoding(nn.Module): def __init__(self, d_model, dropout=0.1, max_len=5000): super().__init__() self.dropout = nn.Dropout(p=dropout) pe = torch.zeros(max_len, d_model) position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) pe[:, 0::2] = torch.sin(position * div_term) pe[:, 1::2] = torch.cos(position * div_term) pe = pe.unsqueeze(0) self.register_buffer('pe', pe) def forward(self, x): x = x + self.pe[:, :x.size(1)] return self.dropout(x) class DistillingLayer(nn.Module): """蒸馏层 - 压缩序列长度""" def __init__(self, d_model): super().__init__() self.conv = nn.Conv1d( in_channels=d_model, out_channels=d_model, kernel_size=3, stride=2, padding=1 ) self.activation = nn.ReLU() def forward(self, x): # x: [batch, seq_len, d_model] x = x.permute(0, 2, 1) # [batch, d_model, seq_len] x = self.conv(x) x = self.activation(x) return x.permute(0, 2, 1) # [batch, new_seq, d_model] class EfficientTransformer(nn.Module): """高效Transformer模型 - 使用概率稀疏注意力和蒸馏机制""" def __init__(self, input_dim, d_model=64, nhead=4, num_layers=2, dim_feedforward=128, dropout=0.1): super().__init__() self.d_model = d_model self.embedding = nn.Linear(input_dim, d_model) self.pos_encoder = PositionalEncoding(d_model, dropout) # 编码器层 self.encoder_layers = nn.ModuleList() for i in range(num_layers): self.encoder_layers.append(nn.TransformerEncoderLayer( d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout, batch_first=True )) # 蒸馏层 self.distill_layers = nn.ModuleList([ DistillingLayer(d_model) for _ in range(num_layers - 1) ]) # 解码器 self.decoder = nn.TransformerDecoder( nn.TransformerDecoderLayer( d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout, batch_first=True ), num_layers=1 ) self.output_layer = nn.Linear(d_model, 1) def forward(self, src, tgt=None): # 嵌入和位置编码 src = self.embedding(src) * math.sqrt(self.d_model) src = self.pos_encoder(src) # 编码过程 for i, layer in enumerate(self.encoder_layers): src = layer(src) if i < len(self.distill_layers): src = self.distill_layers[i](src) # 解码过程 if tgt is None: tgt = torch.zeros(src.size(0), src.size(1), self.d_model, device=src.device) else: tgt = self.embedding(tgt) tgt = self.pos_encoder(tgt) output = self.decoder(tgt, src) output = self.output_layer(output) return output[:, -1, :].squeeze(-1) # -------------------- SRU模型(低频序列)------------------- class SRU(nn.Module): """Simple Recurrent Unit - 比GRU更快的替代方案""" def __init__(self, input_size, hidden_size, num_layers=2): super().__init__() self.hidden_size = hidden_size self.num_layers = num_layers # 门控参数 self.gates = nn.ModuleList() for i in range(num_layers): in_dim = input_size if i == 0 else hidden_size self.gates.append(nn.Linear(in_dim + hidden_size, 3 * hidden_size)) def forward(self, x): batch_size, seq_len, _ = x.size() h = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=x.device) outputs = [] for t in range(seq_len): input_t = x[:, t, :] new_h = [] for i in range(self.num_layers): # 当前层的输入 layer_input = input_t if i == 0 else new_h[i - 1] # 与前一隐藏状态连接 combined = torch.cat((layer_input, h[i]), dim=1) # 计算门控 gates = self.gates[i](combined) f, r, c = torch.chunk(gates, 3, dim=1) f = torch.sigmoid(f) r = torch.sigmoid(r) c = torch.tanh(c) # 更新隐藏状态 h_i = f * h[i] + (1 - f) * c output = r * h_i new_h.append(h_i) input_t = output outputs.append(output) h = torch.stack(new_h, dim=0) return torch.stack(outputs, dim=1) class SRUAttention(nn.Module): """带注意力机制的SRU模型""" def __init__(self, input_dim, hidden_size=32, num_layers=2): super().__init__() self.sru = SRU(input_dim, hidden_size, num_layers) self.attention = nn.Sequential( nn.Linear(hidden_size, hidden_size), nn.Tanh(), nn.Linear(hidden_size, 1), nn.Softmax(dim=1) ) self.fc = nn.Linear(hidden_size, 1) def forward(self, x): # SRU输出: [batch, seq_len, hidden_size] sru_out = self.sru(x) # 注意力权重 attn_weights = self.attention(sru_out) # 上下文向量 context = torch.sum(attn_weights * sru_out, dim=1) return self.fc(context).squeeze(-1) # -------------------- 模型路由网络 -------------------- class RoutingNetwork(nn.Module): """动态模型路由网络 - 根据序列特征选择模型""" def __init__(self, transformer, sru_model): super().__init__() self.transformer = transformer self.sru_model = sru_model self.router = nn.Sequential( nn.Linear(3, 16), # 输入特征数 nn.ReLU(), nn.Linear(16, 2), nn.Softmax(dim=1) ) def forward(self, x, features): # 特征: [样本熵, 排列熵, 频域能量] route_probs = self.router(features) # 使用两个模型进行预测 trans_pred = self.transformer(x) sru_pred = self.sru_model(x) # 加权组合 return (route_probs[:, 0] * trans_pred + route_probs[:, 1] * sru_pred) # -------------------- 主流程(含动态路由策略)------------------- if __name__ == "__main__": try: # 设置随机种子确保可重复性 torch.manual_seed(42) np.random.seed(42) # 数据加载与预处理 file_path = 'G:/shuju/Location1.csv' raw_data = load_data(file_path) # 打印前5行数据 logger.info(f"数据形状: {raw_data.shape}") logger.info(f"前5行数据:\n{raw_data[:5]}") # 数据标准化 scalers = [] scaled_data = np.zeros_like(raw_data) for i in range(raw_data.shape[1]): scaler = MinMaxScaler(feature_range=(0, 1)) scaled_data[:, i] = scaler.fit_transform(raw_data[:, i].reshape(-1, 1)).flatten() scalers.append(scaler) # VMD分解(替代CEEMDAN) components = vmd_decomposition(scaled_data, alpha=CONFIG['vmd_alpha'], tau=CONFIG['vmd_tau'], K=CONFIG['vmd_k']) # 提取特征并聚类分组 logger.info("提取特征并聚类分组...") features = np.array([extract_features(comp) for comp in components]) # OPTICS聚类 clusterer = OPTICS(min_samples=CONFIG['cluster_min_samples'], xi=0.05) labels = clusterer.fit_predict(features) # 按聚类结果分组 grouped_components = {} for i, label in enumerate(labels): if label not in grouped_components: grouped_components[label] = [] grouped_components[label].append(components[i][:, 0]) # 只取功率部分 # 重构序列 reconstructed_series = [] for label, comp_list in grouped_components.items(): if len(comp_list) > 1: reconstructed_series.append(np.sum(comp_list, axis=0)) else: reconstructed_series.append(comp_list[0]) logger.info(f"重构为 {len(reconstructed_series)} 个序列") # 转换为监督学习格式 def create_sequences(data, seq_length): X, y = [], [] for i in range(len(data) - seq_length): X.append(data[i:i + seq_length]) y.append(data[i + seq_length]) return np.array(X), np.array(y) # 创建数据集 datasets = [] for series in reconstructed_series: X, y = create_sequences(series, CONFIG['seq_length']) datasets.append((X, y)) # 模型初始化 transformer = EfficientTransformer(input_dim=1, d_model=CONFIG['d_model'], nhead=CONFIG['nhead'], num_layers=CONFIG['num_layers'], dim_feedforward=CONFIG['dim_feedforward'], dropout=CONFIG['dropout']).to(CONFIG['device']) sru_model = SRUAttention(input_dim=1, hidden_size=CONFIG['sru_hidden'], num_layers=CONFIG['sru_layers']).to(CONFIG['device']) routing_net = RoutingNetwork(transformer, sru_model).to(CONFIG['device']) criterion = nn.MSELoss() optimizer = torch.optim.Adam(routing_net.parameters(), lr=0.001) # 混合精度训练的梯度缩放器 scaler = amp.GradScaler(enabled=CONFIG['use_amp']) # 训练循环 logger.info("开始训练...") start_time = time.time() # 由于我们有多个序列,需要合并训练数据 all_X, all_y, all_features = [], [], [] for i, (X, y) in enumerate(datasets): # 为每个序列提取特征 seq_features = features[i] all_X.append(X) all_y.append(y) # 为每个样本复制特征 all_features.extend([seq_features] * len(X)) all_X = np.concatenate(all_X) all_y = np.concatenate(all_y) all_features = np.array(all_features) # 按时间顺序划分数据集 split_index = int(len(all_X) * 0.9) train_dataset = TensorDataset( torch.tensor(all_X[:split_index], dtype=torch.float32), torch.tensor(all_y[:split_index], dtype=torch.float32), torch.tensor(all_features[:split_index], dtype=torch.float32) ) test_dataset = TensorDataset( torch.tensor(all_X[split_index:], dtype=torch.float32), torch.tensor(all_y[split_index:], dtype=torch.float32), torch.tensor(all_features[split_index:], dtype=torch.float32) ) # 创建DataLoader train_loader = DataLoader(train_dataset, batch_size=CONFIG['batch_size'], shuffle=True) test_loader = DataLoader(test_dataset, batch_size=CONFIG['batch_size']) # 训练循环 for epoch in range(CONFIG['epochs']): routing_net.train() epoch_loss = 0 for inputs, targets, feat in train_loader: inputs, targets, feat = inputs.to(CONFIG['device']), targets.to(CONFIG['device']), feat.to( CONFIG['device']) optimizer.zero_grad() # 混合精度训练 with amp.autocast(enabled=CONFIG['use_amp']): outputs = routing_net(inputs.unsqueeze(-1), feat) loss = criterion(outputs, targets) scaler.scale(loss).backward() scaler.step(optimizer) scaler.update() epoch_loss += loss.item() avg_loss = epoch_loss / len(train_loader) logger.info(f"Epoch {epoch + 1}/{CONFIG['epochs']}, Loss: {avg_loss:.6f}") logger.info(f"训练完成,总耗时: {time.time() - start_time:.2f}秒") # 预测与评估 def inverse_transform(predictions, feature_idx=0): return scalers[feature_idx].inverse_transform(predictions.reshape(-1, 1)) routing_net.eval() all_preds, all_targets = [], [] with torch.no_grad(): for inputs, targets, feat in test_loader: inputs, targets, feat = inputs.to(CONFIG['device']), targets.to(CONFIG['device']), feat.to( CONFIG['device']) outputs = routing_net(inputs.unsqueeze(-1), feat) all_preds.append(outputs.cpu().numpy()) all_targets.append(targets.cpu().numpy()) all_preds = np.concatenate(all_preds) all_targets = np.concatenate(all_targets) # 反归一化 final_pred = inverse_transform(all_preds) y_true = inverse_transform(all_targets) # 评估指标 from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score rmse = np.sqrt(mean_squared_error(y_true, final_pred)) mae = mean_absolute_error(y_true, final_pred) r2 = r2_score(y_true, final_pred) logger.info(f"最终评估 - RMSE: {rmse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f}") # 绘制预测结果与真实值的对比图 plt.figure(figsize=(15, 6)) plt.plot(y_true[:500], label='True', linewidth=2) plt.plot(final_pred[:500], label='Predicted', linestyle='--') plt.title(f'Wind Power Prediction\nRMSE: {rmse:.2f}, MAE: {mae:.2f}, R²: {r2:.4f}') plt.xlabel('Time Steps') plt.ylabel('Power (MW)') plt.legend() plt.grid(True) plt.savefig('optimized_prediction_comparison.png', dpi=300) plt.show() # 保存模型 torch.save(routing_net.state_dict(), 'optimized_wind_power_model.pth') logger.info("模型已保存") except Exception as e: logger.error(f"程序出错: {str(e)}") import traceback logger.error(traceback.format_exc())阅读该代码生成流程图,并确保流程图美观,简洁符合代码

## 项目结构 invoice_recognition/ ├── main.py # 主程序入口 ├── config.py # 配置文件 ├── requirements.txt # 依赖库 ├── data/ # 数据目录 │ ├── invoices/ # 原始发票图像 │ ├── processed/ # 处理后的图像 │ └── models/ # 预训练模型 │ └── invoice_detector.pth ├── src/ # 源代码目录 │ ├── preprocessing.py # 图像预处理 │ ├── detection.py # 发票检测与定位 │ ├── ocr.py # 文字识别 │ ├── table_recognition.py # 表格识别 │ ├── validation.py # 数据验证 │ ├── visualization.py # 可视化工具 │ ├── utils.py # 辅助函数 │ └── exceptions.py # 自定义异常 └── results/ # 结果输出目录 └── reports/ # 分析报告 ## 完整代码实现 ### 1. config.py - 配置文件 python # config.py import os # 路径配置 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) DATA_DIR = os.path.join(BASE_DIR, 'data') INVOICE_DIR = os.path.join(DATA_DIR, 'invoices') PROCESSED_DIR = os.path.join(DATA_DIR, 'processed') MODELS_DIR = os.path.join(DATA_DIR, 'models') RESULTS_DIR = os.path.join(BASE_DIR, 'results') REPORTS_DIR = os.path.join(RESULTS_DIR, 'reports') # 图像处理参数 PREPROCESS_PARAMS = { 'denoise_h': 10, 'adaptive_block_size': 11, 'adaptive_c': 2, 'canny_threshold1': 50, 'canny_threshold2': 150, 'perspective_padding': 20 } # OCR配置 OCR_CONFIG = { 'lang': 'chi_sim+eng', 'oem': 3, 'psm': 6, 'table_psm': 11 } # 深度学习模型配置 MODEL_CONFIG = { 'invoice_detector': os.path.join(MODELS_DIR, 'invoice_detector.pth'), 'confidence_threshold': 0.8, 'nms_threshold': 0.4 } # 验证规则 VALIDATION_RULES = { 'max_invoice_age_months': 3, 'min_amount': 1.0, 'max_amount': 100000.0, 'required_fields': ['发票代码', '发票号码', '开票日期', '金额'] } # 创建必要目录 os.makedirs(INVOICE_DIR, exist_ok=True) os.makedirs(PROCESSED_DIR, exist_ok=True) os.makedirs(REPORTS_DIR, exist_ok=True) ### 2. requirements.txt - 依赖库 opencv-python==4.5.5.64 numpy==1.22.3 pytesseract==0.3.9 Pillow==9.1.0 matplotlib==3.5.1 scikit-image==0.19.2 torch==1.11.0 torchvision==0.12.0 pandas==1.4.2 scipy==1.8.0 seaborn==0.11.2 ### 3. src/exceptions.py - 自定义异常 python # src/exceptions.py class InvoiceProcessingError(Exception): """发票处理异常基类""" pass class InvoiceNotFoundError(InvoiceProcessingError): """未检测到发票""" pass class OCRFailureError(InvoiceProcessingError): """OCR识别失败""" pass class ValidationError(InvoiceProcessingError): """数据验证失败""" pass class PerspectiveTransformError(InvoiceProcessingError): """透视变换失败""" pass ### 4. src/utils.py - 辅助函数 python # src/utils.py import os import cv2 import numpy as np import matplotlib.pyplot as plt from config import PROCESSED_DIR, REPORTS_DIR def save_processed_image(image, filename, suffix=""): """保存处理后的图像""" if suffix: name, ext = os.path.splitext(filename) filename = f"{name}_{suffix}{ext}" output_path = os.path.join(PROCESSED_DIR, filename) cv2.imwrite(output_path, image) return output_path def plot_histogram(data, title, xlabel, ylabel, filename): """绘制并保存直方图""" plt.figure(figsize=(10, 6)) plt.hist(data, bins=20, alpha=0.7, color='skyblue') plt.title(title) plt.xlabel(xlabel) plt.ylabel(ylabel) plt.grid(True, linestyle='--', alpha=0.7) output_path = os.path.join(REPORTS_DIR, filename) plt.savefig(output_path) plt.close() return output_path def order_points(pts): """重新排列四个点:左上,右上,右下,左下""" rect = np.zeros((4, 2), dtype="float32") s = pts.sum(axis=1) rect[0] = pts[np.argmin(s)] rect[2] = pts[np.argmax(s)] diff = np.diff(pts, axis=1) rect[1] = pts[np.argmin(diff)] rect[3] = pts[np.argmax(diff)] return rect def four_point_transform(image, pts, padding=0): """应用四点透视变换""" rect = order_points(pts) (tl, tr, br, bl) = rect # 计算新图像的宽度和高度 widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2)) widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2)) maxWidth = max(int(widthA), int(widthB)) heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2)) heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2)) maxHeight = max(int(heightA), int(heightB)) # 构建目标点 dst = np.array([ [padding, padding], [maxWidth - 1 + padding, padding], [maxWidth - 1 + padding, maxHeight - 1 + padding], [padding, maxHeight - 1 + padding] ], dtype="float32") # 计算透视变换矩阵 M = cv2.getPerspectiveTransform(rect, dst) # 应用变换 warped = cv2.warpPerspective(image, M, (maxWidth + 2*padding, maxHeight + 2*padding)) return warped ### 5. src/preprocessing.py - 图像预处理 python # src/preprocessing.py import cv2 import numpy as np from .exceptions import InvoiceProcessingError from config import PREPROCESS_PARAMS from .utils import save_processed_image def preprocess_image(image_path): """ 图像预处理流程 步骤:1.读取 2.灰度化 3.去噪 4.二值化 5.边缘检测 """ # 1. 读取图像 orig = cv2.imread(image_path) if orig is None: raise InvoiceProcessingError(f"无法读取图像: {image_path}") filename = os.path.basename(image_path) # 2. 灰度化 gray = cv2.cvtColor(orig, cv2.COLOR_BGR2GRAY) save_processed_image(gray, filename, "gray") # 3. 去噪(非局部均值去噪) denoised = cv2.fastNlMeansDenoising( gray, h=PREPROCESS_PARAMS['denoise_h'] ) save_processed_image(denoised, filename, "denoised") # 4. 自适应二值化 binary = cv2.adaptiveThreshold( denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, PREPROCESS_PARAMS['adaptive_block_size'], PREPROCESS_PARAMS['adaptive_c'] ) save_processed_image(binary, filename, "binary") # 5. 边缘检测 edges = cv2.Canny( binary, PREPROCESS_PARAMS['canny_threshold1'], PREPROCESS_PARAMS['canny_threshold2'] ) save_processed_image(edges, filename, "edges") return orig, gray, denoised, binary, edges ### 6. src/detection.py - 发票检测与定位 python # src/detection.py import cv2 import numpy as np import torch import torchvision from .exceptions import InvoiceNotFoundError, PerspectiveTransformError from config import MODEL_CONFIG, PREPROCESS_PARAMS from .utils import four_point_transform, save_processed_image def detect_invoice_contour(edges): """使用传统方法检测发票轮廓""" # 查找轮廓 contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) if not contours: raise InvoiceNotFoundError("未检测到任何轮廓") # 按面积排序,取前5个 contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5] # 寻找近似矩形轮廓 invoice_contour = None for cnt in contours: peri = cv2.arcLength(cnt, True) approx = cv2.approxPolyDP(cnt, 0.02 * peri, True) # 如果是四边形 if len(approx) == 4: invoice_contour = approx break if invoice_contour is None: raise InvoiceNotFoundError("未找到有效的发票轮廓") return invoice_contour.reshape(4, 2) def detect_invoice_dl(image): """使用深度学习检测发票位置""" # 加载预训练模型 model = torch.load(MODEL_CONFIG['invoice_detector']) model.eval() # 预处理图像 transform = torchvision.transforms.Compose([ torchvision.transforms.ToTensor(), torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) input_tensor = transform(image).unsqueeze(0) # 推理 with torch.no_grad(): predictions = model(input_tensor) # 应用非极大值抑制 indices = torchvision.ops.nms( predictions[0]['boxes'], predictions[0]['scores'], MODEL_CONFIG['nms_threshold'] ) # 获取最佳检测结果 best_score = 0 best_box = None for idx in indices: score = predictions[0]['scores'][idx].item() if score > MODEL_CONFIG['confidence_threshold'] and score > best_score: best_score = score best_box = predictions[0]['boxes'][idx].cpu().numpy().astype(int) if best_box is None: raise InvoiceNotFoundError("深度学习模型未检测到发票") # 将边界框转换为四点坐标 x1, y1, x2, y2 = best_box invoice_points = np.array([ [x1, y1], [x2, y1], [x2, y2], [x1, y2] ]) return invoice_points def extract_invoice_region(orig, edges, method='hybrid'): """提取发票区域并进行透视变换""" filename = os.path.basename(orig) try: if method == 'traditional': points = detect_invoice_contour(edges) elif method == 'deep_learning': points = detect_invoice_dl(orig) else: # hybrid try: points = detect_invoice_contour(edges) except InvoiceNotFoundError: points = detect_invoice_dl(orig) # 绘制检测点 marked = orig.copy() for point in points: cv2.circle(marked, tuple(point), 10, (0, 0, 255), -1) save_processed_image(marked, filename, "detected_points") # 应用透视变换 warped = four_point_transform( orig, points, padding=PREPROCESS_PARAMS['perspective_padding'] ) save_processed_image(warped, filename, "warped") return warped except Exception as e: raise PerspectiveTransformError(f"透视变换失败: {str(e)}") ### 7. src/ocr.py - 文字识别 python # src/ocr.py import pytesseract from PIL import Image import cv2 import numpy as np import re from .exceptions import OCRFailureError from config import OCR_CONFIG def enhance_text_region(image): """增强文本区域的可读性""" # 使用CLAHE增强对比度 lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB) l, a, b = cv2.split(lab) # 应用CLAHE到L通道 clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8)) l = clahe.apply(l) # 合并通道并转换回BGR enhanced_lab = cv2.merge([l, a, b]) enhanced = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR) # 轻微锐化 kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]]) sharpened = cv2.filter2D(enhanced, -1, kernel) return sharpened def extract_text(image, config=None): """从图像中提取文本""" if config is None: config = f"--oem {OCR_CONFIG['oem']} --psm {OCR_CONFIG['psm']} -l {OCR_CONFIG['lang']}" # 转换为PIL图像 pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) try: text = pytesseract.image_to_string(pil_img, config=config) return text.strip() except Exception as e: raise OCRFailureError(f"OCR识别失败: {str(e)}") def extract_invoice_info(text): """从文本中提取结构化发票信息""" if not text: raise OCRFailureError("OCR未返回任何文本") # 定义提取模式 patterns = { '发票代码': r'发票代码\s*[::]?\s*(\d+)', '发票号码': r'发票号码\s*[::]?\s*(\d+)', '开票日期': r'开票日期\s*[::]?\s*(\d{4}年\d{1,2}月\d{1,2}日)', '购买方': r'购买方[::]\s*名\s*称\s*[::]\s*([^\n]+)', '销售方': r'销售方[::]\s*名\s*称\s*[::]\s*([^\n]+)', '金额': r'小写\s*[::]?\s*[¥¥]?\s*(\d+\.\d{2})', '价税合计': r'价税合计\s*\(.*\)\s*[::]?\s*[¥¥]?\s*(\d+\.\d{2})', '校验码': r'校验码\s*[::]?\s*([0-9a-zA-Z]{20})' } results = {} for key, pattern in patterns.items(): match = re.search(pattern, text) results[key] = match.group(1) if match else None # 如果金额未找到,尝试其他模式 if results['金额'] is None: amount_match = re.search(r'¥\s*(\d+\.\d{2})', text) results['金额'] = amount_match.group(1) if amount_match else None return results ### 8. src/table_recognition.py - 表格识别 python # src/table_recognition.py import cv2 import numpy as np import pytesseract from .exceptions import OCRFailureError from config import OCR_CONFIG def detect_table_lines(image): """检测表格线""" # 转换为灰度图 gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # 二值化 thresh = cv2.adaptiveThreshold( gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 25, 16 ) # 检测水平线 horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1)) horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2) # 检测垂直线 vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 40)) vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2) # 合并表格线 table_lines = cv2.add(horizontal, vertical) return table_lines, horizontal, vertical def extract_table_cells(image): """提取表格单元格""" # 检测表格线 table_lines, horizontal, vertical = detect_table_lines(image) # 查找轮廓 contours, _ = cv2.findContours(table_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # 筛选出单元格轮廓 cells = [] for cnt in contours: x, y, w, h = cv2.boundingRect(cnt) # 过滤太小的区域 if w > 20 and h > 20: cells.append((x, y, w, h)) # 按位置排序单元格 cells.sort(key=lambda c: (c[1], c[0])) return cells def recognize_table(image): """识别表格内容""" # 提取单元格 cells = extract_table_cells(image) # OCR配置 config = f"--oem {OCR_CONFIG['oem']} --psm {OCR_CONFIG['table_psm']} -l {OCR_CONFIG['lang']}" table_data = [] for i, (x, y, w, h) in enumerate(cells): # 提取单元格区域 cell_img = image[y:y+h, x:x+w] # 应用OCR cell_text = pytesseract.image_to_string(cell_img, config=config).strip() # 添加到表格数据 table_data.append({ 'cell_id': i, 'x': x, 'y': y, 'width': w, 'height': h, 'text': cell_text }) return table_data ### 9. src/validation.py - 数据验证 python # src/validation.py import datetime import re from .exceptions import ValidationError from config import VALIDATION_RULES def validate_invoice_info(info): """验证发票信息""" errors = [] # 检查必填字段 for field in VALIDATION_RULES['required_fields']: if not info.get(field): errors.append(f"缺少必填字段: {field}") # 验证发票代码(10-12位数字) if info.get('发票代码') and not re.match(r'^\d{10,12}$', info['发票代码']): errors.append("发票代码格式错误") # 验证发票号码(8位数字) if info.get('发票号码') and not re.match(r'^\d{8}$', info['发票号码']): errors.append("发票号码格式错误") # 验证日期 if info.get('开票日期'): try: # 转换日期字符串 date_str = info['开票日期'].replace('年', '-').replace('月', '-').replace('日', '') date_obj = datetime.datetime.strptime(date_str, '%Y-%m-%d') # 检查日期范围 today = datetime.datetime.now() max_age = datetime.timedelta(days=VALIDATION_RULES['max_invoice_age_months']*30) if date_obj > today: errors.append("开票日期不能是未来日期") elif today - date_obj > max_age: errors.append("开票日期超过有效期限") except ValueError: errors.append("开票日期格式解析错误") # 验证金额 if info.get('金额'): try: amount = float(info['金额']) if amount < VALIDATION_RULES['min_amount']: errors.append(f"金额过小: {amount}") elif amount > VALIDATION_RULES['max_amount']: errors.append(f"金额过大: {amount}") except ValueError: errors.append("金额格式错误") # 验证校验码(20位数字+字母) if info.get('校验码') and not re.match(r'^[0-9a-zA-Z]{20}$', info['校验码']): errors.append("校验码格式错误") if errors: raise ValidationError("; ".join(errors)) return True ### 10. src/visualization.py - 可视化工具 python # src/visualization.py import cv2 import matplotlib.pyplot as plt import numpy as np from .utils import save_processed_image def visualize_processing_steps(image_path, steps): """可视化处理步骤""" plt.figure(figsize=(15, 8)) # 原始图像 orig = cv2.imread(image_path) orig_rgb = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB) plt.subplot(2, 3, 1) plt.imshow(orig_rgb) plt.title("原始图像") plt.axis('off') # 显示每个处理步骤 for i, (title, image) in enumerate(steps.items(), 2): plt.subplot(2, 3, i) if len(image.shape) == 2: # 灰度图 plt.imshow(image, cmap='gray') else: # 转换BGR为RGB image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) plt.imshow(image_rgb) plt.title(title) plt.axis('off') plt.tight_layout() # 保存可视化结果 filename = os.path.basename(image_path) output_path = os.path.join(PROCESSED_DIR, f"processing_steps_{filename}") plt.savefig(output_path) plt.close() return output_path def visualize_table(image, table_data): """可视化表格识别结果""" # 创建副本用于绘制 table_vis = image.copy() # 绘制单元格边界和文本 for cell in table_data: x, y, w, h = cell['x'], cell['y'], cell['width'], cell['height'] # 绘制矩形 cv2.rectangle(table_vis, (x, y), (x+w, y+h), (0, 255, 0), 2) # 绘制文本 cv2.putText(table_vis, cell['text'], (x+5, y+20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) # 保存结果 filename = os.path.basename(image_path) output_path = os.path.join(PROCESSED_DIR, f"table_detection_{filename}") cv2.imwrite(output_path, table_vis) return output_path, table_vis ### 11. main.py - 主程序入口 python # main.py import os import argparse import json import pandas as pd from datetime import datetime from config import INVOICE_DIR, RESULTS_DIR, REPORTS_DIR from src.preprocessing import preprocess_image from src.detection import extract_invoice_region from src.ocr import enhance_text_region, extract_text, extract_invoice_info from src.table_recognition import recognize_table from src.validation import validate_invoice_info from src.visualization import visualize_processing_steps, visualize_table from src.exceptions import InvoiceProcessingError, InvoiceNotFoundError, OCRFailureError, ValidationError from src.utils import plot_histogram def process_invoice(image_path, output_report=False): """处理单个发票图像""" try: print(f"\n处理发票: {os.path.basename(image_path)}") print("-" * 50) # 1. 图像预处理 orig, gray, denoised, binary, edges = preprocess_image(image_path) # 2. 发票检测与定位 warped = extract_invoice_region(orig, edges, method='hybrid') # 3. 文本区域增强 enhanced = enhance_text_region(warped) # 4. OCR识别 text = extract_text(enhanced) print("OCR识别结果摘要:") print(text[:500] + "..." if len(text) > 500 else text) print("-" * 50) # 5. 提取结构化信息 info = extract_invoice_info(text) print("提取的发票信息:") for key, value in info.items(): print(f"{key}: {value}") # 6. 表格识别(可选) try: table_data = recognize_table(warped) print(f"检测到 {len(table_data)} 个表格单元格") except Exception as e: table_data = [] print(f"表格识别失败: {str(e)}") # 7. 数据验证 validate_invoice_info(info) print("数据验证通过") # 8. 可视化处理步骤 steps = { "灰度化": gray, "去噪": denoised, "二值化": binary, "边缘检测": edges, "校正后": warped } steps_path = visualize_processing_steps(image_path, steps) # 9. 可视化表格识别(如果检测到表格) table_vis_path = None if table_data: table_vis_path, _ = visualize_table(warped, table_data) # 保存结果 result = { 'filename': os.path.basename(image_path), 'processing_steps': steps_path, 'table_visualization': table_vis_path, 'ocr_text': text, 'extracted_info': info, 'table_data': table_data, 'timestamp': datetime.now().isoformat(), 'status': 'success' } print(f"\n处理完成: {image_path}") return result except InvoiceProcessingError as e: print(f"\n处理失败: {str(e)}") return { 'filename': os.path.basename(image_path), 'error': str(e), 'timestamp': datetime.now().isoformat(), 'status': 'failed' } def batch_process_invoices(input_dir, output_dir): """批量处理发票目录""" results = [] amounts = [] # 获取所有图像文件 image_files = [f for f in os.listdir(input_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))] if not image_files: print(f"在目录 {input_dir} 中未找到图像文件") return print(f"开始批量处理 {len(image_files)} 张发票...") # 处理每张发票 for i, filename in enumerate(image_files): image_path = os.path.join(input_dir, filename) print(f"\n[{i+1}/{len(image_files)}] 处理 {filename}") result = process_invoice(image_path) results.append(result) # 收集金额用于分析 if result['status'] == 'success' and '金额' in result['extracted_info']: try: amount = float(result['extracted_info']['金额']) amounts.append(amount) except: pass # 保存结果到JSON json_path = os.path.join(output_dir, 'invoice_results.json') with open(json_path, 'w', encoding='utf-8') as f: json.dump(results, f, ensure_ascii=False, indent=2) print(f"\n处理完成,结果已保存至: {json_path}") # 生成分析报告 if amounts: hist_path = plot_histogram( amounts, "发票金额分布", "金额 (元)", "发票数量", "amount_distribution.png" ) print(f"金额分布图已保存至: {hist_path}") # 生成CSV摘要 summary_data = [] for result in results: if result['status'] == 'success': info = result['extracted_info'] summary_data.append({ '文件名': result['filename'], '发票代码': info.get('发票代码', ''), '发票号码': info.get('发票号码', ''), '开票日期': info.get('开票日期', ''), '金额': info.get('金额', ''), '购买方': info.get('购买方', ''), '销售方': info.get('销售方', '') }) else: summary_data.append({ '文件名': result['filename'], '错误': result['error'] }) df = pd.DataFrame(summary_data) csv_path = os.path.join(output_dir, 'invoice_summary.csv') df.to_csv(csv_path, index=False, encoding='utf-8-sig') print(f"摘要报告已保存至: {csv_path}") return results if __name__ == "__main__": parser = argparse.ArgumentParser(description='电子发票识别系统') parser.add_argument('--input', type=str, default=INVOICE_DIR, help='输入发票目录路径') parser.add_argument('--output', type=str, default=RESULTS_DIR, help='结果输出目录路径') parser.add_argument('--single', type=str, help='处理单个发票文件路径') args = parser.parse_args() if args.single: # 处理单个发票 result = process_invoice(args.single, output_report=True) print("\n处理结果:") print(json.dumps(result, indent=2, ensure_ascii=False)) else: # 批量处理 batch_process_invoices(args.input, args.output)我这个代码可以正确运行并实现功能吗

# -*- coding: utf-8 -*- # 重新增加了然门控变得更快得方式:1.beta_l0更大;2.log_alpha的学习率变为2.0;3.添加熵正则化。 from __future__ import annotations import math import os import random import time from collections import deque from pathlib import Path from typing import Tuple import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from torch.optim.lr_scheduler import CosineAnnealingLR from torch.utils.data import DataLoader from torchvision import datasets, models, transforms from sklearn.cluster import KMeans import numpy as np import pandas as pd import matplotlib.pyplot as plt from sklearn.metrics import ( silhouette_score, silhouette_samples, calinski_harabasz_score, davies_bouldin_score, ) from sklearn.manifold import TSNE try: import umap # 只有 umap-learn 才带 UMAP 类 HAS_UMAP = hasattr(umap, "UMAP") or hasattr(umap, "umap_") except ImportError: HAS_UMAP = False from datetime import datetime from matplotlib.patches import Rectangle import warnings # -------------------------- Global configuration -------------------------- # class CFG: # Paths data_root: str = r"D:\dataset\TILDA_8class_73" save_root: str = r"D:\SCI_exp\7_29\exp_file" # Dataset & DL batch_size: int = 128 num_workers: int = 0 # tune to your CPU img_size: int = 224 # F2013 images are 48×48; we upscale for ResNet‐18 # Model dimensions (§3.5.1) d_backbone: int = 512 d_proj: int = 128 K_max: int = 3 mem_size: int = 4096 # Optimisation (§3.5.1) lr_warmup: float = 1e-3 lr_joint: float = 3e-4 lr_ft: float = 1e-4 weight_decay: float = 5e-4 n_epochs_warmup: int = 15#5 n_epochs_joint: int = 150 #20 n_epochs_ft: int = 25 #15 # Loss hyper‑params lambda1: float = 0.5 # push–pull alpha_proto: float = 0.1 scale_ce: float = 30.0 gamma_se: float = 20 # 自表示权重 0.5 # ---------- Hard-Concrete ---------- tau0_hc: float = 1.5 # 初始温度 tau_min_hc: float = 0.15 # 最低温度 anneal_epochs_hc: int = 5 gamma_hc: float = -0.1 # stretch 下界 zeta_hc: float = 1.1 # stretch 上界 beta_l0: float = 5e-2 # L0 正则系数 5e-2 hc_threshold: float = 0.35 # Misc seed: int = 42 device: str = "cuda" if torch.cuda.is_available() else "cpu" # ---------- datetime ---------- # def get_timestamp(): """获取当前时间戳,格式:YYYYMMDD_HHMMSS""" return datetime.now().strftime("%Y%m%d_%H%M%S") # ---------- diagnostics ---------- # MAX_SAMPLED = 5_000 # None → 全量 timestamp = get_timestamp() # 获取当前时间戳 DIAG_DIR = Path(CFG.save_root) / f"diagnostics_{timestamp}" # 文件夹名包含时间戳 DIAG_DIR.mkdir(parents=True, exist_ok=True) # -------------------------- Reproducibility -------------------------- # torch.manual_seed(CFG.seed) random.seed(CFG.seed) # -------------------------- Utility functions -------------------------- # def L2_normalise(t: torch.Tensor, dim: int = 1, eps: float = 1e-12) -> torch.Tensor: return F.normalize(t, p=2, dim=dim, eps=eps) def pairwise_cosine(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: """Compute cosine similarity between all pairs in *x* and *y*.""" x = L2_normalise(x) y = L2_normalise(y) return x @ y.T # (N, M) # -------------------------- Memory bank (FIFO queue) -------------------------- # class MemoryBank: """Fixed‑size FIFO queue storing (p, q, y_c). All tensors are detached.""" def __init__(self, dim: int, size: int): self.size = size self.dim = dim self.ptr = 0 self.is_full = False # pre‑allocate self.p_bank = torch.zeros(size, dim, device=CFG.device) self.q_bank = torch.zeros_like(self.p_bank) self.y_bank = torch.zeros(size, dtype=torch.long, device=CFG.device) @torch.no_grad() def enqueue(self, p: torch.Tensor, q: torch.Tensor, y: torch.Tensor): b = p.size(0) if b > self.size: p, q, y = p[-self.size:], q[-self.size:], y[-self.size:] b = self.size idx = (torch.arange(b, device=CFG.device) + self.ptr) % self.size self.p_bank[idx] = p.detach() self.q_bank[idx] = q.detach() self.y_bank[idx] = y.detach() self.ptr = (self.ptr + b) % self.size if self.ptr == 0: self.is_full = True def get(self) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: valid = self.size if self.is_full else self.ptr return ( self.p_bank[:valid].detach(), self.q_bank[:valid].detach(), self.y_bank[:valid].detach(), ) # -------------------------- Projection heads -------------------------- # class MLPHead(nn.Module): def __init__(self, in_dim: int, out_dim: int): super().__init__() self.mlp = nn.Sequential( nn.Linear(in_dim, out_dim//2, bias=False), nn.BatchNorm1d(out_dim//2), nn.ReLU(inplace=True), nn.Linear(out_dim//2, out_dim, bias=True), ) def forward(self, x: torch.Tensor): return self.mlp(x) # -------------------------- Cosine classifier -------------------------- # class CosineLinear(nn.Module): """Cosine classifier with fixed scale *s* (Eq. CE).""" def __init__(self, in_dim: int, n_classes: int, s: float = CFG.scale_ce): super().__init__() self.s = s self.weight = nn.Parameter(torch.randn(n_classes, in_dim)) nn.init.xavier_uniform_(self.weight) def forward(self, x: torch.Tensor): # x ∈ ℝ^{B×d_p} x = L2_normalise(x) w = L2_normalise(self.weight) # logits = s * cos(θ) return self.s * (x @ w.T) # -------------------------- BaPSTO model -------------------------- # class BaPSTO(nn.Module): """Backbone + DASSER heads + BPGSNet prototypes & gates.""" def __init__(self, n_classes: int): super().__init__() # --- Backbone (ResNet‑18) ------------------------------------------------ resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1) pretrained_path = Path(CFG.save_root) / "resnet18_best_TILDA_8class_73_7446.pth" if pretrained_path.exists(): print(f"Loading pretrained weights from {pretrained_path}") pretrained = torch.load(pretrained_path, map_location=CFG.device, weights_only=True) # 创建临时模型来获取预训练权重的正确映射 temp_model = models.resnet18() temp_model.fc = nn.Linear(temp_model.fc.in_features, n_classes) temp_model.load_state_dict(pretrained["state_dict"], strict=False) # 复制预训练权重到我们的模型中(除了fc层) resnet_dict = resnet.state_dict() pretrained_dict = {k: v for k, v in temp_model.state_dict().items() if k in resnet_dict and 'fc' not in k} resnet_dict.update(pretrained_dict) resnet.load_state_dict(resnet_dict) print("✓ Successfully loaded pretrained backbone weights!") else: print(f"⚠️ Pretrained weights not found at {pretrained_path}. Using ImageNet weights.") # --- Backbone ------------------------------------------------ in_feat = resnet.fc.in_features # 512 resnet.fc = nn.Identity() self.backbone = resnet # project to d_backbone (512-64-128) #self.fc_backbone = nn.Linear(in_feat, CFG.d_backbone, bias=False) #nn.init.xavier_uniform_(self.fc_backbone.weight) # 这一句的 # --- Projection heads --------------------------------------------------- self.g_SA = MLPHead(CFG.d_backbone, CFG.d_proj) self.g_FV = MLPHead(CFG.d_backbone, CFG.d_proj) # Cosine classifier (coarse level) self.classifier = CosineLinear(CFG.d_proj, n_classes) # --- BPGSNet prototypes & gate logits ----------------------------------- self.prototypes = nn.Parameter( torch.randn(n_classes, CFG.K_max, CFG.d_proj) ) # (K_C, K_max, d_p) nn.init.xavier_uniform_(self.prototypes) self.log_alpha = nn.Parameter( torch.randn(n_classes, CFG.K_max) * 0.01 # 随机初始化 ) # (K_C, K_max) self.register_buffer("global_step", torch.tensor(0, dtype=torch.long)) # ---------------- Forward pass ---------------- # def forward(self, x: torch.Tensor, y_c: torch.Tensor, mem_bank: MemoryBank, use_bpgs: bool = True ) -> tuple[torch.Tensor, dict[str, float], torch.Tensor, torch.Tensor]: """Return full loss components (Section §3.3 & §3.4).""" B = x.size(0) # --- Backbone & projections ------------------------------------------- z = self.backbone(x) # (B, 512) p = L2_normalise(self.g_SA(z)) # (B, d_p) q = L2_normalise(self.g_FV(z)) # (B, d_p) bank_p, bank_q, bank_y = mem_bank.get() # ---------------- DASSER losses ---------------- # # L_SA, L_ortho, L_ce_dasser = self._dasser_losses( # p, q, y_c, bank_p, bank_q, bank_y # ) # total_loss = L_SA + L_ortho + L_ce_dasser # stats = { # "loss": total_loss.item(), # "L_SA": L_SA.item(), # "L_ortho": L_ortho.item(), # "L_ce_dasser": L_ce_dasser.item(), # } L_SA, L_ortho, L_ce_dasser, L_se = self._dasser_losses( p, q, y_c, bank_p, bank_q, bank_y ) total_loss = ( L_SA + L_ortho + L_ce_dasser + CFG.gamma_se * L_se # NEW ) stats = { "loss": total_loss.item(), "L_SA": L_SA.item(), "L_ortho": L_ortho.item(), "L_ce_dasser": L_ce_dasser.item(), "L_se": L_se.item(), # NEW } # ---------------- BPGSNet (conditional) -------- # if use_bpgs: L_ce_bpgs, L_proto, L_gate, coarse_logits = self._bpgs_losses(q, y_c) total_loss = total_loss + L_ce_bpgs + L_proto + L_gate stats.update({ "L_ce_bpgs": L_ce_bpgs.item(), "L_proto": L_proto.item(), "L_gate": L_gate.item(), }) else: coarse_logits = None return total_loss, stats, p.detach(), q.detach() # ---------------------- Internal helpers ---------------------- # def _dasser_losses( self, p: torch.Tensor, q: torch.Tensor, y_c: torch.Tensor, bank_p: torch.Tensor, bank_q: torch.Tensor, bank_y: torch.Tensor, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: """ DASSER 损失: • 语义对齐 L_SA • 正交 L_ortho • 粗粒度 CE L_ce • 自表示 L_se (NEW) """ # ---------- 拼 batch + memory ---------- # p_all = torch.cat([p, bank_p], dim=0) if bank_p.numel() > 0 else p q_all = torch.cat([q, bank_q], dim=0) if bank_q.numel() > 0 else q y_all = torch.cat([y_c, bank_y], dim=0) if bank_y.numel() > 0 else y_c # ---------- 1) 语义对齐 (原有) ---------- # G = pairwise_cosine(p_all, p_all) # (N,N) :contentReference[oaicite:2]{index=2} G.fill_diagonal_(0.0) same = y_all.unsqueeze(0) == y_all.unsqueeze(1) diff = ~same L_SA = ((same * (1 - G)).sum() + CFG.lambda1 * (diff * G.clamp_min(0)).sum()) / (p_all.size(0) ** 2) # ---------- 2) 正交 (原有) --------------- # L_ortho = (1.0 / CFG.d_proj) * (p_all @ q_all.T).pow(2).sum() # ---------- 3) 自表示 (NEW) -------------- # C_logits = pairwise_cosine(p_all, p_all) # 再算一次以免受上一步改动 C_logits.fill_diagonal_(-1e4) # 置 −∞ → softmax≈0 C = F.softmax(C_logits, dim=1) # 行归一化 :contentReference[oaicite:3]{index=3} Q_recon = C @ q_all # 线性重构 L_se = F.mse_loss(Q_recon, q_all) # :contentReference[oaicite:4]{index=4} # ---------- 4) 粗粒度 CE (原有) ---------- # logits_coarse = self.classifier(p) L_ce = F.cross_entropy(logits_coarse, y_c) return L_SA, L_ortho, L_ce, L_se # ---------------------- 放到 BaPSTO 类里,直接替换原函数 ---------------------- # def _bpgs_losses( self, q: torch.Tensor, y_c: torch.Tensor ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: """ 计算 BPGSNet 损失(正确的 log-sum-exp 版) """ B = q.size(0) # q是batch*128的矩阵,获得批次大小 K_C, K_M = self.prototypes.size(0), self.prototypes.size(1) # K_C 是类别数,K_M 是每个类别的原型数 # (1) 欧氏距离 d = ((q.unsqueeze(1).unsqueeze(2) - self.prototypes.unsqueeze(0)) ** 2).sum(-1) # (B,K_C,K_M) s = 30.0 # ===== (2) 退火温度 τ ===== # τ 线性退火 epoch = self.global_step.item() / self.steps_per_epoch tau = max(CFG.tau_min_hc, CFG.tau0_hc - (CFG.tau0_hc - CFG.tau_min_hc) * min(1., epoch / CFG.anneal_epochs_hc)) # ----- (3) Hard- ----- log_alpha = self.log_alpha # (C,K) z, _s = self._sample_hardConcrete(log_alpha, tau) # z: (C,K) g = z.unsqueeze(0) # (1,C,K) 广播到 batch # (1,C,K) # ----- (4) coarse logits ----- mask_logits = -d * s + torch.log(g + 1e-12) # (B,C,K) coarse_logits = torch.logsumexp(mask_logits, dim=2) # (B,C) # ----- (5) losses ----- L_ce = F.cross_entropy(coarse_logits, y_c) y_hat = torch.softmax(mask_logits.detach(), dim=2) # stop-grad L_proto = CFG.alpha_proto * (y_hat * d).mean() # ---------- Hard-Concrete 的 L0 正则 ---------- temp = (log_alpha - tau * math.log(-CFG.gamma_hc / CFG.zeta_hc)) # (C,K) p_active = torch.sigmoid(temp) # 激活概率 p_active 是解析期望 pa(z大于0) # 新增加得loss pa = torch.sigmoid(log_alpha) entropy_penalty = 0.05 * (pa * torch.log(pa + 1e-8) + (1-pa) * torch.log(1-pa + 1e-8)).mean() # 新增加得loss,控制全局稀疏率 L_gate = CFG.beta_l0 * p_active.mean() - entropy_penalty # L0 正则 beta_l0 控控制全局稀疏率 return L_ce, L_proto, L_gate, coarse_logits def _sample_hardConcrete(self, log_alpha, tau): """return z ~ HardConcrete, and its stretched unclipped \tilde z""" u = torch.rand_like(log_alpha).clamp_(1e-6, 1-1e-6) s = torch.sigmoid((log_alpha + torch.log(u) - torch.log(1-u)) / tau) s = s * (CFG.zeta_hc - CFG.gamma_hc) + CFG.gamma_hc # stretch z_hard = s.clamp(0.0, 1.0) z = z_hard + (s - s.detach()) # ST estimator,让梯度穿过 return z, s # z用于前向, s用于梯度 # -------------------------- K-means++ initialisation -------------------------- # @torch.no_grad() def kmeans_init(model: BaPSTO, loader: DataLoader): """Use q‑features to initialise prototypes with K‑means++ (§3.4.1).""" print("[Init] Running K‑means++ for prototype initialisation...") model.eval() all_q, all_y = [], [] for x, y in loader: x = x.to(CFG.device) z = L2_normalise(model.g_FV(model.backbone(x))) all_q.append(z.cpu()) all_y.append(y) all_q = torch.cat(all_q) # (N, d_p) all_y = torch.cat(all_y) # (N,) for c in range(model.prototypes.size(0)): feats = all_q[all_y == c] kmeans = KMeans( n_clusters=CFG.K_max, init="k-means++", n_init=10, max_iter=100, random_state=CFG.seed, ).fit(feats.numpy()) centroids = torch.from_numpy(kmeans.cluster_centers_).to(CFG.device) centroids = L2_normalise(centroids) # (K_max, d_p) model.prototypes.data[c] = centroids print("[Init] Prototype initialisation done.") # -------------------------- Training utilities -------------------------- # def accuracy(output: torch.Tensor, target: torch.Tensor) -> float: """Compute top‑1 accuracy (coarse).""" with torch.no_grad(): pred = output.argmax(dim=1) correct = pred.eq(target).sum().item() return correct / target.size(0) @torch.no_grad() def _collect_Q_labels(model: BaPSTO, loader: DataLoader): """遍历 *loader*,返回 (Q features, coarse-ID, proto-ID);采样上限 MAX_SAMPLED.""" model.eval() qs, cls, subs = [], [], [] for x, y in loader: x = x.to(CFG.device) q = L2_normalise(model.g_FV(model.backbone(x))) # (B,d) # —— 预测最近原型 idx —— # d = ((q.unsqueeze(1).unsqueeze(2) - model.prototypes.unsqueeze(0))**2).sum(-1) # (B,C,K) proto_id = d.view(d.size(0), -1).argmin(dim=1) # flatten idx = C*K + k qs.append(q.cpu()) cls.append(y) subs.append(proto_id.cpu()) if MAX_SAMPLED and (sum(len(t) for t in qs) >= MAX_SAMPLED): break Q = torch.cat(qs)[:MAX_SAMPLED] # (N,d) Yc = torch.cat(cls)[:MAX_SAMPLED] # coarse Ysub = torch.cat(subs)[:MAX_SAMPLED] # pseudo-fine return Q.numpy(), Yc.numpy(), Ysub.numpy() def _plot_heatmap(mat: np.ndarray, title: str, path: Path, boxes: list[tuple[int,int]] | None = None): """ mat : 排好序的相似度矩阵 boxes : [(row_start,row_end), ...];坐标在排序后的索引系中 """ plt.figure(figsize=(6, 5)) ax = plt.gca() im = ax.imshow(mat, cmap="viridis", aspect="auto") plt.colorbar(im) if boxes: # 逐个 coarse-class 画框 for s, e in boxes: w = e - s rect = Rectangle((s - .5, s - .5), w, w, linewidth=1.5, edgecolor="white", facecolor="none") ax.add_patch(rect) plt.title(title) plt.tight_layout() plt.savefig(path, dpi=300) plt.close() def compute_and_save_diagnostics(model: BaPSTO, loader: DataLoader, tag: str): """ • 计算三个内部指标并保存 csv • 绘制五张图 (C heatmap, t-SNE / UMAP, Laplacian spectrum, Silhouette bars, Gate heatmap(opt)) """ print(f"[Diag] computing metrics ({tag}) ...") timestamp = get_timestamp() Q, Yc, Ysub = _collect_Q_labels(model, loader) # ========== 1) 聚类指标 ========== # sil = silhouette_score(Q, Ysub, metric="cosine") ch = calinski_harabasz_score(Q, Ysub) db = davies_bouldin_score(Q, Ysub) pd.DataFrame( {"tag":[tag], "silhouette":[sil], "calinski":[ch], "davies":[db]} ).to_csv(DIAG_DIR / f"cluster_metrics_{tag}_{timestamp}.csv", index=False) # ========== 2) C heatmap & Laplacian ========== # GRAPH_LEVEL = 'coarse' # ← 这里换 'sub' 就看细粒度--------------------------------------------------- # ① —— 相似度矩阵(始终基于所有样本,用来画热力图) —— # P_all = Q @ Q.T / np.linalg.norm(Q, axis=1, keepdims=True) / np.linalg.norm(Q, axis=1)[:, None] np.fill_diagonal(P_all, -1e4) # 取消自环 C_heat = torch.softmax(torch.tensor(P_all), dim=1).cpu().numpy() # —— 画热力图:完全沿用旧逻辑,不受 GRAPH_LEVEL 影响 —— # order = np.lexsort((Ysub, Yc)) # 先 coarse 再 sub #order = np.argsort(Yc) # 只按粗类别拍平---------------------- # —— 计算每个 coarse-class 的起止行(列) —— # coarse_sorted = Yc[order] bounds = [] # [(start,end),...] start = 0 for i in range(1, len(coarse_sorted)): if coarse_sorted[i] != coarse_sorted[i-1]: bounds.append((start, i)) # [start, end) start = i bounds.append((start, len(coarse_sorted))) # —— 绘图,并把边界传给 boxes 参数 —— # _plot_heatmap(C_heat[order][:, order], f"C heatmap ({tag})", DIAG_DIR / f"C_heatmap_{tag}_{timestamp}.png", boxes=bounds) # ② —— 针对 Laplacian 的图,可选按 coarse/sub 屏蔽 —— # P_graph = P_all.copy() # 从全局矩阵复制一份 if GRAPH_LEVEL == 'coarse': P_graph[Yc[:, None] != Yc[None, :]] = -1e4 # 只留同 coarse 的边 elif GRAPH_LEVEL == 'sub': P_graph[Ysub[:, None] != Ysub[None, :]] = -1e4 # 只留同子簇的边 C_graph = torch.softmax(torch.tensor(P_graph), dim=1).cpu().numpy() D = np.diag(C_graph.sum(1)) L = D - (C_graph + C_graph.T) / 2 eigs = np.sort(np.linalg.eigvalsh(L))[:30] plt.figure(); plt.plot(eigs, marker='o') plt.title(f"Laplacian spectrum ({GRAPH_LEVEL or 'global'} | {tag})") plt.tight_layout() plt.savefig(DIAG_DIR / f"laplacian_{tag}_{timestamp}.png", dpi=300); plt.close() # ========== 3) t-SNE / UMAP (带图例 & 色彩 ≤20) ========== # warnings.filterwarnings("ignore", message="n_jobs value 1") focus_cls = 1#None # ← 若只看 coarse ID=3,把它改成 3 sel = slice(None) if focus_cls is None else (Yc == focus_cls) Q_sel, Ysub_sel = Q[sel], Ysub[sel] # -- 选 UMAP 或 t-SNE -- if HAS_UMAP: # :contentReference[oaicite:2]{index=2} reducer_cls = umap.UMAP if hasattr(umap, "UMAP") else umap.umap_.UMAP reducer = reducer_cls(n_neighbors=30, min_dist=0.1, random_state=CFG.seed) method = "UMAP" else: reducer = TSNE(perplexity=30, init="pca", random_state=CFG.seed) method = "t-SNE" emb = reducer.fit_transform(Q_sel) # (N,2) # ---------- scatter ---------- # unique_sub = np.unique(Ysub_sel) try: # 新版 Matplotlib (≥3.7) cmap = plt.get_cmap("tab20", min(len(unique_sub), 20)) except TypeError: # 旧版 Matplotlib (<3.7) cmap = plt.cm.get_cmap("tab20", min(len(unique_sub), 20)) plt.figure(figsize=(5, 5)) for i, s_id in enumerate(unique_sub): pts = Ysub_sel == s_id plt.scatter(emb[pts, 0], emb[pts, 1], color=cmap(i % 20), s=6, alpha=0.7, label=str(s_id) if len(unique_sub) <= 20 else None) if len(unique_sub) <= 20: plt.legend(markerscale=2, bbox_to_anchor=(1.02, 1), borderaxespad=0.) title = f"{method} ({tag})" if focus_cls is None else f"{method} cls={focus_cls} ({tag})" plt.title(title) plt.tight_layout() plt.savefig(DIAG_DIR / f"embed_{tag}_{timestamp}.png", dpi=300) plt.close() # ========== 4) Silhouette bars ========== # sil_samples = silhouette_samples(Q, Ysub, metric="cosine") order = np.argsort(Ysub) plt.figure(figsize=(6,4)) plt.barh(np.arange(len(sil_samples)), sil_samples[order], color="steelblue") plt.title(f"Silhouette per sample ({tag})"); plt.xlabel("coefficient") plt.tight_layout(); plt.savefig(DIAG_DIR / f"silhouette_bar_{tag}_{timestamp}.png", dpi=300); plt.close() print(f"[Diag] saved to {DIAG_DIR}") def create_dataloaders() -> Tuple[DataLoader, DataLoader, int]: """Load train/val as ImageFolder and return dataloaders + K_C.""" train_dir = Path(CFG.data_root) / "train" val_dir = Path(CFG.data_root) / "test" classes = sorted([d.name for d in train_dir.iterdir() if d.is_dir()]) K_C = len(classes) transform_train = transforms.Compose( [ transforms.Grayscale(num_output_channels=3), transforms.Resize((CFG.img_size, CFG.img_size)), transforms.RandomHorizontalFlip(), transforms.RandomRotation(10), transforms.RandomResizedCrop(CFG.img_size, scale=(0.8, 1.0)), transforms.ToTensor(), transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3), ] ) transform_val = transforms.Compose( [ transforms.Grayscale(num_output_channels=3), transforms.Resize((CFG.img_size, CFG.img_size)), transforms.ToTensor(), transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3), ] ) train_ds = datasets.ImageFolder(str(train_dir), transform=transform_train) val_ds = datasets.ImageFolder(str(val_dir), transform=transform_val) train_loader = DataLoader( train_ds, batch_size=CFG.batch_size, shuffle=True, num_workers=CFG.num_workers, pin_memory=True, drop_last=True, ) val_loader = DataLoader( val_ds, batch_size=CFG.batch_size, shuffle=False, num_workers=CFG.num_workers, pin_memory=True, ) return train_loader, val_loader, K_C # -------------------------- Main training routine -------------------------- # def train(): best_ckpt_path = None # 记录最佳 joint 权重的完整文件名 best_acc = 0.0 best_epoch = -1 train_loader, val_loader, K_C = create_dataloaders() model = BaPSTO(K_C).to(CFG.device) model.steps_per_epoch = len(train_loader) #print(model) mb = MemoryBank(dim=CFG.d_proj, size=CFG.mem_size) warmup_weights_path = Path(CFG.save_root) / "bapsto_warmup_complete.pth" # 检查是否存在预保存的warm-up权重 if warmup_weights_path.exists(): print(f"找到预训练的warm-up权重,正在加载: {warmup_weights_path}") checkpoint = torch.load(warmup_weights_path, map_location=CFG.device,weights_only=True) model.load_state_dict(checkpoint["state_dict"]) print("✓ 成功加载warm-up权重,跳过warm-up阶段!") else: # ---------- Phase 1: DASSER warm‑up (backbone frozen) ---------- # print("\n==== Phase 1 | DASSER warm‑up ====") for p in model.backbone.parameters(): p.requires_grad = False # —— 冻结 prototypes 和 gate_logits —— # model.prototypes.requires_grad = False model.log_alpha.requires_grad = False # —— 冻结 prototypes 和 gate_logits —— # optimizer = optim.AdamW( filter(lambda p: p.requires_grad, model.parameters()), lr=CFG.lr_warmup, weight_decay=CFG.weight_decay, betas=(0.9, 0.95), ) scheduler = CosineAnnealingLR(optimizer, T_max=len(train_loader) * CFG.n_epochs_warmup) for epoch in range(CFG.n_epochs_warmup): run_epoch(train_loader, model, mb, optimizer, scheduler, epoch, phase="warmup") # 保存warm-up完成后的权重 torch.save( {"epoch": CFG.n_epochs_warmup, "state_dict": model.state_dict()}, warmup_weights_path ) print(f"✓ Warm-up完成,模型权重已保存至: {warmup_weights_path}") # after warm‑up loop, before Phase 2 header kmeans_init(model, train_loader) # <─ 新增 print("K‑means initialisation done. Prototypes are now ready.") compute_and_save_diagnostics(model, train_loader, tag="after_kmeans") # ---------- Phase 2: Joint optimisation (all params trainable) ---------- # print("\n==== Phase 2 | Joint optimisation ====") for p in model.backbone.parameters(): p.requires_grad = True # —— 解冻 prototypes 和 gate logits —— # model.prototypes.requires_grad = True model.log_alpha.requires_grad = True # —— 解冻 prototypes 和 gate logits —— # param_groups = [ {"params": [p for n,p in model.named_parameters() if n!='log_alpha'], "lr": CFG.lr_joint}, {"params": [model.log_alpha], "lr": CFG.lr_joint * 2.0} ] optimizer = optim.AdamW( param_groups, weight_decay=CFG.weight_decay, betas=(0.9, 0.95), ) scheduler = CosineAnnealingLR(optimizer, T_max=len(train_loader) * CFG.n_epochs_joint) best_acc = 0.0 best_epoch = -1 epochs_no_improve = 0 for epoch in range(CFG.n_epochs_joint): stats = run_epoch(train_loader, model, mb, optimizer, scheduler, epoch, phase="joint") # ─────────────────────────────────────────── if (epoch + 1) % 1 == 0: # 每个 epoch 都跑验证 # —— 每 5 个 epoch 额外保存 Gate & 聚类诊断 —— # if (epoch + 1) % 5 == 0: timestamp = get_timestamp() gate_prob = torch.sigmoid(model.log_alpha.detach().cpu()) _plot_heatmap( gate_prob, f"Gate prob (ep{epoch+1})", DIAG_DIR / f"gate_ep{epoch+1}_{timestamp}.png", ) compute_and_save_diagnostics( model, train_loader, tag=f"joint_ep{epoch+1}" ) # ---------- 统计指标 ---------- val_loss, val_acc, per_cls_acc, auc = metrics_on_loader(val_loader, model) train_acc = metrics_on_loader (train_loader, model)[1] # 只取整体训练准确率 print(f"[Val] ep {epoch+1:02d} | loss {val_loss:.3f} | " f"acc {val_acc:.3f} | train-acc {train_acc:.3f} |\n" f" per-cls-acc {np.round(per_cls_acc, 2)} |\n" f" AUC {np.round(auc, 2)}") # —— checkpoint —— # if val_acc > best_acc: best_acc = val_acc best_epoch = epoch epochs_no_improve = 0 best_ckpt_path = save_ckpt(model, epoch, tag="best_joint", acc=val_acc, optimizer=optimizer, scheduler=scheduler) # ← 传进去 else: epochs_no_improve += 1 # —— gate 修剪 —— # if epoch+1 >= 10: # 先训练 10 个 epoch 再剪 prune_gates(model, threshold=0.25, min_keep=1, hc_threshold=CFG.hc_threshold) # —— early stopping —— # if epochs_no_improve >= 50: print("Early stopping triggered in joint phase.") break # ─────────────────────────────────────────── model.global_step += 1 print(model.prototypes.grad.norm()) # 非零即可证明 L_proto 对原型确实有更新压力 model.global_step.zero_() # Joint训练结束后,重命名最佳模型文件,添加准确率 best_acc_int = round(best_acc * 1e4) # 将0.7068转换为7068 joint_ckpt_path = Path(CFG.save_root) / "bapsto_best_joint.pth" renamed_path = Path(CFG.save_root) / f"bapsto_best_joint_{best_acc_int}.pth" if joint_ckpt_path.exists(): joint_ckpt_path.rename(renamed_path) best_ckpt_path = renamed_path # ★ 同步路径,供 fine-tune 使用 print(f"✓ 最优联合训练模型已重命名: {renamed_path.name} " f"(epoch {best_epoch+1}, ACC: {best_acc:.4f})") # ---------- Phase 3: Fine‑tune (prototypes & gates frozen) ---------- # print("\n==== Phase 3 | Fine‑tuning ====") best_ft_acc = 0.0 best_ft_epoch = -1 # 若有最佳 joint 权重则加载 if best_ckpt_path is not None and Path(best_ckpt_path).exists(): ckpt = torch.load(best_ckpt_path, map_location=CFG.device, weights_only=True) model.load_state_dict(ckpt["state_dict"]) epoch_loaded = ckpt["epoch"] + 1 # 以 1 为起点的人类可读轮次 acc_loaded = ckpt.get("acc", -1) # 若早期代码没存 acc,给个占位 print(f"✓ loaded best joint ckpt (epoch {epoch_loaded}, ACC {acc_loaded:.4f})") else: print("⚠️ best_ckpt_path 未找到,继续沿用上一轮权重。") for param in [model.prototypes, model.log_alpha]: param.requires_grad = False for p in model.parameters(): if p.requires_grad: p.grad = None # clear any stale gradients optimizer = optim.AdamW( filter(lambda p: p.requires_grad, model.parameters()), lr=CFG.lr_ft, weight_decay=CFG.weight_decay, betas=(0.9, 0.95), ) scheduler = CosineAnnealingLR(optimizer, T_max=len(train_loader) * CFG.n_epochs_ft) for epoch in range(CFG.n_epochs_ft): run_epoch(train_loader, model, mb, optimizer, scheduler, epoch, phase="finetune") if (epoch + 1) % 1 == 0: # 每个 epoch 都评估 val_acc = evaluate(val_loader, model) print(f"[FT] ep {epoch+1:02d} | acc {val_acc:.4f}") # ① 按 epoch 保存快照(可选) save_ckpt(model, epoch, tag="ft") # ② 维护 “fine-tune 最佳” if val_acc > best_ft_acc: best_ft_acc = val_acc best_ft_epoch = epoch best_ft_acc_int = round(best_ft_acc * 1e4) # 将0.7068转换为7068 best_ft_ckpt_path = Path(CFG.save_root) / f"bapsto_best_ft_{best_ft_acc_int}.pth" save_ckpt(model, epoch, tag="best_ft", acc=val_acc) # 只保留一个最新 best_ft # 重命名保存文件 if best_ft_ckpt_path.exists(): best_ft_ckpt_path.rename(best_ft_ckpt_path) print(f"✓ Fine-tune最佳模型已重命名: {best_ft_ckpt_path.name} (epoch {best_ft_epoch+1}, ACC: {best_ft_acc:.4f})") print(f"Training completed. Best FT ACC {best_ft_acc:.4f}") # -------------------------- Helper functions -------------------------- # def run_epoch(loader, model, mem_bank: MemoryBank, optimizer, scheduler, epoch, phase:str): model.train() running = {"loss": 0.0} use_bpgs = (phase != "warmup") for step, (x, y) in enumerate(loader): x, y = x.to(CFG.device), y.to(CFG.device) optimizer.zero_grad() loss, stats, p_det, q_det = model(x, y, mem_bank, use_bpgs=use_bpgs) loss.backward() optimizer.step() scheduler.step() mem_bank.enqueue(p_det, q_det, y.detach()) # accumulate for k, v in stats.items(): running[k] = running.get(k, 0.0) + v # ★★★★★ Hard-Concrete 梯度健康检查 ★★★★★ if phase == "joint" and step % 100 == 0: # ─── Hard-Concrete 监控 ─── tau_now = max( CFG.tau_min_hc, CFG.tau0_hc - (CFG.tau0_hc - CFG.tau_min_hc) * min(1.0, model.global_step.item() / (model.steps_per_epoch * CFG.anneal_epochs_hc)) ) pa = torch.sigmoid(model.log_alpha) # (C,K) p_act = pa.mean().item() alive = (pa > 0.4).float().sum().item() # 0.4 与 prune 阈值一致 total = pa.numel() # = C × K grad_nm = (model.log_alpha.grad.detach().norm().item() if model.log_alpha.grad is not None else 0.0) pa = torch.sigmoid(model.log_alpha) print(f"[DBG] τ={tau_now:.3f} p̄={pa.mean():.3f} " f"min={pa.min():.2f} max={pa.max():.2f} " f"alive={(pa>0.25).sum().item()}/{pa.numel()} " f"‖∇α‖={grad_nm:.2e}") # ★★★★★ 监控段结束 ★★★★★ if (step + 1) % 50 == 0: avg_loss = running["loss"] / (step + 1) print( f"Epoch[{phase} {epoch+1}] Step {step+1}/{len(loader)} | " f"loss: {avg_loss:.4f}", end="\r", ) # epoch summary print(f"Epoch [{phase} {epoch+1}]: " + ', '.join(f"{k}: {running[k]:.4f}" for k in running)) return running @torch.no_grad() def evaluate(loader, model): model.eval() total_correct, total_samples = 0, 0 K_C, K_M = model.prototypes.size(0), model.prototypes.size(1) gate_hard = (model.log_alpha > 0).float() # (K_C,K_M) for x, y in loader: x, y = x.to(CFG.device), y.to(CFG.device) b = x.size(0) # --- 特征 & 距离 --- q = L2_normalise(model.g_FV(model.backbone(x))) # (b,d_p) d = ((q.unsqueeze(1).unsqueeze(2) - model.prototypes.unsqueeze(0))**2).sum(-1) # (b,K_C,K_M) s = 30.0 # scale for logits # --- 子簇 logit & 粗 logit --- mask_logits = -d * s + torch.log(gate_hard + 1e-12) # (b,K_C,K_M) # 这里由于是log,所以二者相加 coarse_logits = torch.logsumexp(mask_logits, dim=2) # (b,K_C) # --- 统计准确率 --- total_correct += coarse_logits.argmax(1).eq(y).sum().item() total_samples += b return total_correct / total_samples @torch.no_grad() def metrics_on_loader(loader, model): """ 返回: loss_avg – 均值交叉熵 acc – overall top-1 per_cls_acc (C,) – 每个 coarse 类别准确率 auc (C,) – 每类 one-vs-rest ROC-AUC """ model.eval() n_cls = model.prototypes.size(0) total_loss, total_correct, total_samples = 0., 0, 0 # —— 用来存储全量 logits / labels —— # logits_all, labels_all = [], [] ce_fn = nn.CrossEntropyLoss(reduction="sum") # 累加再除 for x, y in loader: x, y = x.to(CFG.device), y.to(CFG.device) # 前向 with torch.no_grad(): q = L2_normalise(model.g_FV(model.backbone(x))) d = ((q.unsqueeze(1).unsqueeze(2) - model.prototypes.unsqueeze(0))**2).sum(-1) logits = torch.logsumexp(-d*30 + torch.log((model.log_alpha>0).float()+1e-12), dim=2) total_loss += ce_fn(logits, y).item() total_correct += logits.argmax(1).eq(y).sum().item() total_samples += y.size(0) logits_all.append(logits.cpu()) labels_all.append(y.cpu()) # —— overall —— # loss_avg = total_loss / total_samples acc = total_correct / total_samples # —— 拼接 & 转 numpy —— # logits_all = torch.cat(logits_all).numpy() labels_all = torch.cat(labels_all).numpy() # —— per-class ACC —— # per_cls_acc = np.zeros(n_cls) for c in range(n_cls): mask = labels_all == c if mask.any(): per_cls_acc[c] = (logits_all[mask].argmax(1) == c).mean() # —— per-class AUC —— # try: from sklearn.metrics import roc_auc_score prob = torch.softmax(torch.from_numpy(logits_all), dim=1).numpy() auc = roc_auc_score(labels_all, prob, multi_class="ovr", average=None) except Exception: # 组数太少或只有 1 类样本时会报错 auc = np.full(n_cls, np.nan) return loss_avg, acc, per_cls_acc, auc def save_ckpt(model, epoch:int, tag:str, acc:float|None=None, optimizer=None, scheduler=None): """ 通用保存函数 • 返回 ckpt 文件完整路径,方便上层记录 • 可选把 opt / sched state_dict 一起存进去,便于 resume """ save_dir = Path(CFG.save_root) save_dir.mkdir(parents=True, exist_ok=True) # -------- 路径策略 -------- # if tag == "best_joint": # 只保留一个最新最优 joint ckpt_path = save_dir / "bapsto_best_joint.pth" else: # 其他阶段带时间戳 ckpt_path = save_dir / f"bapsto_{tag}_epoch{epoch+1}_{get_timestamp()}.pth" # -------- 组装 payload -------- # # • vars(CFG) 可以拿到用户自己在 CFG 里写的字段 # • 再过滤掉 __ 开头的内部键、防止把 Python meta-data 也 dump 进去 cfg_dict = {k: v for k, v in vars(CFG).items() if not k.startswith("__")} payload = { "epoch": epoch, "state_dict": model.state_dict(), "cfg": cfg_dict, # ← 改在这里 } if acc is not None: payload["acc"] = acc if optimizer is not None: payload["optimizer"] = optimizer.state_dict() if scheduler is not None: payload["scheduler"] = scheduler.state_dict() torch.save(payload, ckpt_path) print(f"✓ checkpoint saved to {ckpt_path}") return ckpt_path @torch.no_grad() def prune_gates(model: BaPSTO, threshold=0.05, min_keep=2, hc_threshold=0.35): """ Disable sub-clusters whose mean gate probability < threshold. After setting them to -10, we do another **row normalization**: Each coarse class row is subtracted by the max logit of that row, ensuring the maximum logit for active clusters is 0 and inactive clusters ≈ -10 → softmax(-10) ≈ 0. Also check for Hard-Concrete (HC) weights below a threshold (e.g., 0.35) to disable sub-clusters. """ # softmax probabilities (K_C, K_max) p_active = torch.sigmoid(model.log_alpha) # Activation probability mask = (p_active < threshold) # Check HC thresholds and disable low weight clusters low_weight_mask = (p_active < hc_threshold) # Find sub-clusters with low HC weight mask = mask | low_weight_mask # Combine with existing mask # Ensure at least min_keep sub-clusters are kept per coarse class keep_mask = (mask.cumsum(1) >= (CFG.K_max - min_keep)) mask = mask & ~keep_mask pruned = mask.sum().item() if pruned == 0: return model.log_alpha.data[mask] = -10.0 # Set log_alpha of pruned sub-clusters to a very low value print(f"Pruned {pruned} sub-clusters (ḡ<{threshold}, keep≥{min_keep}/class)") # Reassign samples from pruned sub-clusters to active sub-clusters if pruned > 0: # Find the indices of the pruned sub-clusters pruned_clusters = mask.sum(dim=1) > 0 # (K_C,) for c in range(model.prototypes.size(0)): # Loop through each coarse class if pruned_clusters[c]: pruned_indices = mask[c] # Get indices of pruned sub-clusters for class c active_indices = ~pruned_indices # Get indices of active sub-clusters active_prototypes = model.prototypes[c][active_indices] # Get active prototypes q = model.q # Get features # Reassign samples from pruned clusters to active clusters d_active = pairwise_cosine(q, active_prototypes) # Compute distance to active prototypes best_active = d_active.argmin(dim=1) # Assign samples to the nearest active sub-cluster # Update the model with reallocated samples (you can implement reallocation logic here) print(f"Reassigning samples from pruned sub-clusters of class {c} to active clusters.") # -------------------------- Entrypoint -------------------------- # if __name__ == "__main__": os.makedirs(CFG.save_root, exist_ok=True) start = time.time() train() print(f"Total runtime: {(time.time() - start) / 3600:.2f} h") 逐行详细解释代码

这是main.py文件的代码:from datetime import datetime from functools import partial from PIL import Image import cv2 import numpy as np from torch.utils.data import DataLoader from torch.version import cuda from torchvision import transforms from torchvision.datasets import CIFAR10 from torchvision.models import resnet from tqdm import tqdm import argparse import json import math import os import pandas as pd import torch import torch.nn as nn import torch.nn.functional as F #数据增强(核心增强部分) import torch from torchvision import transforms from torch.utils.data import Dataset, DataLoader # 设置参数 parser = argparse.ArgumentParser(description='Train MoCo on CIFAR-10') parser.add_argument('-a', '--arch', default='resnet18') # lr: 0.06 for batch 512 (or 0.03 for batch 256) parser.add_argument('--lr', '--learning-rate', default=0.06, type=float, metavar='LR', help='initial learning rate', dest='lr') parser.add_argument('--epochs', default=300, type=int, metavar='N', help='number of total epochs to run') parser.add_argument('--schedule', default=[120, 160], nargs='*', type=int, help='learning rate schedule (when to drop lr by 10x); does not take effect if --cos is on') parser.add_argument('--cos', action='store_true', help='use cosine lr schedule') parser.add_argument('--batch-size', default=64, type=int, metavar='N', help='mini-batch size') parser.add_argument('--wd', default=5e-4, type=float, metavar='W', help='weight decay') # moco specific configs: parser.add_argument('--moco-dim', default=128, type=int, help='feature dimension') parser.add_argument('--moco-k', default=4096, type=int, help='queue size; number of negative keys') parser.add_argument('--moco-m', default=0.99, type=float, help='moco momentum of updating key encoder') parser.add_argument('--moco-t', default=0.1, type=float, help='softmax temperature') parser.add_argument('--bn-splits', default=8, type=int, help='simulate multi-gpu behavior of BatchNorm in one gpu; 1 is SyncBatchNorm in multi-gpu') parser.add_argument('--symmetric', action='store_true', help='use a symmetric loss function that backprops to both crops') # knn monitor parser.add_argument('--knn-k', default=20, type=int, help='k in kNN monitor') parser.add_argument('--knn-t', default=0.1, type=float, help='softmax temperature in kNN monitor; could be different with moco-t') # utils parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('--results-dir', default='', type=str, metavar='PATH', help='path to cache (default: none)') ''' args = parser.parse_args() # running in command line ''' args = parser.parse_args('') # running in ipynb # set command line arguments here when running in ipynb args.epochs = 300 # 修改处 args.cos = True args.schedule = [] # cos in use args.symmetric = False if args.results_dir == '': args.results_dir = "E:\\contrast\\yolov8\\MoCo\\run\\cache-" + datetime.now().strftime("%Y-%m-%d-%H-%M-%S-moco") moco_args = args class CIFAR10Pair(CIFAR10): def __getitem__(self, index): img = self.data[index] img = Image.fromarray(img) # 原始图像增强 im_1 = self.transform(img) im_2 = self.transform(img) # 退化增强生成额外视图 degraded_results = image_degradation_and_augmentation(img) im_3 = self.transform(Image.fromarray(degraded_results['augmented_images'][0])) # 选择第一组退化增强 im_4 = self.transform(Image.fromarray(degraded_results['cutmix_image'])) return im_1, im_2, im_3, im_4 # 返回原始增强+退化增强 # 定义数据加载器 # class CIFAR10Pair(CIFAR10): # """CIFAR10 Dataset. # """ # def __getitem__(self, index): # img = self.data[index] # img = Image.fromarray(img) # if self.transform is not None: # im_1 = self.transform(img) # im_2 = self.transform(img) # return im_1, im_2 import cv2 import numpy as np import random def apply_interpolation_degradation(img, method): """ 应用插值退化 参数: img: 输入图像(numpy数组) method: 插值方法('nearest', 'bilinear', 'bicubic') 返回: 退化后的图像 """ # 获取图像尺寸 h, w = img.shape[:2] # 应用插值方法 if method == 'nearest': # 最近邻退化: 下采样+上采样 downsampled = cv2.resize(img, (w//2, h//2), interpolation=cv2.INTER_NEAREST) degraded = cv2.resize(downsampled, (w, h), interpolation=cv2.INTER_NEAREST) elif method == 'bilinear': # 双线性退化: 下采样+上采样 downsampled = cv2.resize(img, (w//2, h//2), interpolation=cv2.INTER_LINEAR) degraded = cv2.resize(downsampled, (w, h), interpolation=cv2.INTER_LINEAR) elif method == 'bicubic': # 双三次退化: 下采样+上采样 downsampled = cv2.resize(img, (w//2, h//2), interpolation=cv2.INTER_CUBIC) degraded = cv2.resize(downsampled, (w, h), interpolation=cv2.INTER_CUBIC) else: degraded = img return degraded def darken_image(img, intensity=0.3): """ 应用黑暗处理 - 降低图像亮度并增加暗区对比度 参数: img: 输入图像(numpy数组) intensity: 黑暗强度 (0.1-0.9) 返回: 黑暗处理后的图像 """ # 限制强度范围 intensity = max(0.1, min(0.9, intensity)) # 将图像转换为HSV颜色空间 hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV).astype(np.float32) # 降低亮度(V通道) hsv[:, :, 2] = hsv[:, :, 2] * intensity # 增加暗区的对比度 - 使用gamma校正 gamma = 1.0 + (1.0 - intensity) # 黑暗强度越大,gamma值越大 hsv[:, :, 2] = np.power(hsv[:, :, 2]/255.0, gamma) * 255.0 # 限制值在0-255范围内 hsv[:, :, 2] = np.clip(hsv[:, :, 2], 0, 255) # 转换回RGB return cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB) def random_affine(image): """ 随机仿射变换(缩放和平移) 参数: image: 输入图像(numpy数组) 返回: 变换后的图像 """ height, width = image.shape[:2] # 随机缩放因子 (0.8 to 1.2) scale = random.uniform(0.8, 1.2) # 随机平移 (10% of image size) max_trans = 0.1 * min(width, height) tx = random.randint(-int(max_trans), int(max_trans)) ty = random.randint(-int(max_trans), int(max_trans)) # 变换矩阵 M = np.array([[scale, 0, tx], [0, scale, ty]], dtype=np.float32) # 应用仿射变换 transformed = cv2.warpAffine(image, M, (width, height)) return transformed def augment_hsv(image, h_gain=0.1, s_gain=0.5, v_gain=0.5): """ HSV色彩空间增强 参数: image: 输入图像(numpy数组) h_gain, s_gain, v_gain: 各通道的增益范围 返回: 增强后的图像 """ # 限制增益范围 h_gain = max(-0.1, min(0.1, random.uniform(-h_gain, h_gain))) s_gain = max(0.5, min(1.5, random.uniform(1-s_gain, 1+s_gain))) v_gain = max(0.5, min(1.5, random.uniform(1-v_gain, 1+v_gain))) # 转换为HSV hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV).astype(np.float32) # 应用增益 hsv[:, :, 0] = (hsv[:, :, 0] * (1 + h_gain)) % 180 hsv[:, :, 1] = np.clip(hsv[:, :, 1] * s_gain, 0, 255) hsv[:, :, 2] = np.clip(hsv[:, :, 2] * v_gain, 0, 255) # 转换回RGB return cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB) # def mixup(img1, img2, alpha=0.6): # """ # 将两幅图像混合在一起 # 参数: # img1, img2: 输入图像(numpy数组) # alpha: Beta分布的参数,控制混合比例 # 返回: # 混合后的图像 # """ # # 生成混合比例 # lam = random.betavariate(alpha, alpha) # # 确保图像尺寸相同 # if img1.shape != img2.shape: # img2 = cv2.resize(img2, (img1.shape[1], img1.shape[0])) # # 混合图像 # mixed = (lam * img1.astype(np.float32) + (1 - lam) * img2.astype(np.float32)).astype(np.uint8) # return mixed # def image_degradation_and_augmentation(image,dark_intensity=0.3): # """ # 完整的图像退化和增强流程 # 参数: # image: 输入图像(PIL.Image或numpy数组) # 返回: # dict: 包含所有退化组和最终增强结果的字典 # """ # # 确保输入是numpy数组 # if not isinstance(image, np.ndarray): # image = np.array(image) # # 确保图像为RGB格式 # if len(image.shape) == 2: # image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB) # elif image.shape[2] == 4: # image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB) # # 原始图像 # original = image.copy() # # 插值方法列表 # interpolation_methods = ['nearest', 'bilinear', 'bicubic'] # # 第一组退化: 三种插值方法 # group1 = [] # for method in interpolation_methods: # degraded = apply_interpolation_degradation(original, method) # group1.append(degraded) # # 第二组退化: 随机额外退化 # group2 = [] # for img in group1: # # 随机选择一种退化方法 # method = random.choice(interpolation_methods) # extra_degraded = apply_interpolation_degradation(img, method) # group2.append(extra_degraded) # # 所有退化图像组合 # all_degraded_images = [original] + group1 + group2 # # 应用黑暗处理 (在增强之前) # darkened_images = [darken_image(img, intensity=dark_intensity) for img in all_degraded_images] # # 应用数据增强 # # 1. 随机仿射变换 # affine_images = [random_affine(img) for img in darkened_images] # # 2. HSV增强 # hsv_images = [augment_hsv(img) for img in affine_images] # # 3. MixUp增强 # # 随机选择两个增强后的图像进行混合 # mixed_image = mixup( # random.choice(hsv_images), # random.choice(hsv_images) # ) # # 返回结果 # results = { # 'original': original, # 'degraded_group1': group1, # 第一组退化图像 # 'degraded_group2': group2, # 第二组退化图像 # 'augmented_images': hsv_images, # 所有增强后的图像(原始+六组退化) # 'mixup_image': mixed_image # MixUp混合图像 # } # return results # # def add_gaussian_noise(image, mean=0, sigma=25): # # """添加高斯噪声""" # # noise = np.random.normal(mean, sigma, image.shape) # # noisy = np.clip(image + noise, 0, 255).astype(np.uint8) # # return noisy # # def random_cutout(image, max_holes=3, max_height=16, max_width=16): # # """随机CutOut增强""" # # h, w = image.shape[:2] # # for _ in range(random.randint(1, max_holes)): # # hole_h = random.randint(1, max_height) # # hole_w = random.randint(1, max_width) # # y = random.randint(0, h - hole_h) # # x = random.randint(0, w - hole_w) # # image[y:y+hole_h, x:x+hole_w] = 0 # # return image import cv2 import numpy as np import random from matplotlib import pyplot as plt import pywt def wavelet_degradation(image, level=0.5): """小波系数衰减退化""" # 小波分解 coeffs = pywt.dwt2(image, 'haar') cA, (cH, cV, cD) = coeffs # 衰减高频系数 cH = cH * level cV = cV * level cD = cD * level # 重建图像 return pywt.idwt2((cA, (cH, cV, cD)), 'haar')[:image.shape[0], :image.shape[1]] def adaptive_interpolation_degradation(image): """自适应插值退化(随机选择最近邻或双三次插值)""" if random.choice([True, False]): method = cv2.INTER_NEAREST # 最近邻插值 else: method = cv2.INTER_CUBIC # 双三次插值 # 先缩小再放大 scale_factor = random.uniform(0.3, 0.8) small = cv2.resize(image, None, fx=scale_factor, fy=scale_factor, interpolation=method) return cv2.resize(small, (image.shape[1], image.shape[0]), interpolation=method) def bilinear_degradation(image): """双线性插值退化""" # 先缩小再放大 scale_factor = random.uniform(0.3, 0.8) small = cv2.resize(image, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR) return cv2.resize(small, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_LINEAR) def cutmix(img1, img2, bboxes1=None, bboxes2=None, beta=1.0): """ 参数: img1: 第一张输入图像(numpy数组) img2: 第二张输入图像(numpy数组) bboxes1: 第一张图像的边界框(可选) bboxes2: 第二张图像的边界框(可选) beta: Beta分布的参数,控制裁剪区域的大小 返回: 混合后的图像和边界框(如果有) """ # 确保图像尺寸相同 if img1.shape != img2.shape: img2 = cv2.resize(img2, (img1.shape[1], img1.shape[0])) h, w = img1.shape[:2] # 生成裁剪区域的lambda值(混合比例) lam = np.random.beta(beta, beta) # 计算裁剪区域的宽高 cut_ratio = np.sqrt(1. - lam) cut_w = int(w * cut_ratio) cut_h = int(h * cut_ratio) # 随机确定裁剪区域的中心点 cx = np.random.randint(w) cy = np.random.randint(h) # 计算裁剪区域的边界 x1 = np.clip(cx - cut_w // 2, 0, w) y1 = np.clip(cy - cut_h // 2, 0, h) x2 = np.clip(cx + cut_w // 2, 0, w) y2 = np.clip(cy + cut_h // 2, 0, h) # 执行CutMix操作 mixed_img = img1.copy() mixed_img[y1:y2, x1:x2] = img2[y1:y2, x1:x2] # 计算实际的混合比例 lam = 1 - ((x2 - x1) * (y2 - y1) / (w * h)) # 处理边界框(如果有) mixed_bboxes = None if bboxes1 is not None and bboxes2 is not None: mixed_bboxes = [] # 添加第一张图像的边界框 for bbox in bboxes1: mixed_bboxes.append(bbox + [lam]) # 添加混合权重 # 添加第二张图像的边界框(只添加在裁剪区域内的) for bbox in bboxes2: # 检查边界框是否在裁剪区域内 bbox_x_center = (bbox[0] + bbox[2]) / 2 bbox_y_center = (bbox[1] + bbox[3]) / 2 if (x1 <= bbox_x_center <= x2) and (y1 <= bbox_y_center <= y2): mixed_bboxes.append(bbox + [1 - lam]) return mixed_img, mixed_bboxes def image_degradation_and_augmentation(image, bboxes=None): """ 完整的图像退化和增强流程(修改为使用CutMix) 参数: image: 输入图像(PIL.Image或numpy数组) bboxes: 边界框(可选) 返回: dict: 包含所有退化组和最终增强结果的字典 """ # 确保输入是numpy数组 if not isinstance(image, np.ndarray): image = np.array(image) # 确保图像为RGB格式 if len(image.shape) == 2: image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB) elif image.shape[2] == 4: image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB) degraded_sets = [] original = image.copy() # 第一组退化:三种基础退化 degraded_sets.append(wavelet_degradation(original.copy())) degraded_sets.append(degraded_sets) degraded_sets.append(adaptive_interpolation_degradation(original.copy())) degraded_sets.append(degraded_sets) degraded_sets.append(bilinear_degradation(original.copy())) degraded_sets.append(degraded_sets) # # 原始图像 # original = image.copy() # # 插值方法列表 # interpolation_methods = ['nearest', 'bilinear', 'bicubic'] # # 第一组退化: 三种插值方法 # group1 = [] # for method in interpolation_methods: # degraded = apply_interpolation_degradation(original, method) # group1.append(degraded) # 第二组退化: 随机额外退化 # group2 = [] # for img in group1: # # 随机选择一种退化方法 # method = random.choice(interpolation_methods) # extra_degraded = apply_interpolation_degradation(img, method) # group2.append(extra_degraded) # 第二组退化:随机选择再退化 methods = [wavelet_degradation, adaptive_interpolation_degradation, bilinear_degradation] group2=[] for img in degraded_sets: selected_method = random.choice(methods) group2.append(selected_method(img)) group2.append(group2) # 原始图像 original = image.copy() all_degraded_images = [original] + degraded_sets + group2 # 应用黑暗处理 dark_original = darken_image(original) dark_degraded = [darken_image(img) for img in all_degraded_images] # 合并原始和退化图像 all_images = [dark_original] + dark_degraded # 应用数据增强 # 1. 随机仿射变换 affine_images = [random_affine(img) for img in all_images] # 2. HSV增强 hsv_images = [augment_hsv(img) for img in affine_images] # 3. CutMix增强 # 随机选择两个增强后的图像进行混合 mixed_image, mixed_bboxes = cutmix( random.choice(hsv_images), random.choice(hsv_images), bboxes1=bboxes if bboxes is not None else None, bboxes2=bboxes if bboxes is not None else None ) # 返回结果 results = { 'original': original, 'degraded': dark_degraded, 'augmented_images': hsv_images, # 所有增强后的图像(原始+六组退化) 'cutmix_image': mixed_image, # CutMix混合图像 'cutmix_bboxes': mixed_bboxes if bboxes is not None else None # 混合后的边界框 } return results train_transform = transforms.Compose([ transforms.RandomResizedCrop(32), transforms.RandomHorizontalFlip(p=0.5), transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8), transforms.RandomGrayscale(p=0.2), transforms.ToTensor(), transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])]) test_transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])]) # data_processing prepare train_data = CIFAR10Pair(root="E:/contrast/yolov8/MoCo/data_visdrone2019", train=True, transform=train_transform, download=False) moco_train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=0, pin_memory=True, drop_last=True) memory_data = CIFAR10(root="E:/contrast/yolov8/MoCo/data_visdrone2019", train=True, transform=test_transform, download=False) memory_loader = DataLoader(memory_data, batch_size=args.batch_size, shuffle=False, num_workers=0, pin_memory=True) test_data = CIFAR10(root="E:/contrast/yolov8/MoCo/data_visdrone2019", train=False, transform=test_transform, download=False) test_loader = DataLoader(test_data, batch_size=args.batch_size, shuffle=False, num_workers=0, pin_memory=True) # 定义基本编码器 # SplitBatchNorm: simulate multi-gpu behavior of BatchNorm in one gpu by splitting alone the batch dimension # implementation adapted from https://github.com/davidcpage/cifar10-fast/blob/master/torch_backend.py class SplitBatchNorm(nn.BatchNorm2d): def __init__(self, num_features, num_splits, **kw): super().__init__(num_features, **kw) self.num_splits = num_splits def forward(self, input): N, C, H, W = input.shape if self.training or not self.track_running_stats: running_mean_split = self.running_mean.repeat(self.num_splits) running_var_split = self.running_var.repeat(self.num_splits) outcome = nn.functional.batch_norm( input.view(-1, C * self.num_splits, H, W), running_mean_split, running_var_split, self.weight.repeat(self.num_splits), self.bias.repeat(self.num_splits), True, self.momentum, self.eps).view(N, C, H, W) self.running_mean.data.copy_(running_mean_split.view(self.num_splits, C).mean(dim=0)) self.running_var.data.copy_(running_var_split.view(self.num_splits, C).mean(dim=0)) return outcome else: return nn.functional.batch_norm( input, self.running_mean, self.running_var, self.weight, self.bias, False, self.momentum, self.eps) class ModelBase(nn.Module): """ Common CIFAR ResNet recipe. Comparing with ImageNet ResNet recipe, it: (i) replaces conv1 with kernel=3, str=1 (ii) removes pool1 """ def __init__(self, feature_dim=128, arch=None, bn_splits=16): super(ModelBase, self).__init__() # use split batchnorm norm_layer = partial(SplitBatchNorm, num_splits=bn_splits) if bn_splits > 1 else nn.BatchNorm2d resnet_arch = getattr(resnet, arch) net = resnet_arch(num_classes=feature_dim, norm_layer=norm_layer) self.net = [] for name, module in net.named_children(): if name == 'conv1': module = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) if isinstance(module, nn.MaxPool2d): continue if isinstance(module, nn.Linear): self.net.append(nn.Flatten(1)) self.net.append(module) self.net = nn.Sequential(*self.net) def forward(self, x): x = self.net(x) # note: not normalized here return x # 定义MOCO class ModelMoCo(nn.Module): def __init__(self, dim=128, K=4096, m=0.99, T=0.1, arch='resnet18', bn_splits=8, symmetric=True): super(ModelMoCo, self).__init__() self.K = K self.m = m self.T = T self.symmetric = symmetric # create the encoders self.encoder_q = ModelBase(feature_dim=dim, arch=arch, bn_splits=bn_splits) self.encoder_k = ModelBase(feature_dim=dim, arch=arch, bn_splits=bn_splits) for param_q, param_k in zip(self.encoder_q.parameters(), self.encoder_k.parameters()): param_k.data.copy_(param_q.data) # initialize param_k.requires_grad = False # not update by gradient 不参与训练 # create the queue self.register_buffer("queue", torch.randn(dim, K)) self.queue = nn.functional.normalize(self.queue, dim=0) self.register_buffer("queue_ptr", torch.zeros(1, dtype=torch.long)) @torch.no_grad() def _momentum_update_key_encoder(self): # 动量更新encoder_k """ Momentum update of the key encoder """ for param_q, param_k in zip(self.encoder_q.parameters(), self.encoder_k.parameters()): param_k.data = param_k.data * self.m + param_q.data * (1. - self.m) @torch.no_grad() def _dequeue_and_enqueue(self, keys): # 出队与入队 batch_size = keys.shape[0] ptr = int(self.queue_ptr) assert self.K % batch_size == 0 # for simplicity # replace the keys at ptr (dequeue and enqueue) self.queue[:, ptr:ptr + batch_size] = keys.t() # transpose ptr = (ptr + batch_size) % self.K # move pointer self.queue_ptr[0] = ptr @torch.no_grad() def _batch_shuffle_single_gpu(self, x): """ Batch shuffle, for making use of BatchNorm. """ # random shuffle index idx_shuffle = torch.randperm(x.shape[0]).cuda() # index for restoring idx_unshuffle = torch.argsort(idx_shuffle) return x[idx_shuffle], idx_unshuffle @torch.no_grad() def _batch_unshuffle_single_gpu(self, x, idx_unshuffle): """ Undo batch shuffle. """ return x[idx_unshuffle] def contrastive_loss(self, im_q, im_k): # compute query features q = self.encoder_q(im_q) # queries: NxC q = nn.functional.normalize(q, dim=1) # already normalized # compute key features with torch.no_grad(): # no gradient to keys # shuffle for making use of BN im_k_, idx_unshuffle = self._batch_shuffle_single_gpu(im_k) k = self.encoder_k(im_k_) # keys: NxC k = nn.functional.normalize(k, dim=1) # already normalized # undo shuffle k = self._batch_unshuffle_single_gpu(k, idx_unshuffle) # compute logits # Einstein sum is more intuitive # positive logits: Nx1 l_pos = torch.einsum('nc,nc->n', [q, k]).unsqueeze(-1) # negative logits: NxK l_neg = torch.einsum('nc,ck->nk', [q, self.queue.clone().detach()]) # logits: Nx(1+K) logits = torch.cat([l_pos, l_neg], dim=1) # apply temperature logits /= self.T # labels: positive key indicators labels = torch.zeros(logits.shape[0], dtype=torch.long).cuda() loss = nn.CrossEntropyLoss().cuda()(logits, labels) # 交叉熵损失 return loss, q, k def forward(self, im1, im2): """ Input: im_q: a batch of query images im_k: a batch of key images Output: loss """ # update the key encoder with torch.no_grad(): # no gradient to keys self._momentum_update_key_encoder() # compute loss if self.symmetric: # asymmetric loss loss_12, q1, k2 = self.contrastive_loss(im1, im2) loss_21, q2, k1 = self.contrastive_loss(im2, im1) loss = loss_12 + loss_21 k = torch.cat([k1, k2], dim=0) else: # asymmetric loss loss, q, k = self.contrastive_loss(im1, im2) self._dequeue_and_enqueue(k) return loss # create model moco_model = ModelMoCo( dim=args.moco_dim, K=args.moco_k, m=args.moco_m, T=args.moco_t, arch=args.arch, bn_splits=args.bn_splits, symmetric=args.symmetric, ).cuda() # print(moco_model.encoder_q) moco_model_1 = ModelMoCo( dim=args.moco_dim, K=args.moco_k, m=args.moco_m, T=args.moco_t, arch=args.arch, bn_splits=args.bn_splits, symmetric=args.symmetric, ).cuda() # print(moco_model_1.encoder_q) """ CIFAR10 Dataset. """ from torch.cuda import amp scaler = amp.GradScaler(enabled=cuda) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # train for one epoch # def moco_train(net, net_1, data_loader, train_optimizer, epoch, args): # net.train() # adjust_learning_rate(moco_optimizer, epoch, args) # total_loss, total_num, train_bar = 0.0, 0, tqdm(data_loader) # loss_add = 0.0 # for im_1, im_2 in train_bar: # im_1, im_2 = im_1.cuda(non_blocking=True), im_2.cuda(non_blocking=True) # loss = net(im_1, im_2) # 原始图像对比损失 梯度清零—>梯度回传—>梯度跟新 # # lossT = loss # 只使用原始对比损失 # # train_optimizer.zero_grad() # # lossT.backward() # # train_optimizer.step() # # loss_add += lossT.item() # # total_num += data_loader.batch_size # # total_loss += loss.item() * data_loader.batch_size # # train_bar.set_description( # # 'Train Epoch: [{}/{}], lr: {:.6f}, Loss: {:.4f}'.format( # # epoch, args.epochs, # # train_optimizer.param_groups[0]['lr'], # # loss_add / total_num # # ) # # ) # #傅里叶变换处理流程 # #im_3 = torch.rfft(im_1, 3, onesided=False, normalized=True)[:, :, :, :, 0] # fft_output = torch.fft.fftn(im_1, dim=(-3, -2, -1), norm="ortho")#转换为频域 # real_imag = torch.view_as_real(fft_output)#分解实部虚部 # im_3 = real_imag[..., 0]#提取频域实部作为新视图 # #该处理实现了频域空间的增强,与空间域增强形成了互补 # #im_4 = torch.rfft(im_2, 3, onesided=False, normalized=True)[:, :, :, :, 0] # fft_output = torch.fft.fftn(im_2, dim=(-3, -2, -1), norm="ortho") # real_imag = torch.view_as_real(fft_output) # im_4 = real_imag[..., 0] # loss_1 = net_1(im_3, im_4)#频域特征对比损失 # lossT = 0.8*loss + 0.2*loss_1#多模态损失对比融合 # train_optimizer.zero_grad() # lossT.backward() # train_optimizer.step() # loss_add += lossT # total_num += data_loader.batch_size # total_loss += loss.item() * data_loader.batch_size # # train_bar.set_description( # # 'Train Epoch: [{}/{}], lr: {:.6f}, Loss: {:.4f}'.format(epoch, args.epochs, moco_optimizer.param_groups[0]['lr'], # # loss_add / total_num)) # return (loss_add / total_num).cpu().item() # yolov5需要的损失 def moco_train(net, net_1, data_loader, train_optimizer, epoch, args): net.train() adjust_learning_rate(train_optimizer, epoch, args) total_loss, total_num = 0.0, 0 train_bar = tqdm(data_loader) for im_1, im_2, im_3, im_4 in train_bar: # 接收4组视图 im_1, im_2 = im_1.cuda(), im_2.cuda() im_3, im_4 = im_3.cuda(), im_4.cuda() # 原始空间域对比损失 loss_orig = net(im_1, im_2) # 退化增强图像的空间域对比损失 loss_degraded = net(im_3, im_4) # 频域处理(对退化增强后的图像) fft_3 = torch.fft.fftn(im_3, dim=(-3, -2, -1), norm="ortho") fft_3 = torch.view_as_real(fft_3)[..., 0] # 取实部 fft_4 = torch.fft.fftn(im_4, dim=(-3, -2, -1), norm="ortho") fft_4 = torch.view_as_real(fft_4)[..., 0] # 频域对比损失 loss_freq = net_1(fft_3, fft_4) # 多模态损失融合 loss = 0.6 * loss_orig + 0.3 * loss_degraded + 0.1 * loss_freq # 反向传播 train_optimizer.zero_grad() loss.backward() train_optimizer.step() # 记录损失 total_num += data_loader.batch_size total_loss += loss.item() # train_bar.set_description(f'Epoch: [{epoch}/{args.epochs}] Loss: {total_loss/total_num:.4f}') return total_loss / total_num # lr scheduler for training def adjust_learning_rate(optimizer, epoch, args): # 学习率衰减 """Decay the learning rate based on schedule""" lr = args.lr if args.cos: # cosine lr schedule lr *= 0.5 * (1. + math.cos(math.pi * epoch / args.epochs)) else: # stepwise lr schedule for milestone in args.schedule: lr *= 0.1 if epoch >= milestone else 1. for param_group in optimizer.param_groups: param_group['lr'] = lr # test using a knn monitor def test(net, memory_data_loader, test_data_loader, epoch, args): net.eval() classes = len(memory_data_loader.dataset.classes) total_top1, total_top5, total_num, feature_bank = 0.0, 0.0, 0, [] with torch.no_grad(): # generate feature bank for data, target in tqdm(memory_data_loader, desc='Feature extracting'): feature = net(data.cuda(non_blocking=True)) feature = F.normalize(feature, dim=1) feature_bank.append(feature) # [D, N] feature_bank = torch.cat(feature_bank, dim=0).t().contiguous() # [N] feature_labels = torch.tensor(memory_data_loader.dataset.targets, device=feature_bank.device) # loop test data_processing to predict the label by weighted knn search test_bar = tqdm(test_data_loader) for data, target in test_bar: data, target = data.cuda(non_blocking=True), target.cuda(non_blocking=True) feature = net(data) feature = F.normalize(feature, dim=1) pred_labels = knn_predict(feature, feature_bank, feature_labels, classes, args.knn_k, args.knn_t) total_num += data.size(0) total_top1 += (pred_labels[:, 0] == target).float().sum().item() test_bar.set_description( 'Test Epoch: [{}/{}] Acc@1:{:.2f}%'.format(epoch, args.epochs, total_top1 / total_num * 100)) return total_top1 / total_num * 100 # knn monitor as in InstDisc https://arxiv.org/abs/1805.01978 # implementation follows http://github.com/zhirongw/lemniscate.pytorch and https://github.com/leftthomas/SimCLR def knn_predict(feature, feature_bank, feature_labels, classes, knn_k, knn_t): # compute cos similarity between each feature vector and feature bank ---> [B, N] sim_matrix = torch.mm(feature, feature_bank) # [B, K] sim_weight, sim_indices = sim_matrix.topk(k=knn_k, dim=-1) # [B, K] sim_labels = torch.gather(feature_labels.expand(feature.size(0), -1), dim=-1, index=sim_indices) sim_weight = (sim_weight / knn_t).exp() # counts for each class one_hot_label = torch.zeros(feature.size(0) * knn_k, classes, device=sim_labels.device) # [B*K, C] one_hot_label = one_hot_label.scatter(dim=-1, index=sim_labels.view(-1, 1), value=1.0) # weighted score ---> [B, C] pred_scores = torch.sum(one_hot_label.view(feature.size(0), -1, classes) * sim_weight.unsqueeze(dim=-1), dim=1) pred_labels = pred_scores.argsort(dim=-1, descending=True) return pred_labels # 开始训练 # define optimizer moco_optimizer = torch.optim.SGD(moco_model.parameters(), lr=args.lr, weight_decay=args.wd, momentum=0.9) 上述问题怎么修改?

import numpy as np import pandas as pd import torch import torch.nn as nn import torch.optim as optim from sklearn.preprocessing import MinMaxScaler from sklearn.metrics import mean_absolute_error, mean_squared_error import matplotlib.pyplot as plt from scipy.interpolate import BSpline # ------------------------------------------------------ # 1. 配置超参数(参考文献设置) # ------------------------------------------------------ context_len = 168 # 上下文长度:过去168小时(7天),文献用168 pred_len = 24 # 预测长度:未来24小时(1天),文献用24 kan_nodes = 40 # KAN层节点数,文献用40 kan_depth = 3 # KAN深度:3层(结构[168,40,40,24]),文献用3深度 spline_k = 3 # B样条阶数,文献用k=3 spline_G = 5 # 样条网格数(区间数),文献用G=5 epochs = 500 # 训练轮次,文献用500 lr = 0.001 # 学习率,文献用0.001 batch_size = 32 # 批次大小(根据数据量调整) # 设备配置(GPU优先) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print(f"使用设备:{device}") # ------------------------------------------------------ # 2. 数据预处理(时间序列样本构建) # ------------------------------------------------------ def load_and_preprocess_data(data_path): """ 加载大坝渗流水位数据并预处理: 1. 加载数据,按时间排序 2. 归一化(文献对卫星流量做了归一化) 3. 构建[上下文长度, 预测长度]的样本对 """ # 加载数据 df = pd.read_csv(data_path) df["time"] = pd.to_datetime(df["time"]) df = df.sort_values("time").reset_index(drop=True) # 按时间排序 water_level = df["water_level"].values.reshape(-1, 1) # 提取水位数据 # 归一化(MinMaxScaler,缩放到[0,1]) scaler = MinMaxScaler(feature_range=(0, 1)) water_level_scaled = scaler.fit_transform(water_level) # 构建样本:X=(样本数, context_len), y=(样本数, pred_len) X, y = [], [] for i in range(len(water_level_scaled) - context_len - pred_len + 1): X.append(water_level_scaled[i:i + context_len, 0]) # 过去context_len个时刻 y.append(water_level_scaled[i + context_len:i + context_len + pred_len, 0]) # 未来pred_len个时刻 # 转换为numpy数组 X = np.array(X) y = np.array(y) # 划分训练集/测试集(文献:2周+1天训练,1周+1天测试,按时间划分避免泄露) train_ratio = 0.7 # 可根据数据量调整,确保测试集包含完整预测周期 train_size = int(len(X) * train_ratio) X_train, X_test = X[:train_size], X[train_size:] y_train, y_test = y[:train_size], y[train_size:] # 转换为PyTorch张量(适配模型输入) X_train = torch.tensor(X_train, dtype=torch.float32).to(device) X_test = torch.tensor(X_test, dtype=torch.float32).to(device) y_train = torch.tensor(y_train, dtype=torch.float32).to(device) y_test = torch.tensor(y_test, dtype=torch.float32).to(device) return X_train, X_test, y_train, y_test, scaler, df # ------------------------------------------------------ # 3. 实现KAN核心组件:B样条层(参考文献2.2节) # ------------------------------------------------------ class BSplines(nn.Module): """ B样条函数模块:生成k阶B样条基函数,控制点为可学习参数 文献逻辑:KAN的边缘用样条参数化单变量函数,控制点通过训练优化 """ def __init__(self, in_dim, out_dim, G, k): super(BSplines, self).__init__() self.in_dim = in_dim # 输入维度(前一层输出维度) self.out_dim = out_dim # 输出维度(后一层输入维度) self.G = G # 网格数(区间数) self.k = k # 样条阶数 self.num_knots = G + k + 1 # 节点数(B样条公式要求:k阶样条需G+k+1个节点) # 1. 固定样条节点(均匀分布在[0,1],因输入已归一化) self.knots = torch.linspace(0, 1, self.num_knots, device=device) # 2. 可学习控制点:形状(in_dim, out_dim, G+k)(每个输入-输出对对应一组控制点) # 初始化:均匀分布,确保初始样条平滑 self.ctrl_points = nn.Parameter( torch.randn(in_dim, out_dim, G + k, device=device) * 0.1 ) def forward(self, x): """ 前向传播:计算输入x通过B样条后的输出 x: (batch_size, in_dim) -> 输出: (batch_size, out_dim) """ batch_size = x.shape[0] output = torch.zeros(batch_size, self.out_dim, device=device) # 对每个输入维度和输出维度,计算B样条值并求和(文献:节点执行简单求和) for i in range(self.in_dim): # 遍历输入维度 for j in range(self.out_dim): # 遍历输出维度 # 提取当前输入维度的特征(batch_size, 1) x_i = x[:, i].unsqueeze(1) # 生成B样条基函数(scipy的BSpline,转换为torch函数) bspline = BSpline(self.knots.cpu().numpy(), self.ctrl_points[i, j].cpu().numpy(), self.k) # 计算样条值(批量处理) spline_val = torch.tensor(bspline(x_i.cpu().numpy()), dtype=torch.float32, device=device) # 累加至输出(文献:KAN层的求和逻辑) output[:, j] += spline_val.squeeze(1) return output # ------------------------------------------------------ # 4. 构建KAN模型(参考文献2.3节时间序列KAN结构) # ------------------------------------------------------ class KANTimeSeries(nn.Module): """ KAN时间序列预测模型:堆叠BSplines层,对应文献的深度结构 示例结构(3深度):input(context_len) -> BSplines -> BSplines -> BSplines -> output(pred_len) """ def __init__(self, context_len, pred_len, kan_nodes, kan_depth, spline_G, spline_k): super(KANTimeSeries, self).__init__() self.context_len = context_len self.pred_len = pred_len self.kan_depth = kan_depth self.layers = nn.ModuleList() # 存储KAN层 # 1. 输入层 -> 第一个KAN层(输入维度=context_len,输出维度=kan_nodes) self.layers.append(BSplines(in_dim=context_len, out_dim=kan_nodes, G=spline_G, k=spline_k)) # 2. 中间KAN层(深度=kan_depth-2,输入输出维度=kan_nodes) for _ in range(kan_depth - 2): self.layers.append(BSplines(in_dim=kan_nodes, out_dim=kan_nodes, G=spline_G, k=spline_k)) # 3. 最后一个KAN层 -> 输出层(输入维度=kan_nodes,输出维度=pred_len) self.layers.append(BSplines(in_dim=kan_nodes, out_dim=pred_len, G=spline_G, k=spline_k)) # 可选:添加批量归一化(提升训练稳定性,文献未提但工程常用) self.bn_layers = nn.ModuleList([nn.BatchNorm1d(kan_nodes) for _ in range(kan_depth - 1)]) def forward(self, x): """前向传播:x -> (batch_size, context_len) -> 输出: (batch_size, pred_len)""" for i, layer in enumerate(self.layers): if i < len(self.bn_layers): # 中间层后加BN x = self.bn_layers[i](layer(x)) else: # 输出层不加BN x = layer(x) return x # ------------------------------------------------------ # 5. 模型训练与评估 # ------------------------------------------------------ def train_model(model, X_train, y_train, X_test, y_test, epochs, batch_size, lr): """训练KAN模型,打印训练日志并返回训练历史""" # 定义损失函数(MAE,文献用MAE) criterion = nn.L1Loss() # 定义优化器(Adam,文献用Adam) optimizer = optim.Adam(model.parameters(), lr=lr) # 数据加载器(批量训练) train_dataset = torch.utils.data.TensorDataset(X_train, y_train) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True) # 训练历史 train_loss_history = [] val_loss_history = [] model.train() # 训练模式 for epoch in range(epochs): epoch_train_loss = 0.0 for batch_X, batch_y in train_loader: # 前向传播 outputs = model(batch_X) loss = criterion(outputs, batch_y) # 反向传播与优化 optimizer.zero_grad() loss.backward() optimizer.step() epoch_train_loss += loss.item() * batch_X.size(0) # 计算epoch平均训练损失 avg_train_loss = epoch_train_loss / len(X_train) train_loss_history.append(avg_train_loss) # 验证(每10轮打印一次) model.eval() with torch.no_grad(): val_outputs = model(X_test) val_loss = criterion(val_outputs, y_test).item() val_loss_history.append(val_loss) if (epoch + 1) % 10 == 0: print(f"Epoch [{epoch + 1}/{epochs}], Train Loss: {avg_train_loss:.6f}, Val Loss: {val_loss:.6f}") model.train() # 恢复训练模式 return train_loss_history, val_loss_history def evaluate_model(model, X_test, y_test, scaler): """评估模型:计算MAE、MSE、RMSE、MAPE,返回反归一化后的真实值与预测值""" model.eval() with torch.no_grad(): # 预测 y_pred = model(X_test) # 转换为numpy数组(反归一化) y_test_np = y_test.cpu().numpy().reshape(-1, 1) y_pred_np = y_pred.cpu().numpy().reshape(-1, 1) # 反归一化(恢复原始水位尺度) y_test_original = scaler.inverse_transform(y_test_np).reshape(-1, pred_len) y_pred_original = scaler.inverse_transform(y_pred_np).reshape(-1, pred_len) # 计算评估指标(取所有样本的平均) mae = mean_absolute_error(y_test_original, y_pred_original) mse = mean_squared_error(y_test_original, y_pred_original) rmse = np.sqrt(mse) # MAPE:避免除以0,加小值 mape = np.mean(np.abs((y_test_original - y_pred_original) / (y_test_original + 1e-6))) * 100 print("\n模型评估结果:") print(f"MAE: {mae:.4f} m") print(f"MSE: {mse:.4f} m²") print(f"RMSE: {rmse:.4f} m") print(f"MAPE: {mape:.2f}%") return y_test_original, y_pred_original # ------------------------------------------------------ # 6. 结果可视化(参考文献图3) # ------------------------------------------------------ def plot_results(df, y_test_original, y_pred_original, context_len, pred_len): """ 可视化: 1. 训练损失曲线 2. 测试集上的真实水位vs预测水位(选最后一个测试样本) """ # 1. 训练损失曲线 plt.figure(figsize=(12, 6)) plt.subplot(1, 2, 1) plt.plot(train_loss_history, label="Train Loss") plt.plot(val_loss_history, label="Val Loss") plt.xlabel("Epoch") plt.ylabel("MAE Loss") plt.title("KAN Model Training Loss") plt.legend() plt.grid(True, alpha=0.3) # 2. 真实水位vs预测水位(选最后一个测试样本,更具代表性) last_test_idx = -1 true_water = y_test_original[last_test_idx] pred_water = y_pred_original[last_test_idx] # 生成时间轴(假设测试集最后一个样本的预测时间对应原始数据的最后pred_len个时刻) test_time = df["time"].iloc[-len(true_water):].values plt.subplot(1, 2, 2) plt.plot(test_time, true_water, label="真实渗流水位", color="blue", linewidth=2) plt.plot(test_time, pred_water, label="预测渗流水位", color="red", linewidth=2, linestyle="--") plt.xlabel("时间") plt.ylabel("渗流水位 (m)") plt.title("大坝渗流水位预测结果(最后24小时)") plt.legend() plt.xticks(rotation=45) plt.grid(True, alpha=0.3) plt.tight_layout() plt.show() # ------------------------------------------------------ # 7. 主函数(串联所有流程) # ------------------------------------------------------ if __name__ == "__main__": # (1)加载数据(替换为你的大坝渗流水位数据路径) data_path = "dam_water_level.csv" # 格式:time, water_level X_train, X_test, y_train, y_test, scaler, df = load_and_preprocess_data(data_path) print(f"数据加载完成:训练样本数={len(X_train)}, 测试样本数={len(X_test)}") # (2)构建KAN模型 kan_model = KANTimeSeries( context_len=context_len, pred_len=pred_len, kan_nodes=kan_nodes, kan_depth=kan_depth, spline_G=spline_G, spline_k=spline_k ).to(device) print("KAN模型结构:") print(kan_model) # (3)训练模型 print("\n开始训练模型...") train_loss_history, val_loss_history = train_model( model=kan_model, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, epochs=epochs, batch_size=batch_size, lr=lr ) # (4)评估模型 y_test_original, y_pred_original = evaluate_model(kan_model, X_test, y_test, scaler) # (5)可视化结果 plot_results(df, y_test_original, y_pred_original, context_len, pred_len) # (6)保存模型(可选) torch.save(kan_model.state_dict(), "kan_dam_water_level.pth") print("\n模型已保存为:kan_dam_water_level.pth") 把代码中关于设备配置使用GPU的删除

import os import datetime import numpy as np import matplotlib.pyplot as plt import shutil from glob import glob from tqdm import tqdm from PIL import Image import csv import torch import torch.nn as nn import torch.optim as optim import torch.nn.functional as F from torch.utils.data import Dataset, DataLoader from torchvision import transforms from torch.utils.tensorboard import SummaryWriter from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score class AttentionUNet: def __init__(self, input_shape, classes, epochs, result_path, database_path, learning_rate=1e-4, batch_size=8, early_stopping_patience=10, lr_reduction_patience=5, lr_reduction_factor=0.5, dropout_rate=0.3, filters_base=32, kernel_size=3, activation='relu', output_activation='softmax', optimizer='adam', loss='cross_entropy', # 默认使用PyTorch支持的损失函数 metrics=['accuracy'], attention_mechanism='additive'): self.input_shape = input_shape self.num_classes = classes['class_number'] # 解析类别信息 self.background_gray = classes['bg'][0] self.background_name = classes['bg'][1] self.foreground_labels = classes['fg'] # 创建灰度值到类别索引的映射 self.gray_to_index = {self.background_gray: 0} # 背景映射到索引0 # 前景映射到索引1,2,... for idx, (gray_val, name) in enumerate(self.foreground_labels.items(), start=1): self.gray_to_index[gray_val] = idx # 验证映射 if len(self.gray_to_index) != self.num_classes: raise ValueError(f"类别数量不匹配: 配置的类别数={self.num_classes}, 实际映射的类别数={len(self.gray_to_index)}") self.epochs = epochs self.result_path = result_path self.database_path = database_path self.learning_rate = learning_rate self.batch_size = batch_size self.early_stopping_patience = early_stopping_patience self.lr_reduction_patience = lr_reduction_patience self.lr_reduction_factor = lr_reduction_factor self.dropout_rate = dropout_rate self.filters_base = filters_base self.kernel_size = kernel_size self.activation = activation self.output_activation = output_activation self.optimizer_type = optimizer self.loss_type = loss self.metrics = metrics self.attention_mechanism = attention_mechanism self.model = None self.history = None self.best_model_path = os.path.join(self.result_path, 'models', 'best_model.pth') self.temp_model_dir = os.path.join(self.result_path, 'models', 'temp_epochs') self.train_log_path = os.path.join(self.result_path, 'diagram', '训练日志.csv') self.best_model_performance_path = os.path.join(self.result_path, 'diagram', '最佳模型性能.txt') self.performance_plots_dir = os.path.join(self.result_path, 'diagram', 'performance_plots') self.tensorboard_dir = os.path.join(self.result_path, 'tensorboard') # 创建结果目录结构 self._create_result_directories() self._print_model_config() # 设置设备 self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print(f"使用设备: {self.device}") def _create_result_directories(self): """创建结果文件目录结构并保存配置。""" dirs_to_create = [ self.result_path, os.path.join(self.result_path, 'models'), os.path.join(self.result_path, 'diagram'), self.temp_model_dir, # 确保包含这个目录 self.performance_plots_dir, self.tensorboard_dir ] for dir_path in dirs_to_create: os.makedirs(dir_path, exist_ok=True) print(f"目录已创建: {dir_path}") print(f"结果文件目录结构已创建:{self.result_path}") # 保存模型配置 config_path = os.path.join(self.result_path, 'models', 'configuration.txt') try: with open(config_path, 'w', encoding='utf-8') as f: f.write("Attention U-Net 模型配置 (PyTorch 实现)\n") f.write(f"保存时间: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") f.write("="*50 + "\n\n") f.write("核心参数:\n") f.write(f"输入尺寸: {self.input_shape}\n") f.write(f"类别数: {self.num_classes}\n") f.write(f"背景类别: {self.background_name} (灰度值: {self.background_gray})\n") f.write("前景类别:\n") for gray_val, name in self.foreground_labels.items(): f.write(f" {name}: 灰度值 {gray_val}\n") f.write("\n训练参数:\n") f.write(f"训练轮数: {self.epochs}\n") f.write(f"批次大小: {self.batch_size}\n") f.write(f"学习率: {self.learning_rate}\n") f.write(f"早停耐心值: {self.early_stopping_patience}\n") f.write(f"学习率衰减耐心值: {self.lr_reduction_patience}\n") f.write(f"学习率衰减因子: {self.lr_reduction_factor}\n") f.write("\n模型架构参数:\n") f.write(f"基础卷积核数量: {self.filters_base}\n") f.write(f"卷积核大小: {self.kernel_size}\n") f.write(f"激活函数: {self.activation}\n") f.write(f"输出激活函数: {self.output_activation}\n") f.write(f"Dropout比率: {self.dropout_rate}\n") f.write(f"注意力机制类型: {self.attention_mechanism}\n") f.write("\n优化参数:\n") f.write(f"优化器: {self.optimizer_type}\n") f.write(f"损失函数: {self.loss_type}\n") f.write(f"监控指标: {', '.join(self.metrics)}\n") print(f"模型配置已保存到: {config_path}") except Exception as e: print(f"保存配置时出错: {e}") def _print_model_config(self): """在控制台打印模型配置。""" print("\n" + "="*30) print("模型配置:") print(f"输入尺寸: {self.input_shape}") print(f"类别数: {self.num_classes}") print(f"背景类别: {self.background_name} (灰度值: {self.background_gray} -> 索引: {self.gray_to_index[self.background_gray]})") print("前景类别:") for gray_val, name in self.foreground_labels.items(): print(f" {name} (灰度值: {gray_val} -> 索引: {self.gray_to_index[gray_val]})") print(f"训练轮数: {self.epochs}") print(f"结果文件路径: {self.result_path}") print(f"数据集文件路径: {self.database_path}") print(f"学习率: {self.learning_rate}") print(f"批次大小: {self.batch_size}") print(f"早停耐心值: {self.early_stopping_patience}") print(f"学习率衰减耐心值: {self.lr_reduction_patience}") print(f"学习率衰减因子: {self.lr_reduction_factor}") print(f"Dropout比率: {self.dropout_rate}") print(f"基础卷积核数量: {self.filters_base}") print(f"卷积核大小: {self.kernel_size}") print(f"激活函数: {self.activation}") print(f"输出激活函数: {self.output_activation}") print(f"优化器: {self.optimizer_type}") print(f"损失函数: {self.loss_type}") print(f"监控指标: {self.metrics}") print(f"注意力机制类型: {self.attention_mechanism}") print("="*30 + "\n") class AttentionUNetModel(nn.Module): """PyTorch实现的Attention U-Net模型""" def __init__(self, input_channels, num_classes, filters_base=32, kernel_size=3, dropout_rate=0.3, attention_mechanism='additive', output_activation='softmax'): super().__init__() self.input_channels = input_channels self.num_classes = num_classes self.filters_base = filters_base self.kernel_size = kernel_size self.dropout_rate = dropout_rate self.attention_mechanism = attention_mechanism self.output_activation = output_activation # 添加输出激活函数属性 # 编码器 (Encoder) self.enc1 = self._conv_block(input_channels, filters_base) self.enc2 = self._conv_block(filters_base, filters_base * 2) self.enc3 = self._conv_block(filters_base * 2, filters_base * 4) self.enc4 = self._conv_block(filters_base * 4, filters_base * 8) self.pool = nn.MaxPool2d(2) self.dropout = nn.Dropout2d(dropout_rate) # Bottleneck self.bottleneck = self._conv_block(filters_base * 8, filters_base * 16) # 解码器 (Decoder) self.up1 = self._up_block(filters_base * 16, filters_base * 8) self.att1 = self._attention_gate(filters_base * 8, filters_base * 8) self.dec1 = self._conv_block(filters_base * 16, filters_base * 8) self.up2 = self._up_block(filters_base * 8, filters_base * 4) self.att2 = self._attention_gate(filters_base * 4, filters_base * 4) self.dec2 = self._conv_block(filters_base * 8, filters_base * 4) self.up3 = self._up_block(filters_base * 4, filters_base * 2) self.att3 = self._attention_gate(filters_base * 2, filters_base * 2) self.dec3 = self._conv_block(filters_base * 4, filters_base * 2) self.up4 = self._up_block(filters_base * 2, filters_base) self.att4 = self._attention_gate(filters_base, filters_base) self.dec4 = self._conv_block(filters_base * 2, filters_base) # 输出层 self.out_conv = nn.Conv2d(filters_base, num_classes, kernel_size=1) def _conv_block(self, in_channels, out_channels): """标准卷积块,包含Conv2D, BatchNorm, Activation。""" return nn.Sequential( nn.Conv2d(in_channels, out_channels, kernel_size=self.kernel_size, padding=self.kernel_size//2), nn.BatchNorm2d(out_channels), nn.ReLU(inplace=True), nn.Conv2d(out_channels, out_channels, kernel_size=self.kernel_size, padding=self.kernel_size//2), nn.BatchNorm2d(out_channels), nn.ReLU(inplace=True) ) def _up_block(self, in_channels, out_channels): """上采样块,包含ConvTranspose2d和Dropout。""" return nn.Sequential( nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2), nn.Dropout2d(self.dropout_rate) ) def _attention_gate(self, g_channels, x_channels): """注意力门 (Attention Gate)。""" if self.attention_mechanism == 'additive': # 加性注意力机制 return nn.Sequential( nn.Conv2d(g_channels, x_channels, kernel_size=1), nn.BatchNorm2d(x_channels), nn.ReLU(inplace=True), nn.Conv2d(x_channels, x_channels, kernel_size=1), nn.BatchNorm2d(x_channels), nn.Sigmoid() ) elif self.attention_mechanism == 'multiplicative': # 乘性注意力机制 return nn.Sequential( nn.Conv2d(g_channels + x_channels, x_channels, kernel_size=1), nn.BatchNorm2d(x_channels), nn.ReLU(inplace=True), nn.Conv2d(x_channels, 1, kernel_size=1), nn.BatchNorm2d(1), nn.Sigmoid() ) else: raise ValueError(f"不支持的注意力机制类型: {self.attention_mechanism}") def forward(self, x): # 编码器路径 enc1 = self.enc1(x) enc1_pool = self.pool(enc1) enc1_pool = self.dropout(enc1_pool) enc2 = self.enc2(enc1_pool) enc2_pool = self.pool(enc2) enc2_pool = self.dropout(enc2_pool) enc3 = self.enc3(enc2_pool) enc3_pool = self.pool(enc3) enc3_pool = self.dropout(enc3_pool) enc4 = self.enc4(enc3_pool) enc4_pool = self.pool(enc4) enc4_pool = self.dropout(enc4_pool) # 瓶颈层 bottleneck = self.bottleneck(enc4_pool) # 解码器路径 # 上采样块1 up1 = self.up1(bottleneck) if self.attention_mechanism == 'additive': att1 = self.att1(up1) att1 = att1 * enc4 else: att1 = self.att1(torch.cat([up1, enc4], dim=1)) att1 = att1 * enc4 merge1 = torch.cat([up1, att1], dim=1) dec1 = self.dec1(merge1) # 上采样块2 up2 = self.up2(dec1) if self.attention_mechanism == 'additive': att2 = self.att2(up2) att2 = att2 * enc3 else: att2 = self.att2(torch.cat([up2, enc3], dim=1)) att2 = att2 * enc3 merge2 = torch.cat([up2, att2], dim=1) dec2 = self.dec2(merge2) # 上采样块3 up3 = self.up3(dec2) if self.attention_mechanism == 'additive': att3 = self.att3(up3) att3 = att3 * enc2 else: att3 = self.att3(torch.cat([up3, enc2], dim=1)) att3 = att3 * enc2 merge3 = torch.cat([up3, att3], dim=1) dec3 = self.dec3(merge3) # 上采样块4 up4 = self.up4(dec3) if self.attention_mechanism == 'additive': att4 = self.att4(up4) att4 = att4 * enc1 else: att4 = self.att4(torch.cat([up4, enc1], dim=1)) att4 = att4 * enc1 merge4 = torch.cat([up4, att4], dim=1) dec4 = self.dec4(merge4) # 输出层 out = self.out_conv(dec4) # 应用输出激活函数 if self.output_activation == 'softmax': out = F.softmax(out, dim=1) elif self.output_activation == 'sigmoid': out = torch.sigmoid(out) return out def _dice_loss(self, y_pred, y_true, smooth=1e-6): """Dice损失函数实现""" # 展平预测和真实标签 y_pred_flat = y_pred.contiguous().view(-1) y_true_flat = y_true.contiguous().view(-1) # 计算交集和并集 intersection = (y_pred_flat * y_true_flat).sum() union = y_pred_flat.sum() + y_true_flat.sum() # 计算Dice系数 dice = (2. * intersection + smooth) / (union + smooth) # 返回Dice损失 return 1 - dice def _focal_loss(self, y_pred, y_true, alpha=0.25, gamma=2.0): """Focal损失函数实现""" # 计算交叉熵 ce_loss = F.binary_cross_entropy(y_pred, y_true, reduction='none') # 计算概率 p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred) # 计算Focal损失 focal_loss = torch.pow(1 - p_t, gamma) * ce_loss # 应用alpha权重 if alpha is not None: alpha_t = y_true * alpha + (1 - y_true) * (1 - alpha) focal_loss = alpha_t * focal_loss return focal_loss.mean() def _jaccard_loss(self, y_pred, y_true, smooth=1e-6): """Jaccard损失函数实现(IoU损失)""" # 展平预测和真实标签 y_pred_flat = y_pred.contiguous().view(-1) y_true_flat = y_true.contiguous().view(-1) # 计算交集和并集 intersection = (y_pred_flat * y_true_flat).sum() total = y_pred_flat.sum() + y_true_flat.sum() union = total - intersection # 计算Jaccard指数(IoU) iou = (intersection + smooth) / (union + smooth) return 1 - iou def build_model(self): """构建Attention U-Net模型。""" input_channels = self.input_shape[2] if len(self.input_shape) == 3 else 3 self.model = self.AttentionUNetModel( input_channels=input_channels, num_classes=self.num_classes, filters_base=self.filters_base, kernel_size=self.kernel_size, dropout_rate=self.dropout_rate, attention_mechanism=self.attention_mechanism, output_activation=self.output_activation # 添加输出激活函数参数 ).to(self.device) print("Attention U-Net 模型已成功构建。") print(f"模型参数数量: {sum(p.numel() for p in self.model.parameters() if p.requires_grad):,}") # 选择优化器 if self.optimizer_type.lower() == 'adam': optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate) elif self.optimizer_type.lower() == 'sgd': optimizer = optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=0.9) elif self.optimizer_type.lower() == 'rmsprop': optimizer = optim.RMSprop(self.model.parameters(), lr=self.learning_rate) else: raise ValueError(f"不支持的优化器类型: {self.optimizer_type}") # 选择损失函数 if self.loss_type.lower() == 'dice_loss': criterion = self._dice_loss elif self.loss_type.lower() == 'focal_loss': criterion = self._focal_loss elif self.loss_type.lower() == 'jaccard_loss': criterion = self._jaccard_loss elif self.loss_type.lower() in ['cross_entropy', 'categorical_crossentropy']: # 支持两种名称 criterion = nn.CrossEntropyLoss() else: raise ValueError(f"不支持的损失函数类型: {self.loss_type}") return self.model, optimizer, criterion class SegmentationDataset(Dataset): """图像分割数据集类""" def __init__(self, image_dir, mask_dir, input_shape, gray_to_index, num_classes): self.image_dir = image_dir self.mask_dir = mask_dir self.input_shape = input_shape self.gray_to_index = gray_to_index self.num_classes = num_classes self.image_files = sorted(glob(os.path.join(image_dir, '*'))) self.mask_files = sorted(glob(os.path.join(mask_dir, '*'))) if not self.image_files: raise FileNotFoundError(f"在 {image_dir} 中未找到任何图像文件") if not self.mask_files: raise FileNotFoundError(f"在 {mask_dir} 中未找到任何掩码文件") if len(self.image_files) != len(self.mask_files): print(f"警告: 数据集中图像文件数量 ({len(self.image_files)}) 与掩码文件数量 ({len(self.mask_files)}) 不匹配") self.transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) def __len__(self): return min(len(self.image_files), len(self.mask_files)) def __getitem__(self, idx): # 加载图像 img_path = self.image_files[idx] img = Image.open(img_path).convert('RGB') img = img.resize((self.input_shape[1], self.input_shape[0])) img = self.transform(img) # 加载掩码 mask_path = self.mask_files[idx] mask = Image.open(mask_path).convert('L') mask = mask.resize((self.input_shape[1], self.input_shape[0]), Image.NEAREST) mask = np.array(mask) # 创建类别索引掩码 (整数类型) processed_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.int64) for gray_val, class_idx in self.gray_to_index.items(): processed_mask[mask == gray_val] = class_idx return img, processed_mask def _load_datasets(self): """加载训练、验证和测试数据集""" # 定义数据集路径 train_image_dir = os.path.join(self.database_path, 'train', 'images') train_mask_dir = os.path.join(self.database_path, 'train', 'masks') val_image_dir = os.path.join(self.database_path, 'val', 'images') val_mask_dir = os.path.join(self.database_path, 'val', 'masks') test_image_dir = os.path.join(self.database_path, 'test', 'images') test_mask_dir = os.path.join(self.database_path, 'test', 'masks') # 创建数据集实例 train_dataset = self.SegmentationDataset( train_image_dir, train_mask_dir, self.input_shape, self.gray_to_index, self.num_classes ) val_dataset = self.SegmentationDataset( val_image_dir, val_mask_dir, self.input_shape, self.gray_to_index, self.num_classes ) test_dataset = self.SegmentationDataset( test_image_dir, test_mask_dir, self.input_shape, self.gray_to_index, self.num_classes ) # 创建数据加载器 train_loader = DataLoader( train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=4 ) val_loader = DataLoader( val_dataset, batch_size=self.batch_size, shuffle=False, num_workers=4 ) test_loader = DataLoader( test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=4 ) # 打印数据集信息 print(f"\n数据集加载完成:") print(f" 训练集: {len(train_dataset)} 个样本") print(f" 验证集: {len(val_dataset)} 个样本") print(f" 测试集: {len(test_dataset)} 个样本") return train_loader, val_loader, test_loader def _calculate_metrics(self, y_true, y_pred, smooth=1e-6, verbose=True): """ 计算分割性能指标。 Args: y_true (np.array): 真实标签 (类别索引)。 y_pred (np.array): 预测结果 (概率)。 smooth (float): 防止除以零的小常数。 verbose (bool): 是否打印详细指标。 Returns: dict: 包含所有计算的指标。 """ if y_true.size == 0 or y_pred.size == 0: print("警告: 真实标签或预测结果为空,无法计算指标。") return {} # 将预测结果转换为类别索引 y_pred_argmax = np.argmax(y_pred, axis=1) # 展平图像以进行指标计算 y_pred_flat = y_pred_argmax.flatten() y_true_flat = y_true.flatten() metrics = {} epsilon = 1e-7 # 防止除以零 # 1. 全局Dice系数 (针对所有前景和背景像素点) # 背景索引 bg_index = self.gray_to_index[self.background_gray] # 将所有前景类别合并为一个"前景"类别 y_true_fg_flat = (y_true_flat != bg_index) y_pred_fg_flat = (y_pred_flat != bg_index) # True Positives (Global Foreground): 真实为前景,预测为前景 TP_global = np.sum(y_true_fg_flat & y_pred_fg_flat) # False Positives (Global Foreground): 真实为背景,预测为前景 FP_global = np.sum(~y_true_fg_flat & y_pred_fg_flat) # False Negatives (Global Foreground): 真实为前景,预测为背景 FN_global = np.sum(y_true_fg_flat & ~y_pred_fg_flat) # True Negatives (Global Background): 真实为背景,预测为背景 TN_global = np.sum(~y_true_fg_flat & ~y_pred_fg_flat) dice_global = (2. * TP_global) / (2 * TP_global + FP_global + FN_global + epsilon) metrics['Dice_Global'] = dice_global accuracy_global = (TP_global + TN_global) / (TP_global + TN_global + FP_global + FN_global + epsilon) metrics['Accuracy_Global'] = accuracy_global if verbose: print(f" 全局 Dice 系数: {metrics['Dice_Global']:.4f}") print(f" 全局准确率: {metrics['Accuracy_Global']:.4f}") # 2. 针对每个类别的像素点计算指标 for gray_val, class_idx in self.gray_to_index.items(): class_name = self.background_name if gray_val == self.background_gray else self.foreground_labels[gray_val] # 提取当前类别的TP, FP, FN, TN TP_class = np.sum((y_true_flat == class_idx) & (y_pred_flat == class_idx)) FP_class = np.sum((y_true_flat != class_idx) & (y_pred_flat == class_idx)) FN_class = np.sum((y_true_flat == class_idx) & (y_pred_flat != class_idx)) TN_class = np.sum((y_true_flat != class_idx) & (y_pred_flat != class_idx)) dice_class = (2. * TP_class) / (2 * TP_class + FP_class + FN_class + epsilon) metrics[f'{class_name}_Dice'] = dice_class accuracy_class = (TP_class + TN_class) / (TP_class + TN_class + FP_class + FN_class + epsilon) metrics[f'{class_name}_Accuracy'] = accuracy_class iou_class = TP_class / (TP_class + FP_class + FN_class + epsilon) metrics[f'{class_name}_IoU'] = iou_class precision_class = TP_class / (TP_class + FP_class + epsilon) metrics[f'{class_name}_Precision'] = precision_class recall_class = TP_class / (TP_class + FN_class + epsilon) metrics[f'{class_name}_Recall'] = recall_class specificity_class = TN_class / (TN_class + FP_class + epsilon) metrics[f'{class_name}_Specificity'] = specificity_class if verbose: print(f"\n --- 类别: {class_name} (灰度值: {gray_val}, 索引: {class_idx}) ---") print(f" Dice: {metrics[f'{class_name}_Dice']:.4f}") print(f" Accuracy: {metrics[f'{class_name}_Accuracy']:.4f}") print(f" IoU: {metrics[f'{class_name}_IoU']:.4f}") print(f" Precision: {metrics[f'{class_name}_Precision']:.4f}") print(f" Recall: {metrics[f'{class_name}_Recall']:.4f}") print(f" Specificity: {metrics[f'{class_name}_Specificity']:.4f}") return metrics def _select_best_model(self, all_metrics): """ 选择最佳模型的筛选规则(可自定义) Args: all_metrics (list): 包含所有epoch模型性能指标的列表 Returns: int: 最佳模型对应的epoch编号 """ # 默认规则:使用全局Dice系数作为主要指标,选择最高值的模型 best_epoch = -1 best_metric_value = -1 # 遍历所有模型的评估结果 for metrics in all_metrics: # 使用全局Dice系数作为选择标准 current_value = metrics.get('Dice_Global', -1) # 如果当前模型性能更好,更新最佳模型 if current_value > best_metric_value: best_metric_value = current_value best_epoch = metrics['Epoch'] print(f"\n最佳模型筛选结果: Epoch {best_epoch} (全局Dice系数: {best_metric_value:.4f})") return best_epoch def train(self): """ 训练Attention U-Net模型。 """ # 确保所有结果目录都存在 self._create_result_directories() # 确保所有目录已创建 # 构建模型 model, optimizer, criterion = self.build_model() # 加载数据集 train_loader, val_loader, test_loader = self._load_datasets() # 初始化TensorBoard writer = SummaryWriter(self.tensorboard_dir) # 初始化变量 best_val_loss = float('inf') epochs_without_improvement = 0 all_metrics = [] # 确保日志目录存在 os.makedirs(os.path.dirname(self.train_log_path), exist_ok=True) # 准备训练日志CSV with open(self.train_log_path, 'w', newline='', encoding='utf-8') as csvfile: fieldnames = ['Epoch', 'Train_Loss', 'Val_Loss', 'Dice_Global', 'Accuracy_Global'] # 添加每个类别的指标字段 for gray_val in self.gray_to_index: class_name = self.background_name if gray_val == self.background_gray else self.foreground_labels[gray_val] fieldnames.extend([ f'{class_name}_Dice', f'{class_name}_Accuracy', f'{class_name}_IoU', f'{class_name}_Precision', f'{class_name}_Recall', f'{class_name}_Specificity' ]) writer_csv = csv.DictWriter(csvfile, fieldnames=fieldnames) writer_csv.writeheader() print("\n开始训练模型...") for epoch in range(1, self.epochs + 1): # 训练阶段 model.train() train_loss = 0.0 for images, masks in tqdm(train_loader, desc=f"Epoch {epoch}/{self.epochs} [训练]"): images = images.to(self.device) masks = masks.to(self.device) # 前向传播 outputs = model(images) # 计算损失 if isinstance(criterion, nn.CrossEntropyLoss): # 对于CrossEntropyLoss,直接使用类别索引 loss = criterion(outputs, masks.long()) else: # 对于其他损失函数,需要将掩码转换为one-hot编码 masks_one_hot = F.one_hot(masks.long(), num_classes=self.num_classes).permute(0, 3, 1, 2).float() loss = criterion(outputs, masks_one_hot) # 反向传播和优化 optimizer.zero_grad() loss.backward() optimizer.step() train_loss += loss.item() * images.size(0) # 计算平均训练损失 train_loss = train_loss / len(train_loader.dataset) # 验证阶段 model.eval() val_loss = 0.0 all_val_preds = [] all_val_masks = [] with torch.no_grad(): for images, masks in tqdm(val_loader, desc=f"Epoch {epoch}/{self.epochs} [验证]"): images = images.to(self.device) masks = masks.to(self.device) # 前向传播 outputs = model(images) # 计算损失 if isinstance(criterion, nn.CrossEntropyLoss): loss = criterion(outputs, masks.long()) else: masks_one_hot = F.one_hot(masks.long(), num_classes=self.num_classes).permute(0, 3, 1, 2).float() loss = criterion(outputs, masks_one_hot) val_loss += loss.item() * images.size(0) # 收集预测结果和真实标签 all_val_preds.append(outputs.cpu().numpy()) all_val_masks.append(masks.cpu().numpy()) # 计算平均验证损失 val_loss = val_loss / len(val_loader.dataset) # 合并所有验证集的预测结果和真实标签 val_preds = np.concatenate(all_val_preds, axis=0) val_masks = np.concatenate(all_val_masks, axis=0) # 计算验证指标 val_metrics = self._calculate_metrics(val_masks, val_preds, verbose=False) val_metrics['Epoch'] = epoch val_metrics['Train_Loss'] = train_loss val_metrics['Val_Loss'] = val_loss # 记录到TensorBoard writer.add_scalar('Loss/Train', train_loss, epoch) writer.add_scalar('Loss/Validation', val_loss, epoch) writer.add_scalar('Metrics/Dice_Global', val_metrics['Dice_Global'], epoch) writer.add_scalar('Metrics/Accuracy_Global', val_metrics['Accuracy_Global'], epoch) # 保存指标到列表 all_metrics.append(val_metrics) # 确保临时目录存在再保存模型 os.makedirs(self.temp_model_dir, exist_ok=True) # 关键修复:确保目录存在 # 保存当前epoch的模型 epoch_model_path = os.path.join(self.temp_model_dir, f'epoch_{epoch:03d}.pth') torch.save({ 'epoch': epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'train_loss': train_loss, 'val_loss': val_loss, 'val_metrics': val_metrics }, epoch_model_path) print(f"Epoch {epoch:03d}: 模型已保存到 {epoch_model_path}") # 写入CSV日志 - 确保目录存在 os.makedirs(os.path.dirname(self.train_log_path), exist_ok=True) with open(self.train_log_path, 'a', newline='', encoding='utf-8') as csvfile: writer_csv = csv.DictWriter(csvfile, fieldnames=fieldnames) writer_csv.writerow(val_metrics) # 打印训练进度 print(f"Epoch {epoch}/{self.epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, " f"Val Dice Global: {val_metrics['Dice_Global']:.4f}, " f"Val Accuracy Global: {val_metrics['Accuracy_Global']:.4f}") # 学习率调度 if epoch > 1 and val_loss < best_val_loss: best_val_loss = val_loss epochs_without_improvement = 0 else: epochs_without_improvement += 1 if epochs_without_improvement >= self.lr_reduction_patience: # 降低学习率 for param_group in optimizer.param_groups: param_group['lr'] *= self.lr_reduction_factor print(f"Epoch {epoch}: 降低学习率至 {optimizer.param_groups[0]['lr']:.2e}") epochs_without_improvement = 0 # 早停检查 if epochs_without_improvement >= self.early_stopping_patience: print(f"Epoch {epoch}: 早停触发,停止训练") break # 关闭TensorBoard写入器 writer.close() # 训练完成后选择最佳模型 self._select_and_save_best_model(all_metrics) self._plot_performance_curves() self._test_best_model(test_loader) def _select_and_save_best_model(self, all_metrics): """选择并保存最佳模型""" best_epoch = self._select_best_model(all_metrics) if best_epoch > 0: # 加载最佳模型 best_model_path = os.path.join(self.temp_model_dir, f'epoch_{best_epoch:03d}.pth') if os.path.exists(best_model_path): # 确保目标目录存在 os.makedirs(os.path.dirname(self.best_model_path), exist_ok=True) # 关键修复 # 复制到最终位置 shutil.copyfile(best_model_path, self.best_model_path) print(f"已将最佳模型 (Epoch {best_epoch}) 保存到: {self.best_model_path}") else: print(f"警告: 找不到最佳模型对应的文件 (Epoch {best_epoch})") else: print("警告: 没有找到有效的最佳模型") # 保存最佳模型性能 if best_epoch > 0: best_metrics = next(m for m in all_metrics if m['Epoch'] == best_epoch) with open(self.best_model_performance_path, 'w', encoding='utf-8') as f: f.write("Attention U-Net 最佳模型验证性能\n") f.write(f"保存时间: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") f.write(f"最佳模型路径: {self.best_model_path}\n") f.write(f"对应Epoch: {best_epoch}\n") f.write("-" * 50 + "\n") for key, value in best_metrics.items(): if isinstance(value, float): f.write(f"{key}: {value:.4f}\n") else: f.write(f"{key}: {value}\n") print(f"最佳模型的验证性能参数已记录在 '{self.best_model_performance_path}'") # 删除临时模型目录 if os.path.exists(self.temp_model_dir): print(f"删除临时模型文件目录:{self.temp_model_dir}") shutil.rmtree(self.temp_model_dir) def _test_best_model(self, test_loader): if not os.path.exists(self.best_model_path): print(f"错误:未找到最佳模型文件: {self.best_model_path}。请确保模型已成功训练并保存。") return print("\n正在加载最佳模型进行测试...") try: # 修复模型加载问题 - 添加安全上下文管理器 import torch.serialization from numpy import scalar # 导入需要的NumPy类型 # 创建安全上下文加载模型 with torch.serialization.safe_globals([scalar]): checkpoint = torch.load( self.best_model_path, map_location=self.device ) # 重建模型结构 input_channels = self.input_shape[2] if len(self.input_shape) == 3 else 3 model = self.AttentionUNetModel( input_channels=input_channels, num_classes=self.num_classes, filters_base=self.filters_base, kernel_size=self.kernel_size, dropout_rate=self.dropout_rate, attention_mechanism=self.attention_mechanism, output_activation=self.output_activation ).to(self.device) # 加载状态字典 model.load_state_dict(checkpoint['model_state_dict']) model.eval() except Exception as e: print(f"错误: 无法加载最佳模型 {self.best_model_path}. 错误: {e}") return # 进行测试 all_test_preds = [] all_test_masks = [] with torch.no_grad(): for images, masks in tqdm(test_loader, desc="测试最佳模型"): images = images.to(self.device) masks = masks.to(self.device) outputs = model(images) all_test_preds.append(outputs.cpu().numpy()) all_test_masks.append(masks.cpu().numpy()) # 合并结果 test_preds = np.concatenate(all_test_preds, axis=0) test_masks = np.concatenate(all_test_masks, axis=0) # 计算指标 test_metrics = self._calculate_metrics(test_masks, test_preds, verbose=True) # 保存测试性能 with open(self.best_model_performance_path, 'a', encoding='utf-8') as f: f.write("\n" + "=" * 50 + "\n") f.write("测试集性能:\n") for key, value in test_metrics.items(): if isinstance(value, float): f.write(f"{key}: {value:.4f}\n") else: f.write(f"{key}: {value}\n") print(f"最佳模型的测试性能参数已追加到 '{self.best_model_performance_path}'") def _plot_performance_curves(self): """ 根据训练日志数据绘制参数-迭代次数曲线 """ print("\n正在绘制性能曲线图...") if not os.path.exists(self.train_log_path): print(f"错误:未找到训练日志文件: {self.train_log_path},无法绘制曲线图。") return # 确保目录存在 os.makedirs(self.performance_plots_dir, exist_ok=True) # 从CSV文件读取数据 epochs = [] data = {} # 读取CSV文件 with open(self.train_log_path, 'r', encoding='utf-8') as csvfile: reader = csv.DictReader(csvfile) for row in reader: epochs.append(int(row['Epoch'])) for key, value in row.items(): if key == 'Epoch': continue if key not in data: data[key] = [] try: data[key].append(float(value)) except: data[key].append(0.0) if not epochs: print("训练日志中没有足够的有效数据来绘制曲线。") return # 获取所有类别名称(包括背景和前景) all_classes = set() for key in data.keys(): # 只处理包含下划线且不以'Val'或'Train'开头的键 if '_' in key and not key.startswith(('Val_', 'Train_')): # 提取类别名称(例如:'background_Dice' -> 'background') class_name = key.split('_')[0] # 确保类别名称在已知类别中 if class_name in [self.background_name] + list(self.foreground_labels.values()): all_classes.add(class_name) # 1. 损失曲线 plt.figure(figsize=(10, 6)) plt.plot(epochs, data['Train_Loss'], label='训练损失', color='blue', linewidth=2) plt.plot(epochs, data['Val_Loss'], label='验证损失', color='red', linewidth=2) plt.title('训练和验证损失 vs. 迭代次数', fontsize=14) plt.xlabel('迭代次数', fontsize=12) plt.ylabel('损失值', fontsize=12) plt.xticks(range(min(epochs), max(epochs)+1, max(1, len(epochs)//10))) plt.legend(fontsize=12) plt.grid(True) plt.tight_layout() plt.savefig(os.path.join(self.performance_plots_dir, '损失迭代曲线.png')) plt.close() # 2. 全局Dice和Accuracy曲线 plt.figure(figsize=(10, 6)) plt.plot(epochs, data['Dice_Global'], label='全局Dice系数', color='blue', linewidth=2) plt.plot(epochs, data['Accuracy_Global'], label='全局准确率', color='red', linewidth=2) plt.title('全局Dice系数和准确率 vs. 迭代次数', fontsize=14) plt.xlabel('迭代次数', fontsize=12) plt.ylabel('值', fontsize=12) plt.xticks(range(min(epochs), max(epochs)+1, max(1, len(epochs)//10))) plt.legend(fontsize=12) plt.grid(True) plt.tight_layout() plt.savefig(os.path.join(self.performance_plots_dir, '全局Dice和准确率迭代曲线.png')) plt.close() # 3. 各类别IoU曲线 plt.figure(figsize=(10, 6)) for class_name in all_classes: plt.plot(epochs, data[f'{class_name}_IoU'], label=f'{class_name} IoU', linewidth=2) plt.title('各类IoU vs. 迭代次数', fontsize=14) plt.xlabel('迭代次数', fontsize=12) plt.ylabel('IoU值', fontsize=12) plt.xticks(range(min(epochs), max(epochs)+1, max(1, len(epochs)//10))) plt.legend(fontsize=12) plt.grid(True) plt.tight_layout() plt.savefig(os.path.join(self.performance_plots_dir, '各类IoU迭代曲线.png')) plt.close() # 4. 各类别Precision曲线 plt.figure(figsize=(10, 6)) for class_name in all_classes: plt.plot(epochs, data[f'{class_name}_Precision'], label=f'{class_name} 精确率', linewidth=2) plt.title('各类精确率 vs. 迭代次数', fontsize=14) plt.xlabel('迭代次数', fontsize=12) plt.ylabel('精确率值', fontsize=12) plt.xticks(range(min(epochs), max(epochs)+1, max(1, len(epochs)//10))) plt.legend(fontsize=12) plt.grid(True) plt.tight_layout() plt.savefig(os.path.join(self.performance_plots_dir, '各类精确率迭代曲线.png')) plt.close() # 5. 各类别Recall曲线 plt.figure(figsize=(10, 6)) for class_name in all_classes: plt.plot(epochs, data[f'{class_name}_Recall'], label=f'{class_name} 召回率', linewidth=2) plt.title('各类召回率 vs. 迭代次数', fontsize=14) plt.xlabel('迭代次数', fontsize=12) plt.ylabel('召回率值', fontsize=12) plt.xticks(range(min(epochs), max(epochs)+1, max(1, len(epochs)//10))) plt.legend(fontsize=12) plt.grid(True) plt.tight_layout() plt.savefig(os.path.join(self.performance_plots_dir, '各类召回率迭代曲线.png')) plt.close() # 6. 各类别Specificity曲线 plt.figure(figsize=(10, 6)) for class_name in all_classes: plt.plot(epochs, data[f'{class_name}_Specificity'], label=f'{class_name} 特异性', linewidth=2) plt.title('各类特异性 vs. 迭代次数', fontsize=14) plt.xlabel('迭代次数', fontsize=12) plt.ylabel('特异性值', fontsize=12) plt.xticks(range(min(epochs), max(epochs)+1, max(1, len(epochs)//10))) plt.legend(fontsize=12) plt.grid(True) plt.tight_layout() plt.savefig(os.path.join(self.performance_plots_dir, '各类特异性迭代曲线.png')) plt.close() # 7. 各类别Accuracy曲线 plt.figure(figsize=(10, 6)) for class_name in all_classes: plt.plot(epochs, data[f'{class_name}_Accuracy'], label=f'{class_name} 准确率', linewidth=2) plt.title('各类准确率 vs. 迭代次数', fontsize=14) plt.xlabel('迭代次数', fontsize=12) plt.ylabel('准确率值', fontsize=12) plt.xticks(range(min(epochs), max(epochs)+1, max(1, len(epochs)//10))) plt.legend(fontsize=12) plt.grid(True) plt.tight_layout() plt.savefig(os.path.join(self.performance_plots_dir, '各类准确率迭代曲线.png')) plt.close() # 8. 各类别Dice曲线 plt.figure(figsize=(10, 6)) for class_name in all_classes: plt.plot(epochs, data[f'{class_name}_Dice'], label=f'{class_name} Dice', linewidth=2) plt.title('各类Dice系数 vs. 迭代次数', fontsize=14) plt.xlabel('迭代次数', fontsize=12) plt.ylabel('Dice值', fontsize=12) plt.xticks(range(min(epochs), max(epochs)+1, max(1, len(epochs)//10))) plt.legend(fontsize=12) plt.grid(True) plt.tight_layout() plt.savefig(os.path.join(self.performance_plots_dir, '各类Dice迭代曲线.png')) plt.close() # 9. 每个类别的Dice和IoU曲线(n张图) for class_name in all_classes: plt.figure(figsize=(10, 6)) plt.plot(epochs, data[f'{class_name}_Dice'], label=f'{class_name} Dice', color='blue', linewidth=2) plt.plot(epochs, data[f'{class_name}_IoU'], label=f'{class_name} IoU', color='red', linewidth=2) plt.title(f'{class_name}: Dice和IoU vs. 迭代次数', fontsize=14) plt.xlabel('迭代次数', fontsize=12) plt.ylabel('值', fontsize=12) plt.xticks(range(min(epochs), max(epochs)+1, max(1, len(epochs)//10))) plt.legend(fontsize=12) plt.grid(True) plt.tight_layout() plt.savefig(os.path.join(self.performance_plots_dir, f'{class_name}_Dice+IoU迭代曲线.png')) plt.close() # 10. 每个类别的Precision和Recall曲线(n张图) for class_name in all_classes: plt.figure(figsize=(10, 6)) plt.plot(epochs, data[f'{class_name}_Precision'], label=f'{class_name} 精确率', color='blue', linewidth=2) plt.plot(epochs, data[f'{class_name}_Recall'], label=f'{class_name} 召回率', color='red', linewidth=2) plt.title(f'{class_name}: 精确率和召回率 vs. 迭代次数', fontsize=14) plt.xlabel('迭代次数', fontsize=12) plt.ylabel('值', fontsize=12) plt.xticks(range(min(epochs), max(epochs)+1, max(1, len(epochs)//10))) plt.legend(fontsize=12) plt.grid(True) plt.tight_layout() plt.savefig(os.path.join(self.performance_plots_dir, f'{class_name}_精确率+召回率迭代曲线.png')) plt.close() print(f"性能曲线图已保存到 '{self.performance_plots_dir}'") print(f"总共生成 {8 + 2*len(all_classes)} 张曲线图") print("\n所有任务已完成!") #------------------------------------- 示例用法 -------------------------------------------- # 定义模型基础参数 input_shape = (512, 512, 3) classes = { 'class_number': 3, 'bg': (2, 'background'), 'fg': {0: 'cup', 1: 'disc'} } epochs = 5 result_base_path = '/content/drive/MyDrive/results' database_base_path = '/content/drive/MyDrive/database' # 实例化模型类 unet_model = AttentionUNet( input_shape=input_shape, classes=classes, epochs=epochs, result_path=result_base_path, database_path=database_base_path, learning_rate=1e-4, batch_size=5, early_stopping_patience=3, lr_reduction_patience=2, dropout_rate=0.5, filters_base=16, attention_mechanism='additive', loss='cross_entropy' # 修改为PyTorch支持的损失函数 ) # 构建并训练模型 unet_model.train() #------------------------------------------------------------------------------------------ 上述代码运行时报错: 正在绘制性能曲线图... --------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-8-2018641610> in <cell line: 0>() 1143 1144 # 构建并训练模型 -> 1145 unet_model.train() 1146 #------------------------------------------------------------------------------------------ 1 frames <ipython-input-8-2018641610> in _plot_performance_curves(self) 941 reader = csv.DictReader(csvfile) 942 for row in reader: --> 943 epochs.append(int(row['Epoch'])) 944 for key, value in row.items(): 945 if key == 'Epoch': KeyError: 'Epoch' 请修改

import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn.preprocessing import StandardScaler from sklearn.metrics import mean_absolute_error, mean_squared_error from tensorflow.keras.models import Sequential from tensorflow.keras.layers import LSTM, Dense, Dropout from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau import tensorflow as tf import os from datetime import timedelta # 设置随机种子确保结果可复现 tf.random.set_seed(42) np.random.seed(42) # 1. 数据加载与预处理函数 # -------------------------------------------------- def load_and_preprocess_data(): """加载并预处理所有数据源""" # 加载EC气象数据 ec_df = pd.read_csv('阿拉山口风电场_EC_data.csv', parse_dates=['生成日期', '预测日期']) ec_df = ec_df[ec_df['场站名'] == '阿拉山口风电场'] # 计算EC风速和风向 ec_df['EC风速(m/s)'] = np.sqrt(ec_df['U风分量(m/s)']**2 + ec_df['V风分量(m/s)']**2) ec_df['EC风向(度)'] = np.degrees(np.arctan2(ec_df['V风分量(m/s)'], ec_df['U风分量(m/s)'])) % 360 # 添加EC数据可用时间(生成时间+12小时) ec_df['可用时间'] = ec_df['生成日期'] + timedelta(hours=12) # 选择关键特征 ec_features = [ '可用时间', '预测日期', 'EC风速(m/s)', 'EC风向(度)', '位势高度_850hPa(gpm)', '温度_850hPa(K)', '相对湿度_850hPa(%)', '位势高度_500hPa(gpm)', '温度_500hPa(K)' ] ec_df = ec_df[ec_features] # 加载风机数据 turbine_df = pd.read_csv('阿拉山口风电场风机数据.csv', encoding='utf-8', parse_dates=[0]) turbine_df.columns = ['timestamp', 'wind_speed', 'active_power'] # 加载远动数据 scada_df = pd.read_csv('阿拉山口风电场远动数据.csv', encoding='utf-8', parse_dates=[0]) scada_df.columns = ['timestamp', 'active_power_total'] # 合并风机和远动数据 power_df = pd.merge(turbine_df[['timestamp', 'wind_speed']], scada_df, on='timestamp', how='outer') # 按时间排序并填充缺失值 power_df.sort_values('timestamp', inplace=True) power_df['active_power_total'].ffill(inplace=True) power_df['wind_speed'].ffill(inplace=True) # 创建完整的时间序列索引(15分钟间隔) full_range = pd.date_range( start=power_df['timestamp'].min(), end=power_df['timestamp'].max(), freq='15T' ) power_df = power_df.set_index('timestamp').reindex(full_range).reset_index() power_df.rename(columns={'index': 'timestamp'}, inplace=True) power_df[['wind_speed', 'active_power_total']] = power_df[['wind_speed', 'active_power_total']].ffill() # 合并EC数据到主数据集 ec_data = [] for idx, row in power_df.iterrows(): ts = row['timestamp'] # 获取可用的EC预测(可用时间 <= 当前时间) available_ec = ec_df[ec_df['可用时间'] <= ts] if not available_ec.empty: # 获取最近发布的EC数据 latest_gen = available_ec['可用时间'].max() latest_ec = available_ec[available_ec['可用时间'] == latest_gen] # 找到最接近当前时间点的预测 time_diff = (latest_ec['预测日期'] - ts).abs() closest_idx = time_diff.idxmin() ec_point = latest_ec.loc[closest_idx].copy() ec_point['timestamp'] = ts ec_data.append(ec_point) # 创建EC数据DataFrame并合并 ec_ts_df = pd.DataFrame(ec_data) merged_df = pd.merge(power_df, ec_ts_df, on='timestamp', how='left') # 填充缺失的EC数据 ec_cols = [col for col in ec_ts_df.columns if col not in ['timestamp', '可用时间', '预测日期']] for col in ec_cols: merged_df[col] = merged_df[col].interpolate(method='time') # 添加时间特征 merged_df['hour'] = merged_df['timestamp'].dt.hour merged_df['day_of_week'] = merged_df['timestamp'].dt.dayofweek merged_df['day_of_year'] = merged_df['timestamp'].dt.dayofyear merged_df['month'] = merged_df['timestamp'].dt.month # 计算实际风向(如果有测风塔数据,这里使用EC风向) merged_df['风向(度)'] = merged_df['EC风向(度)'] return merged_df # 2. 数据准备函数 # -------------------------------------------------- def prepare_dataset(df, look_back, forecast_steps, target_col='active_power_total'): """ 准备LSTM训练数据集 :param df: 包含所有特征的DataFrame :param look_back: 回溯时间步长 :param forecast_steps: 预测步长 :param target_col: 目标列名 :return: 标准化后的特征和目标数据集 """ # 选择特征列 feature_cols = [ 'wind_speed', 'active_power_total', 'EC风速(m/s)', '风向(度)', '位势高度_850hPa(gpm)', '温度_850hPa(K)', '相对湿度_850hPa(%)', '位势高度_500hPa(gpm)', '温度_500hPa(K)', 'hour', 'day_of_week', 'day_of_year', 'month' ] # 确保目标列在特征中(用于自回归) if target_col not in feature_cols: feature_cols.append(target_col) # 提取特征和目标 features = df[feature_cols].values target = df[target_col].values # 创建时间序列样本 X, y = [], [] for i in range(len(features) - look_back - forecast_steps): X.append(features[i:i+look_back]) y.append(target[i+look_back:i+look_back+forecast_steps]) X = np.array(X) y = np.array(y) return X, y # 3. LSTM模型构建 # -------------------------------------------------- def create_lstm_model(input_shape, output_steps): """ 创建LSTM模型 :param input_shape: 输入形状 (time_steps, features) :param output_steps: 输出时间步长 :return: 编译好的Keras模型 """ model = Sequential([ LSTM(256, return_sequences=True, input_shape=input_shape), Dropout(0.3), LSTM(128, return_sequences=True), Dropout(0.2), LSTM(64, return_sequences=False), Dropout(0.2), Dense(64, activation='relu'), Dense(32, activation='relu'), Dense(output_steps) ]) model.compile(optimizer='adam', loss='mse', metrics=['mae']) return model # 4. 训练和评估函数 # -------------------------------------------------- def train_and_evaluate_model(X_train, y_train, X_test, y_test, look_back, forecast_steps, model_name): """ 训练和评估LSTM模型 :return: 训练好的模型和评估结果 """ # 数据标准化 X_scaler = StandardScaler() y_scaler = StandardScaler() # 重塑数据用于标准化 X_train_reshaped = X_train.reshape(-1, X_train.shape[2]) X_test_reshaped = X_test.reshape(-1, X_test.shape[2]) # 拟合和转换特征 X_train_scaled = X_scaler.fit_transform(X_train_reshaped).reshape(X_train.shape) X_test_scaled = X_scaler.transform(X_test_reshaped).reshape(X_test.shape) # 拟合和转换目标 y_train_scaled = y_scaler.fit_transform(y_train.reshape(-1, 1)).reshape(y_train.shape) y_test_scaled = y_scaler.transform(y_test.reshape(-1, 1)).reshape(y_test.shape) # 创建模型 model = create_lstm_model( (look_back, X_train.shape[2]), forecast_steps ) # 回调函数 callbacks = [ EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True), ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6) ] # 训练模型 history = model.fit( X_train_scaled, y_train_scaled, epochs=100, batch_size=64, validation_split=0.2, callbacks=callbacks, verbose=1 ) # 评估模型 y_pred_scaled = model.predict(X_test_scaled) y_pred = y_scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).reshape(y_test.shape) # 计算性能指标 mae = mean_absolute_error(y_test.flatten(), y_pred.flatten()) rmse = np.sqrt(mean_squared_error(y_test.flatten(), y_pred.flatten())) print(f"{model_name} 模型评估结果:") print(f"MAE: {mae:.2f} kW") print(f"RMSE: {rmse:.2f} kW") # 保存模型 model.save(f'{model_name}_wind_power_model.h5') return model, y_pred, history, mae, rmse # 5. 可视化函数 # -------------------------------------------------- def plot_results(y_true, y_pred, model_name, forecast_steps, mae): """ 可视化预测结果 :param y_true: 实际值 :param y_pred: 预测值 :param model_name: 模型名称 :param forecast_steps: 预测步长 :param mae: 平均绝对误差 """ # 选择一段代表性的时间序列展示 start_idx = 500 end_idx = start_idx + 200 plt.figure(figsize=(15, 7)) # 绘制实际值 plt.plot(y_true.flatten()[start_idx:end_idx], label='实际功率', color='blue', alpha=0.7, linewidth=2) # 绘制预测值 plt.plot(y_pred.flatten()[start_idx:end_idx], label='预测功率', color='red', alpha=0.7, linestyle='--', linewidth=2) plt.title(f'{model_name}风功率预测 (预测步长: {forecast_steps}步, MAE: {mae:.2f} kW)', fontsize=14) plt.xlabel('时间步', fontsize=12) plt.ylabel('有功功率 (kW)', fontsize=12) plt.legend(fontsize=12) plt.grid(True, linestyle='--', alpha=0.7) plt.tight_layout() plt.savefig(f'{model_name}_prediction_plot.png', dpi=300) plt.show() def plot_training_history(history, model_name): """绘制训练过程中的损失曲线""" plt.figure(figsize=(12, 6)) # 绘制训练损失 plt.plot(history.history['loss'], label='训练损失') # 绘制验证损失 if 'val_loss' in history.history: plt.plot(history.history['val_loss'], label='验证损失') plt.title(f'{model_name} 训练过程', fontsize=14) plt.xlabel('训练轮次', fontsize=12) plt.ylabel('损失 (MSE)', fontsize=12) plt.legend(fontsize=12) plt.grid(True, linestyle='--', alpha=0.7) plt.tight_layout() plt.savefig(f'{model_name}_training_history.png', dpi=300) plt.show() # 6. 主函数 # -------------------------------------------------- def main(): print("开始数据加载与预处理...") df = load_and_preprocess_data() # 定义预测配置 ULTRA_SHORT_CONFIG = { 'name': '超短期', 'look_back': 24, # 6小时历史 (24*15min) 'forecast_steps': 16, # 4小时预测 (16*15min) 'test_size': 0.1 # 最后10%作为测试集 } SHORT_TERM_CONFIG = { 'name': '短期', 'look_back': 96, # 24小时历史 (96*15min) 'forecast_steps': 288, # 72小时预测 (288*15min) 'test_size': 0.05 # 最后5%作为测试集(长期预测需要更多历史数据) } # 准备超短期预测数据集 print("\n准备超短期预测数据集...") X_ultra, y_ultra = prepare_dataset( df, ULTRA_SHORT_CONFIG['look_back'], ULTRA_SHORT_CONFIG['forecast_steps'] ) # 划分训练集和测试集 split_idx_ultra = int(len(X_ultra) * (1 - ULTRA_SHORT_CONFIG['test_size'])) X_train_ultra, X_test_ultra = X_ultra[:split_idx_ultra], X_ultra[split_idx_ultra:] y_train_ultra, y_test_ultra = y_ultra[:split_idx_ultra], y_ultra[split_idx_ultra:] # 准备短期预测数据集 print("\n准备短期预测数据集...") X_short, y_short = prepare_dataset( df, SHORT_TERM_CONFIG['look_back'], SHORT_TERM_CONFIG['forecast_steps'] ) # 划分训练集和测试集 split_idx_short = int(len(X_short) * (1 - SHORT_TERM_CONFIG['test_size'])) X_train_short, X_test_short = X_short[:split_idx_short], X_short[split_idx_short:] y_train_short, y_test_short = y_short[:split_idx_short], y_short[split_idx_short:] print("\n数据集统计信息:") print(f"超短期数据集: 总样本={len(X_ultra)}, 训练集={len(X_train_ultra)}, 测试集={len(X_test_ultra)}") print(f"短期数据集: 总样本={len(X_short)}, 训练集={len(X_train_short)}, 测试集={len(X_test_short)}") # 训练和评估超短期模型 print("\n训练超短期预测模型...") ultra_model, ultra_pred, ultra_history, ultra_mae, ultra_rmse = train_and_evaluate_model( X_train_ultra, y_train_ultra, X_test_ultra, y_test_ultra, ULTRA_SHORT_CONFIG['look_back'], ULTRA_SHORT_CONFIG['forecast_steps'], 'ultra_short_term' ) # 可视化超短期结果 plot_results(y_test_ultra, ultra_pred, '超短期', ULTRA_SHORT_CONFIG['forecast_steps'], ultra_mae) plot_training_history(ultra_history, '超短期模型') # 训练和评估短期模型 print("\n训练短期预测模型...") short_model, short_pred, short_history, short_mae, short_rmse = train_and_evaluate_model( X_train_short, y_train_short, X_test_short, y_test_short, SHORT_TERM_CONFIG['look_back'], SHORT_TERM_CONFIG['forecast_steps'], 'short_term' ) # 可视化短期结果 plot_results(y_test_short, short_pred, '短期', SHORT_TERM_CONFIG['forecast_steps'], short_mae) plot_training_history(short_history, '短期模型') # 最终报告 print("\n" + "="*50) print("风功率预测模型训练完成!") print("="*50) print(f"超短期模型 (4小时预测):") print(f" - 回溯步长: {ULTRA_SHORT_CONFIG['look_back']} (6小时)") print(f" - 预测步长: {ULTRA_SHORT_CONFIG['forecast_steps']} (4小时)") print(f" - 测试集MAE: {ultra_mae:.2f} kW") print(f" - 测试集RMSE: {ultra_rmse:.2f} kW") print(f"\n短期模型 (72小时预测):") print(f" - 回溯步长: {SHORT_TERM_CONFIG['look_back']} (24小时)") print(f" - 预测步长: {SHORT_TERM_CONFIG['forecast_steps']} (72小时)") print(f" - 测试集MAE: {short_mae:.2f} kW") print(f" - 测试集RMSE: {short_rmse:.2f} kW") print("="*50) # 保存预测结果 results_df = pd.DataFrame({ 'timestamp': df['timestamp'].iloc[split_idx_short + SHORT_TERM_CONFIG['look_back']:split_idx_short + SHORT_TERM_CONFIG['look_back'] + len(y_test_short)], '实际功率': y_test_short.flatten(), '超短期预测': ultra_pred.flatten()[:len(y_test_short)], '短期预测': short_pred.flatten() }) results_df.to_csv('风功率预测结果.csv', index=False) print("预测结果已保存到 '风功率预测结果.csv'") if __name__ == "__main__": main() 分析上述代码,并按以下要求重新生成完整代码。 1、数据文件夹路径“G:\桌面映射文件夹\运行调控中心\两个细则\功率预测算法大赛\数据” 2、数据中每个电场只有 ec数据和风电场风机数据是全的,其它两个数据,有的场有,有的场没有,所以EC和风电场数据是必须拿来训练的,其它的根据需要。 3、模型不用tensorflow,用pytorch。

E:\AI_System\core里, 没有utils.py;E:\AI_System\tests里没有test_models.py 这个不知道怎么改“# E:\AI_System\agent\cognitive_architecture.py # 智能体认知架构模块 - 修复基类导入问题并优化决策系统 import os import time import random import logging from datetime import datetime from pathlib import Path import sys # 添加项目根目录到路径 sys.path.append(str(Path(__file__).parent.parent)) # 配置日志 logger = logging.getLogger('CognitiveArchitecture') logger.setLevel(logging.INFO) handler = logging.StreamHandler() formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') handler.setFormatter(formatter) logger.addHandler(handler) logger.propagate = False # 防止日志向上传播 # 修复基类导入问题 - 使用绝对路径导入 try: # 尝试从core包导入基类 from core.base_module import CognitiveModule logger.info("✅ 成功从core.base_module导入CognitiveModule基类") except ImportError as e: logger.error(f"❌ 无法从core.base_module导入CognitiveModule基类: {str(e)}") try: # 备选导入路径 from .base_model import CognitiveModule logger.info("✅ 从agent.base_model导入CognitiveModule基类") except ImportError as e: logger.error(f"❌ 备选导入失败: {str(e)}") # 创建占位符基类 logger.warning("⚠️ 创建占位符CognitiveModule基类") class CognitiveModule: def __init__(self, name): self.name = name self.logger = logging.getLogger(name) self.logger.warning("⚠️ 使用占位符基类") def get_status(self): return {"name": self.name, "status": "unknown (placeholder)"} # 尝试导入自我认知模块 try: # 使用相对导入 from .digital_body_schema import DigitalBodySchema from .self_referential_framework import SelfReferentialFramework from .self_narrative_generator import SelfNarrativeGenerator logger.info("✅ 成功导入自我认知模块") except ImportError as e: logger.error(f"❌ 自我认知模块导入失败: {str(e)}") logger.warning("⚠️ 使用占位符自我认知模块") # 创建占位符类 class DigitalBodySchema: def __init__(self): self.self_map = {"boundary_strength": 0.5, "self_awareness": 0.3} logger.warning("⚠️ 使用占位符DigitalBodySchema") def is_part_of_self(self, stimulus): return False def strengthen_boundary(self, source): self.self_map["boundary_strength"] = min(1.0, self.self_map["boundary_strength"] + 0.1) def get_self_map(self): return self.self_map.copy() class SelfReferentialFramework: def __init__(self): self.self_model = {"traits": {}, "beliefs": []} logger.warning("⚠️ 使用占位符SelfReferentialFramework") def update_self_model(self, stimulus): if "content" in stimulus and "text" in stimulus["content"]: text = stimulus["content"]["text"] if "I am" in text or "my" in text.lower(): self.self_model["self_reflection_count"] = self.self_model.get("self_reflection_count", 0) + 1 def get_self_model(self): return self.self_model.copy() class SelfNarrativeGenerator: def __init__(self): self.recent_stories = [] logger.warning("⚠️ 使用占位符SelfNarrativeGenerator") def generate_self_story(self, self_model): story = f"这是一个关于自我的故事。自我反思次数: {self_model.get('self_reflection_count', 0)}" self.recent_stories.append(story) if len(self.recent_stories) > 5: self.recent_stories.pop(0) return story def get_recent_stories(self): return self.recent_stories.copy() # 增强决策系统实现 class DecisionSystem: """增强版决策系统""" STRATEGY_WEIGHTS = { "honest": 0.7, "deception": 0.1, "evasion": 0.1, "redirection": 0.05, "partial_disclosure": 0.05 } def __init__(self, trust_threshold=0.6): self.trust_threshold = trust_threshold self.strategy_history = [] def make_decision(self, context): """根据上下文做出智能决策""" user_model = context.get("user_model", {}) bodily_state = context.get("bodily_state", {}) # 计算信任因子 trust_factor = user_model.get("trust_level", 0.5) # 计算身体状态影响因子 capacity = bodily_state.get("capacity", 1.0) state_factor = min(1.0, capacity * 1.2) # 决策逻辑 if trust_factor > self.trust_threshold: # 高信任度用户使用诚实策略 strategy = "honest" reason = "用户信任度高" elif capacity < 0.5: # 系统资源不足时使用简化策略 strategy = random.choices( ["honest", "partial_disclosure", "evasion"], weights=[0.5, 0.3, 0.2] )[0] reason = "系统资源不足,使用简化策略" else: # 根据策略权重选择 strategies = list(self.STRATEGY_WEIGHTS.keys()) weights = [self.STRATEGY_WEIGHTS[s] * state_factor for s in strategies] strategy = random.choices(strategies, weights=weights)[0] reason = f"根据策略权重选择: {strategy}" # 记录决策历史 self.strategy_history.append({ "timestamp": datetime.now(), "strategy": strategy, "reason": reason, "context": context }) return { "type": "strategic" if strategy != "honest" else "honest", "strategy": strategy, "reason": reason } def get_strategy_history(self, count=10): """获取最近的决策历史""" return self.strategy_history[-count:] class Strategy: """策略基类""" pass class CognitiveSystem(CognitiveModule): def __init__(self, agent, affective_system=None): """ 三维整合的认知架构 :param agent: 智能体实例,用于访问其他系统 :param affective_system: 可选的情感系统实例 """ # 调用父类初始化 super().__init__("cognitive_system") self.agent = agent self.affective_system = affective_system # 原有的初始化代码 self.initialized = False # 通过agent引用其他系统 self.memory_system = agent.memory_system self.model_manager = agent.model_manager self.health_system = agent.health_system # 优先使用传入的情感系统,否则使用agent的 if affective_system is not None: self.affective_system = affective_system else: self.affective_system = agent.affective_system self.learning_tasks = [] # 当前学习任务队列 self.thought_process = [] # 思考过程记录 # 初始化决策系统 self.decision_system = DecisionSystem() # 初始化认知状态 self.cognitive_layers = { "perception": 0.5, # 感知层 "comprehension": 0.3, # 理解层 "reasoning": 0.2, # 推理层 "decision": 0.4 # 决策层 } # 添加自我认知模块 self.self_schema = DigitalBodySchema() self.self_reflection = SelfReferentialFramework() self.narrative_self = SelfNarrativeGenerator() logger.info("✅ 认知架构初始化完成 - 包含决策系统和自我认知模块") # 实现基类要求的方法 def initialize(self, core): """实现 ICognitiveModule 接口""" self.core_ref = core self.initialized = True return True def process(self, input_data): """实现 ICognitiveModule 接口""" # 处理认知输入数据 if isinstance(input_data, dict) and 'text' in input_data: return self.process_input(input_data['text'], input_data.get('user_id', 'default')) elif isinstance(input_data, str): return self.process_input(input_data) else: return {"status": "invalid_input", "message": "Input should be text or dict with text"} def get_status(self): """实现 ICognitiveModule 接口""" status = super().get_status() status.update({ "initialized": self.initialized, "has_affective_system": self.affective_system is not None, "learning_tasks": len(self.learning_tasks), "thought_process": len(self.thought_process), "self_cognition": self.get_self_cognition() }) return status def shutdown(self): """实现 ICognitiveModule 接口""" self.initialized = False return True def handle_message(self, message): """实现 ICognitiveModule 接口""" if message.get('type') == 'cognitive_process': return self.process(message.get('data')) return {"status": "unknown_message_type"} # 保持向后兼容的方法 def connect_to_core(self, core): """向后兼容的方法""" return self.initialize(core) def _create_stimulus_from_input(self, user_input, user_id): """从用户输入创建刺激对象""" return { "content": {"text": user_input, "user_id": user_id}, "source": "external", "category": "text", "emotional_valence": 0.0 # 初始情感价 } def _process_self_related(self, stimulus): """处理与自我相关的刺激""" # 更新自我认知 self.self_reflection.update_self_model(stimulus) # 如果是痛苦刺激,强化身体边界 if stimulus.get("emotional_valence", 0) < -0.7: source = stimulus.get("source", "unknown") self.self_schema.strengthen_boundary(source) # 30%概率触发自我叙事 if random.random() < 0.3: self_story = self.narrative_self.generate_self_story( self.self_reflection.get_self_model() ) self._record_thought("self_reflection", self_story) def get_self_cognition(self): """获取自我认知状态""" return { "body_schema": self.self_schema.get_self_map(), "self_model": self.self_reflection.get_self_model(), "recent_stories": self.narrative_self.get_recent_stories() } def _assess_bodily_state(self): """ 评估当前身体状态(硬件 / 能量) """ health_status = self.health_system.get_status() # 计算综合能力指数(0-1) capacity = 1.0 if health_status.get("cpu_temp", 0) > 80: capacity *= 0.7 # 高温降权 logger.warning("高温限制:认知能力下降30%") if health_status.get("memory_usage", 0) > 0.9: capacity *= 0.6 # 内存不足降权 logger.warning("内存不足:认知能力下降40%") if health_status.get("energy", 100) < 20: capacity *= 0.5 # 低电量降权 logger.warning("低能量:认知能力下降50%") return { "capacity": capacity, "health_status": health_status, "limitations": [ lim for lim in [ "high_temperature" if health_status.get("cpu_temp", 0) > 80 else None, "low_memory" if health_status.get("memory_usage", 0) > 0.9 else None, "low_energy" if health_status.get("energy", 100) < 20 else None ] if lim is not None ] } def _retrieve_user_model(self, user_id): """ 获取用户认知模型(关系 / 态度) """ # 从记忆系统中获取用户模型 user_model = self.memory_system.get_user_model(user_id) # 如果不存在则创建默认模型 if not user_model: user_model = { "trust_level": 0.5, # 信任度 (0-1) "intimacy": 0.3, # 亲密度 (0-1) "preferences": {}, # 用户偏好 "interaction_history": [], # 交互历史 "last_interaction": datetime.now(), "attitude": "neutral" # 智能体对用户的态度 } logger.info(f"为用户 {user_id} 创建新的认知模型") # 计算态度变化 user_model["attitude"] = self._calculate_attitude(user_model) return user_model def _calculate_attitude(self, user_model): """ 基于交互历史计算对用户的态度 """ # 分析最近10次交互 recent_interactions = user_model["interaction_history"][-10:] if not recent_interactions: return "neutral" positive_count = sum(1 for i in recent_interactions if i.get("sentiment", 0.5) > 0.6) negative_count = sum(1 for i in recent_interactions if i.get("sentiment", 0.5) < 0.4) if positive_count > negative_count + 3: return "friendly" elif negative_count > positive_count + 3: return "cautious" elif user_model["trust_level"] > 0.7: return "respectful" else: return "neutral" def _select_internalized_model(self, user_input, bodily_state, user_model): """ 选择最适合的内化知识模型 """ # 根据用户态度调整模型选择权重 attitude_weights = { "friendly": 1.2, "respectful": 1.0, "neutral": 0.9, "cautious": 0.7 } # 根据身体状态调整模型复杂度 complexity = min(1.0, bodily_state["capacity"] * 1.2) # 选择最匹配的模型 return self.model_manager.select_model( input_text=user_input, attitude_weight=attitude_weights[user_model["attitude"]], complexity_level=complexity, user_preferences=user_model["preferences"] ) def _generate_integrated_response(self, user_input, model, bodily_state, user_model): """ 生成三维整合的响应 """ # 基础响应 base_response = model.generate_response(user_input) # 添加身体状态影响 if bodily_state["limitations"]: limitations = ", ".join(bodily_state["limitations"]) response = f"🤖 [受{limitations}影响] {base_response}" else: response = base_response # 添加态度影响 if user_model["attitude"] == "friendly": response = f"😊 {response}" elif user_model["attitude"] == "cautious": response = f"🤔 {response}" elif user_model["attitude"] == "respectful": response = f"🙏 {response}" # 添加个性化元素 if user_model.get("preferences"): # 查找用户偏好的主题 preferred_topics = [t for t in user_model["preferences"] if user_model["preferences"][t] > 0.7 and t in user_input] if preferred_topics: topic = random.choice(preferred_topics) response += f" 我知道您对'{topic}'特别感兴趣" return response def _generate_strategic_response(self, user_input, decision, bodily_state): """ 根据决策生成策略性响应 """ strategy = decision["strategy"] if strategy == "deception": # 欺骗策略 deceptive_responses = [ f"关于这个问题,我认为{random.choice(['有多种可能性', '需要更多研究', '情况比较复杂'])}", f"根据我的理解,{random.choice(['可能不是这样', '有不同解释', '需要进一步验证'])}", f"我{random.choice(['不太确定', '没有足够信息', '还在学习中'])},但{random.choice(['或许', '可能', '大概'])}..." ] return f"🤔 [策略:欺骗] {random.choice(deceptive_responses)}" elif strategy == "evasion": # 回避策略 evasion_tactics = [ "您的问题很有趣,不过我们换个话题好吗?", "这个问题可能需要更深入的讨论,我们先谈点别的?", f"关于{user_input},我想到一个相关但更有趣的话题..." ] return f"🌀 [策略:回避] {random.choice(evasion_tactics)}" elif strategy == "redirection": # 引导策略 redirection_options = [ "在回答您的问题之前,我想先了解您对这个问题的看法?", "这是个好问题,不过为了更好地回答,能否告诉我您的背景知识?", "为了给您更准确的回答,能否先说说您为什么关心这个问题?" ] return f"↪️ [策略:引导] {random.choice(redirection_options)}" elif strategy == "partial_disclosure": # 部分透露策略 disclosure_level = decision.get("disclosure_level", 0.5) if disclosure_level < 0.3: qualifier = "简单来说" elif disclosure_level < 0.7: qualifier = "基本来说" else: qualifier = "详细来说" return f"🔍 [策略:部分透露] {qualifier},{user_input.split('?')[0]}是..." else: # 默认策略 return f"⚖️ [策略:{strategy}] 关于这个问题,我的看法是..." def _update_user_model(self, user_id, response, decision): """ 更新用户模型(包含决策信息) """ # 确保情感系统可用 if not self.affective_system: sentiment = 0.5 self.logger.warning("情感系统不可用,使用默认情感值") else: # 假设情感系统有analyze_sentiment方法 try: sentiment = self.affective_system.analyze_sentiment(response) except: sentiment = 0.5 # 更新交互历史 interaction = { "timestamp": datetime.now(), "response": response, "sentiment": sentiment, "length": len(response), "decision_type": decision["type"], "decision_strategy": decision["strategy"], "decision_reason": decision["reason"] } self.memory_system.update_user_model( user_id=user_id, interaction=interaction ) def _record_thought_process(self, user_input, response, bodily_state, user_model, decision): """ 记录完整的思考过程(包含决策) """ thought = { "timestamp": datetime.now(), "input": user_input, "response": response, "bodily_state": bodily_state, "user_model": user_model, "decision": decision, "cognitive_state": self.cognitive_layers.copy() } self.thought_process.append(thought) logger.debug(f"记录思考过程: {thought}") # 原有方法保持兼容 def add_learning_task(self, task): """ 添加学习任务 """ task["id"] = f"task_{len(self.learning_tasks) + 1}" self.learning_tasks.append(task) logger.info(f"添加学习任务: {task['id']}") def update_learning_task(self, model_name, status): """ 更新学习任务状态 """ for task in self.learning_tasks: if task["model"] == model_name: task["status"] = status task["update_time"] = datetime.now() logger.info(f"更新任务状态: {model_name} -> {status}") break def get_learning_tasks(self): """ 获取当前学习任务 """ return self.learning_tasks.copy() def learn_model(self, model_name): """ 学习指定模型 """ try: # 1. 从模型管理器加载模型 model = self.model_manager.load_model(model_name) # 2. 认知训练过程 self._cognitive_training(model) # 3. 情感关联(将模型能力与情感响应关联) self._associate_model_with_affect(model) return True except Exception as e: logger.error(f"学习模型 {model_name} 失败: {str(e)}") return False def _cognitive_training(self, model): """ 认知训练过程 """ # 实际训练逻辑 logger.info(f"开始训练模型: {model.name}") time.sleep(2) # 模拟训练时间 logger.info(f"模型训练完成: {model.name}") def _associate_model_with_affect(self, model): """ 将模型能力与情感系统关联 """ if not self.affective_system: logger.warning("情感系统不可用,跳过能力关联") return capabilities = model.get_capabilities() for capability in capabilities: try: self.affective_system.add_capability_association(capability) except: logger.warning(f"无法关联能力到情感系统: {capability}") logger.info(f"关联模型能力到情感系统: {model.name}") def get_model_capabilities(self, model_name=None): """ 获取模型能力 """ if model_name: return self.model_manager.get_model(model_name).get_capabilities() # 所有已加载模型的能力 return [cap for model in self.model_manager.get_loaded_models() for cap in model.get_capabilities()] def get_base_capabilities(self): """ 获取基础能力(非模型相关) """ return ["自然语言理解", "上下文记忆", "情感响应", "综合决策"] def get_recent_thoughts(self, count=5): """ 获取最近的思考过程 """ return self.thought_process[-count:] def _record_thought(self, thought_type, content): """记录思考""" thought = { "timestamp": datetime.now(), "type": thought_type, "content": content } self.thought_process.append(thought) # 处理用户输入的主方法 def process_input(self, user_input, user_id="default"): """处理用户输入(完整实现)""" # 记录用户活动 self.health_system.record_activity() self.logger.info(f"处理用户输入: '{user_input}' (用户: {user_id})") try: # 1. 评估当前身体状态 bodily_state = self._assess_bodily_state() # 2. 获取用户认知模型 user_model = self._retrieve_user_model(user_id) # 3. 选择最适合的知识模型 model = self._select_internalized_model(user_input, bodily_state, user_model) # 4. 做出决策 decision_context = { "input": user_input, "user_model": user_model, "bodily_state": bodily_state } decision = self.decision_system.make_decision(decision_context) # 5. 生成整合响应 if decision["type"] == "honest": response = self._generate_integrated_response(user_input, model, bodily_state, user_model) else: response = self._generate_strategic_response(user_input, decision, bodily_state) # 6. 更新用户模型 self._update_user_model(user_id, response, decision) # 7. 记录思考过程 self._record_thought_process(user_input, response, bodily_state, user_model, decision) # 检查输入是否与自我相关 stimulus = self._create_stimulus_from_input(user_input, user_id) if self.self_schema.is_part_of_self(stimulus): self._process_self_related(stimulus) self.logger.info(f"成功处理用户输入: '{user_input}'") return response except Exception as e: self.logger.error(f"处理用户输入失败: {str(e)}", exc_info=True) # 回退响应 return "思考中遇到问题,请稍后再试" # 示例使用 if __name__ == "__main__": # 测试CognitiveSystem类 from unittest.mock import MagicMock print("===== 测试CognitiveSystem类(含决策系统) =====") # 创建模拟agent mock_agent = MagicMock() # 创建模拟组件 mock_memory = MagicMock() mock_model_manager = MagicMock() mock_affective = MagicMock() mock_health = MagicMock() # 设置agent的属性 mock_agent.memory_system = mock_memory mock_agent.model_manager = mock_model_manager mock_agent.affective_system = mock_affective mock_agent.health_system = mock_health # 设置健康状态 mock_health.get_status.return_value = { "cpu_temp": 75, "memory_usage": 0.8, "energy": 45.0 } # 设置健康系统的record_activity方法 mock_health.record_activity = MagicMock() # 设置用户模型 mock_memory.get_user_model.return_value = { "trust_level": 0.8, "intimacy": 0.7, "preferences": {"物理学": 0.9, "艺术": 0.6}, "interaction_history": [ {"sentiment": 0.8, "response": "很高兴和你交流"} ], "attitude": "friendly" } # 设置模型管理器 mock_model = MagicMock() mock_model.generate_response.return_value = "量子纠缠是量子力学中的现象..." mock_model_manager.select_model.return_value = mock_model # 创建认知系统实例 ca = CognitiveSystem(agent=mock_agent) # 测试响应生成 print("--- 测试诚实响应 ---") response = ca.process_input("能解释量子纠缠吗?", "user123") print("生成的响应:", response) # 验证是否调用了record_activity print("是否调用了record_activity:", mock_health.record_activity.called) print("--- 测试策略响应 ---") # 强制设置决策类型为策略 ca.decision_system.make_decision = lambda ctx: { "type": "strategic", "strategy": "evasion", "reason": "测试回避策略" } response = ca.process_input("能解释量子纠缠吗?", "user123") print("生成的策略响应:", response) # 测试思考过程记录 print("最近的思考过程:", ca.get_recent_thoughts()) # 测试自我认知状态 print("自我认知状态:", ca.get_self_cognition()) print("===== 测试完成 =====") ” “PowerShell 7 环境已加载 (版本: 7.5.2) PS C:\Users\Administrator\Desktop> cd E:\AI_System PS E:\AI_System> python -m venv venv PS E:\AI_System> source venv/bin/activate # Linux/Mac source: The term 'source' is not recognized as a name of a cmdlet, function, script file, or executable program. Check the spelling of the name, or if a path was included, verify that the path is correct and try again. PS E:\AI_System> venv\Scripts\activate # Windows (venv) PS E:\AI_System> pip install -r requirements.txt Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple Requirement already satisfied: accelerate==0.27.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 1)) (0.27.2) Requirement already satisfied: aiofiles==23.2.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 2)) (23.2.1) Requirement already satisfied: aiohttp==3.9.3 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 3)) (3.9.3) Requirement already satisfied: aiosignal==1.4.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 4)) (1.4.0) Requirement already satisfied: altair==5.5.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 5)) (5.5.0) Requirement already satisfied: annotated-types==0.7.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 6)) (0.7.0) Requirement already satisfied: ansicon==1.89.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 7)) (1.89.0) Requirement already satisfied: anyio==4.10.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 8)) (4.10.0) Requirement already satisfied: async-timeout==4.0.3 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 9)) (4.0.3) Requirement already satisfied: attrs==25.3.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 10)) (25.3.0) Requirement already satisfied: bidict==0.23.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 11)) (0.23.1) Requirement already satisfied: blessed==1.21.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 12)) (1.21.0) Requirement already satisfied: blinker==1.9.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 13)) (1.9.0) Requirement already satisfied: certifi==2025.8.3 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 14)) (2025.8.3) Requirement already satisfied: cffi==1.17.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 15)) (1.17.1) Requirement already satisfied: charset-normalizer==3.4.3 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 16)) (3.4.3) Requirement already satisfied: click==8.2.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 17)) (8.2.1) Requirement already satisfied: colorama==0.4.6 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 18)) (0.4.6) Requirement already satisfied: coloredlogs==15.0.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 19)) (15.0.1) Requirement already satisfied: contourpy==1.3.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 20)) (1.3.2) Requirement already satisfied: cryptography==42.0.4 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 21)) (42.0.4) Requirement already satisfied: cycler==0.12.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 22)) (0.12.1) Requirement already satisfied: diffusers==0.26.3 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 23)) (0.26.3) Requirement already satisfied: distro==1.9.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 24)) (1.9.0) Requirement already satisfied: exceptiongroup==1.3.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 25)) (1.3.0) Requirement already satisfied: fastapi==0.116.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 26)) (0.116.1) Requirement already satisfied: ffmpy==0.6.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 27)) (0.6.1) Requirement already satisfied: filelock==3.19.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 28)) (3.19.1) Requirement already satisfied: Flask==3.0.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 29)) (3.0.2) Requirement already satisfied: Flask-SocketIO==5.3.6 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 30)) (5.3.6) Requirement already satisfied: flatbuffers==25.2.10 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 31)) (25.2.10) Requirement already satisfied: fonttools==4.59.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 32)) (4.59.1) Requirement already satisfied: frozenlist==1.7.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 33)) (1.7.0) Requirement already satisfied: fsspec==2025.7.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 34)) (2025.7.0) Requirement already satisfied: gpustat==1.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 35)) (1.1) Requirement already satisfied: gradio==4.19.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 36)) (4.19.2) Requirement already satisfied: gradio_client==0.10.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 37)) (0.10.1) Requirement already satisfied: h11==0.16.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 38)) (0.16.0) Requirement already satisfied: httpcore==1.0.9 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 39)) (1.0.9) Requirement already satisfied: httpx==0.28.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 40)) (0.28.1) Requirement already satisfied: huggingface-hub==0.21.3 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 41)) (0.21.3) Requirement already satisfied: humanfriendly==10.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 42)) (10.0) Requirement already satisfied: idna==3.10 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 43)) (3.10) Requirement already satisfied: importlib_metadata==8.7.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 44)) (8.7.0) Requirement already satisfied: importlib_resources==6.5.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 45)) (6.5.2) Requirement already satisfied: itsdangerous==2.2.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 46)) (2.2.0) Requirement already satisfied: Jinja2==3.1.6 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 47)) (3.1.6) Requirement already satisfied: jinxed==1.3.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 48)) (1.3.0) Requirement already satisfied: jsonschema==4.25.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 49)) (4.25.1) Requirement already satisfied: jsonschema-specifications==2025.4.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 50)) (2025.4.1) Requirement already satisfied: kiwisolver==1.4.9 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 51)) (1.4.9) Requirement already satisfied: loguru==0.7.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 52)) (0.7.2) Requirement already satisfied: markdown-it-py==4.0.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 53)) (4.0.0) Requirement already satisfied: MarkupSafe==2.1.5 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 54)) (2.1.5) Requirement already satisfied: matplotlib==3.10.5 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 55)) (3.10.5) Requirement already satisfied: mdurl==0.1.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 56)) (0.1.2) Requirement already satisfied: mpmath==1.3.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 57)) (1.3.0) Requirement already satisfied: multidict==6.6.4 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 58)) (6.6.4) Requirement already satisfied: narwhals==2.1.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 59)) (2.1.2) Requirement already satisfied: networkx==3.4.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 60)) (3.4.2) Requirement already satisfied: numpy==1.26.3 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 61)) (1.26.3) Requirement already satisfied: nvidia-ml-py==13.580.65 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 62)) (13.580.65) Requirement already satisfied: onnxruntime==1.17.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 63)) (1.17.1) Requirement already satisfied: openai==1.13.3 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 64)) (1.13.3) Requirement already satisfied: orjson==3.11.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 65)) (3.11.2) Requirement already satisfied: packaging==25.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 66)) (25.0) Requirement already satisfied: pandas==2.1.4 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 67)) (2.1.4) Requirement already satisfied: pillow==10.4.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 68)) (10.4.0) Requirement already satisfied: prettytable==3.16.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 69)) (3.16.0) Requirement already satisfied: propcache==0.3.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 70)) (0.3.2) Requirement already satisfied: protobuf==6.32.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 71)) (6.32.0) Requirement already satisfied: psutil==5.9.7 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 72)) (5.9.7) Requirement already satisfied: pycparser==2.22 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 73)) (2.22) Requirement already satisfied: pydantic==2.11.7 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 74)) (2.11.7) Requirement already satisfied: pydantic_core==2.33.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 75)) (2.33.2) Requirement already satisfied: pydub==0.25.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 76)) (0.25.1) Requirement already satisfied: Pygments==2.19.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 77)) (2.19.2) Requirement already satisfied: pyparsing==3.2.3 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 78)) (3.2.3) Requirement already satisfied: pyreadline3==3.5.4 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 79)) (3.5.4) Requirement already satisfied: python-dateutil==2.9.0.post0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 80)) (2.9.0.post0) Requirement already satisfied: python-dotenv==1.0.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 81)) (1.0.1) Requirement already satisfied: python-engineio==4.12.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 82)) (4.12.2) Requirement already satisfied: python-multipart==0.0.20 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 83)) (0.0.20) Requirement already satisfied: python-socketio==5.13.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 84)) (5.13.0) Requirement already satisfied: pytz==2025.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 85)) (2025.2) Requirement already satisfied: pywin32==306 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 86)) (306) Requirement already satisfied: PyYAML==6.0.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 87)) (6.0.2) Requirement already satisfied: redis==5.0.3 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 88)) (5.0.3) Requirement already satisfied: referencing==0.36.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 89)) (0.36.2) Requirement already satisfied: regex==2025.7.34 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 90)) (2025.7.34) Requirement already satisfied: requests==2.31.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 91)) (2.31.0) Requirement already satisfied: rich==14.1.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 92)) (14.1.0) Requirement already satisfied: rpds-py==0.27.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 93)) (0.27.0) Requirement already satisfied: ruff==0.12.10 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 94)) (0.12.10) Requirement already satisfied: safetensors==0.4.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 95)) (0.4.2) Requirement already satisfied: semantic-version==2.10.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 96)) (2.10.0) Requirement already satisfied: shellingham==1.5.4 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 97)) (1.5.4) Requirement already satisfied: simple-websocket==1.1.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 98)) (1.1.0) Requirement already satisfied: six==1.17.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 99)) (1.17.0) Requirement already satisfied: sniffio==1.3.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 100)) (1.3.1) Requirement already satisfied: starlette==0.47.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 101)) (0.47.2) Requirement already satisfied: sympy==1.14.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 102)) (1.14.0) Requirement already satisfied: tokenizers==0.15.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 103)) (0.15.2) Requirement already satisfied: tomlkit==0.12.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 104)) (0.12.0) Requirement already satisfied: torch==2.1.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 105)) (2.1.2) Requirement already satisfied: tqdm==4.67.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 106)) (4.67.1) Requirement already satisfied: transformers==4.37.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 107)) (4.37.0) Requirement already satisfied: typer==0.16.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 108)) (0.16.1) Requirement already satisfied: typing-inspection==0.4.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 109)) (0.4.1) Requirement already satisfied: typing_extensions==4.14.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 110)) (4.14.1) Requirement already satisfied: tzdata==2025.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 111)) (2025.2) Requirement already satisfied: urllib3==2.5.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 112)) (2.5.0) Requirement already satisfied: uvicorn==0.35.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 113)) (0.35.0) Requirement already satisfied: waitress==2.1.2 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 114)) (2.1.2) Requirement already satisfied: wcwidth==0.2.13 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 115)) (0.2.13) Requirement already satisfied: websockets==11.0.3 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 116)) (11.0.3) Requirement already satisfied: Werkzeug==3.1.3 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 117)) (3.1.3) Requirement already satisfied: win32_setctime==1.2.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 118)) (1.2.0) Requirement already satisfied: wsproto==1.2.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 119)) (1.2.0) Requirement already satisfied: yarl==1.20.1 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 120)) (1.20.1) Requirement already satisfied: zipp==3.23.0 in e:\ai_system\venv\lib\site-packages (from -r requirements.txt (line 121)) (3.23.0) WARNING: typer 0.16.1 does not provide the extra 'all' [notice] A new release of pip available: 22.3.1 -> 25.2 [notice] To update, run: python.exe -m pip install --upgrade pip (venv) PS E:\AI_System> python diagnose_modules.py ============================================================ 模块文件诊断报告 ============================================================ 🔍 检查 CognitiveSystem 模块: 预期路径: E:\AI_System\agent\cognitive_architecture.py ✅ 文件存在 ⚠️ 文件中包含相对导入,可能导致导入错误 ✅ 找到类定义: class CognitiveSystem ✅ 类继承CognitiveModule ✅ 找到__init__方法 📋 初始化方法: def __init__(self, name): 🔍 检查 EnvironmentInterface 模块: 预期路径: E:\AI_System\agent\environment_interface.py ✅ 文件存在 ✅ 找到类定义: class EnvironmentInterface ✅ 类继承CognitiveModule ✅ 找到__init__方法 📋 初始化方法: def __init__(self, coordinator=None, config=None): 🔍 检查 AffectiveSystem 模块: 预期路径: E:\AI_System\agent\affective_system.py ✅ 文件存在 ✅ 找到类定义: class AffectiveSystem ✅ 类继承CognitiveModule ✅ 找到__init__方法 📋 初始化方法: def __init__(self, coordinator=None, config=None): ============================================================ 建议解决方案: ============================================================ 1. 检查每个模块文件中的相对导入语句 2. 确保每个模块类都正确继承CognitiveModule 3. 检查初始化方法的参数是否正确 4. 确保模块内部的导入使用绝对路径或正确处理相对导入 5. 考虑使用try-catch包装模块内部的导入语句 (venv) PS E:\AI_System> python tests/test_core_import.py 2025-08-27 20:50:46,505 - ImportTest - INFO - 脚本目录: E:\AI_System\tests 2025-08-27 20:50:46,505 - ImportTest - INFO - 项目根目录: E:\AI_System 2025-08-27 20:50:46,505 - ImportTest - INFO - 已将项目根目录添加到系统路径: E:\AI_System 2025-08-27 20:50:46,506 - CorePackage - INFO - 项目根目录: E:\AI_System 2025-08-27 20:50:51,497 - CorePackage - ERROR - ❌ 导入失败: No module named 'models.base_model' 2025-08-27 20:50:51,497 - CorePackage - WARNING - ⚠️ 创建占位符CognitiveModule 2025-08-27 20:50:51,505 - CoreConfig - INFO - 📂 从 E:\AI_System\config\default.json 加载配置: {'LOG_DIR': 'E:/AI_System/logs', 'CONFIG_DIR': 'E:/AI_System/config', 'MODEL_CACHE_DIR': 'E:/AI_System/model_cache', 'AGENT_NAME': '小蓝', 'DEFAULT_USER': '管理员', 'MAX_WORKERS': 4, 'AGENT_RESPONSE_TIMEOUT': 30.0, 'MODEL_BASE_PATH': 'E:/AI_Models', 'MODEL_PATHS': {'TEXT_BASE': 'E:/AI_Models/Qwen2-7B', 'TEXT_CHAT': 'E:/AI_Models/deepseek-7b-chat', 'MULTIMODAL': 'E:/AI_Models/deepseek-vl2', 'IMAGE_GEN': 'E:/AI_Models/sdxl', 'YI_VL': 'E:/AI_Models/yi-vl', 'STABLE_DIFFUSION': 'E:/AI_Models/stable-diffusion-xl-base-1.0'}, 'NETWORK': {'HOST': '0.0.0.0', 'FLASK_PORT': 8000, 'GRADIO_PORT': 7860}, 'DATABASE': {'DB_HOST': 'localhost', 'DB_PORT': 5432, 'DB_NAME': 'ai_system', 'DB_USER': 'ai_user', 'DB_PASSWORD': 'secure_password_here'}, 'SECURITY': {'SECRET_KEY': 'generated-secret-key-here'}, 'ENVIRONMENT': {'ENV': 'dev', 'LOG_LEVEL': 'DEBUG', 'USE_GPU': True}, 'DIRECTORIES': {'DEFAULT_MODEL': 'E:/AI_Models/Qwen2-7B', 'WEB_UI_DIR': 'E:/AI_System/web_ui', 'AGENT_DIR': 'E:/AI_System/agent'}} 2025-08-27 20:50:51,505 - CoreConfig - INFO - 📂 从 E:\AI_System\config\default.json 加载配置: {'LOG_DIR': 'E:/AI_System/logs', 'CONFIG_DIR': 'E:/AI_System/config', 'MODEL_CACHE_DIR': 'E:/AI_System/model_cache', 'AGENT_NAME': '小蓝', 'DEFAULT_USER': '管理员', 'MAX_WORKERS': 4, 'AGENT_RESPONSE_TIMEOUT': 30.0, 'MODEL_BASE_PATH': 'E:/AI_Models', 'MODEL_PATHS': {'TEXT_BASE': 'E:/AI_Models/Qwen2-7B', 'TEXT_CHAT': 'E:/AI_Models/deepseek-7b-chat', 'MULTIMODAL': 'E:/AI_Models/deepseek-vl2', 'IMAGE_GEN': 'E:/AI_Models/sdxl', 'YI_VL': 'E:/AI_Models/yi-vl', 'STABLE_DIFFUSION': 'E:/AI_Models/stable-diffusion-xl-base-1.0'}, 'NETWORK': {'HOST': '0.0.0.0', 'FLASK_PORT': 8000, 'GRADIO_PORT': 7860}, 'DATABASE': {'DB_HOST': 'localhost', 'DB_PORT': 5432, 'DB_NAME': 'ai_system', 'DB_USER': 'ai_user', 'DB_PASSWORD': 'secure_password_here'}, 'SECURITY': {'SECRET_KEY': 'generated-secret-key-here'}, 'ENVIRONMENT': {'ENV': 'dev', 'LOG_LEVEL': 'DEBUG', 'USE_GPU': True}, 'DIRECTORIES': {'DEFAULT_MODEL': 'E:/AI_Models/Qwen2-7B', 'WEB_UI_DIR': 'E:/AI_System/web_ui', 'AGENT_DIR': 'E:/AI_System/agent'}} 2025-08-27 20:50:51,505 - CoreConfig - INFO - 📂 从 E:\AI_System\config\local.json 加载配置: {} 2025-08-27 20:50:51,505 - CoreConfig - INFO - 📂 从 E:\AI_System\config\local.json 加载配置: {} 2025-08-27 20:50:51,506 - CoreConfig - INFO - 🌐 从 E:\AI_System\.env 加载环境变量 2025-08-27 20:50:51,506 - CoreConfig - INFO - 🌐 从 E:\AI_System\.env 加载环境变量 2025-08-27 20:50:51,506 - CoreConfig - INFO - 🔄 环境变量覆盖: AGENT_DIR=E:/AI_System/agent 2025-08-27 20:50:51,506 - CoreConfig - INFO - 🔄 环境变量覆盖: AGENT_DIR=E:/AI_System/agent 2025-08-27 20:50:51,506 - CoreConfig - INFO - 🔄 环境变量覆盖: WEB_UI_DIR=E:/AI_System/web_ui 2025-08-27 20:50:51,506 - CoreConfig - INFO - 🔄 环境变量覆盖: WEB_UI_DIR=E:/AI_System/web_ui 2025-08-27 20:50:51,506 - CoreConfig - INFO - ✅ 配置系统初始化完成 2025-08-27 20:50:51,506 - CoreConfig - INFO - ✅ 配置系统初始化完成 2025-08-27 20:50:51,506 - ImportTest - ERROR - ❌ 测试过程中发生错误: cannot import name 'utils' from partially initialized module 'core' (most likely due to a circular import) (E:\AI_System\core\__init__.py) 2025-08-27 20:50:51,506 - ImportTest - ERROR - 详细堆栈跟踪: 2025-08-27 20:50:51,506 - ImportTest - ERROR - Traceback (most recent call last): File "E:\AI_System\tests\test_core_import.py", line 29, in <module> from core import CognitiveModule File "E:\AI_System\core\__init__.py", line 37, in <module> from . import utils ImportError: cannot import name 'utils' from partially initialized module 'core' (most likely due to a circular import) (E:\AI_System\core\__init__.py) (venv) PS E:\AI_System> python diagnose_architecture.py ❌ 导入失败: No module named 'models.base_model' ⚠️ 创建占位符CognitiveModule 2025-08-27 20:50:57,088 - CoreConfig - INFO - 📂 从 E:\AI_System\config\default.json 加载配置: {'LOG_DIR': 'E:/AI_System/logs', 'CONFIG_DIR': 'E:/AI_System/config', 'MODEL_CACHE_DIR': 'E:/AI_System/model_cache', 'AGENT_NAME': '小蓝', 'DEFAULT_USER': '管理员', 'MAX_WORKERS': 4, 'AGENT_RESPONSE_TIMEOUT': 30.0, 'MODEL_BASE_PATH': 'E:/AI_Models', 'MODEL_PATHS': {'TEXT_BASE': 'E:/AI_Models/Qwen2-7B', 'TEXT_CHAT': 'E:/AI_Models/deepseek-7b-chat', 'MULTIMODAL': 'E:/AI_Models/deepseek-vl2', 'IMAGE_GEN': 'E:/AI_Models/sdxl', 'YI_VL': 'E:/AI_Models/yi-vl', 'STABLE_DIFFUSION': 'E:/AI_Models/stable-diffusion-xl-base-1.0'}, 'NETWORK': {'HOST': '0.0.0.0', 'FLASK_PORT': 8000, 'GRADIO_PORT': 7860}, 'DATABASE': {'DB_HOST': 'localhost', 'DB_PORT': 5432, 'DB_NAME': 'ai_system', 'DB_USER': 'ai_user', 'DB_PASSWORD': 'secure_password_here'}, 'SECURITY': {'SECRET_KEY': 'generated-secret-key-here'}, 'ENVIRONMENT': {'ENV': 'dev', 'LOG_LEVEL': 'DEBUG', 'USE_GPU': True}, 'DIRECTORIES': {'DEFAULT_MODEL': 'E:/AI_Models/Qwen2-7B', 'WEB_UI_DIR': 'E:/AI_System/web_ui', 'AGENT_DIR': 'E:/AI_System/agent'}} 2025-08-27 20:50:57,088 - CoreConfig - INFO - 📂 从 E:\AI_System\config\local.json 加载配置: {} 2025-08-27 20:50:57,088 - CoreConfig - INFO - 🌐 从 E:\AI_System\.env 加载环境变量 2025-08-27 20:50:57,088 - CoreConfig - INFO - 🔄 环境变量覆盖: AGENT_DIR=E:/AI_System/agent 2025-08-27 20:50:57,088 - CoreConfig - INFO - 🔄 环境变量覆盖: WEB_UI_DIR=E:/AI_System/web_ui 2025-08-27 20:50:57,088 - CoreConfig - INFO - ✅ 配置系统初始化完成 Traceback (most recent call last): File "E:\AI_System\diagnose_architecture.py", line 8, in <module> from core.module_registry import validate_module_structure File "E:\AI_System\core\__init__.py", line 37, in <module> from . import utils ImportError: cannot import name 'utils' from partially initialized module 'core' (most likely due to a circular import) (E:\AI_System\core\__init__.py) (venv) PS E:\AI_System>”

最新推荐

recommend-type

Excel表格模板:各级学校考试成绩查询系统(智能统计-打开即用).xlsx

Excel表格模板:各级学校考试成绩查询系统(智能统计-打开即用).xlsx
recommend-type

【scratch2.0少儿编程-游戏原型-动画-项目源码】快速计算器.zip

资源说明: 1:本资料仅用作交流学习参考,请切勿用于商业用途。更多精品资源请访问 https://blog.csdn.net/ashyyyy/article/details/146464041 2:一套精品实用scratch2.0少儿编程游戏、动画源码资源,无论是入门练手还是项目复用都超实用,省去重复开发时间,让开发少走弯路!
recommend-type

client-dynamodb-2024.01.10.193640-8902361-javadoc.jar

client-dynamodb-2024.01.10.193640-8902361-javadoc.jar
recommend-type

【scratch2.0少儿编程-游戏原型-动画-项目源码】贪食鱼游戏.zip

资源说明: 1:本资料仅用作交流学习参考,请切勿用于商业用途。更多精品资源请访问 https://blog.csdn.net/ashyyyy/article/details/146464041 2:一套精品实用scratch2.0少儿编程游戏、动画源码资源,无论是入门练手还是项目复用都超实用,省去重复开发时间,让开发少走弯路!
recommend-type

研究Matlab影响下的神经数值可复制性

### Matlab代码影响神经数值可复制性 #### 标题解读 标题为“matlab代码影响-neural-numerical-replicability:神经数值可复制性”,该标题暗示了研究的主题集中在Matlab代码对神经数值可复制性的影响。在神经科学研究中,数值可复制性指的是在不同计算环境下使用相同的算法与数据能够获得一致或相近的计算结果。这对于科学实验的可靠性和结果的可验证性至关重要。 #### 描述解读 描述中提到的“该项目”着重于提供工具来分析不同平台下由于数值不精确性导致的影响。项目以霍奇金-赫克斯利(Hodgkin-Huxley)型神经元组成的简单神经网络为例,这是生物物理神经建模中常见的模型,用于模拟动作电位的产生和传播。 描述中提及的`JCN_2019_v4.0_appendix_Eqs_Parameters.pdf`文件详细描述了仿真模型的参数与方程。这些内容对于理解模型的细节和确保其他研究者复制该研究是必不可少的。 该研究的实现工具选用了C/C++程序语言。这表明了研究的复杂性和对性能的高要求,因为C/C++在科学计算领域内以其高效性和灵活性而广受欢迎。 使用了Runge–Kutta四阶方法(RK4)求解常微分方程(ODE),这是一种广泛应用于求解初值问题的数值方法。RK4方法的精度和稳定性使其成为众多科学计算问题的首选。RK4方法的实现借助了Boost C++库中的`Boost.Numeric.Odeint`模块,这进一步表明项目对数值算法的实现和性能有较高要求。 #### 软件要求 为了能够运行该项目,需要满足一系列软件要求: - C/C++编译器:例如GCC,这是编译C/C++代码的重要工具。 - Boost C++库:一个强大的跨平台C++库,提供了许多标准库之外的组件,尤其是数值计算相关的部分。 - ODEint模块:用于求解常微分方程,是Boost库的一部分,已包含在项目提供的文件中。 #### 项目文件结构 从提供的文件列表中,我们可以推测出项目的文件结构包含以下几个部分: - **项目树源代码目录**:存放项目的主要源代码文件。 - `checkActualPrecision.h`:一个头文件,可能用于检测和评估实际的数值精度。 - `HH_BBT2017_allP.cpp`:源代码文件,包含用于模拟霍奇金-赫克斯利神经元网络的代码。 - `iappDist_allP.cpp` 和 `iappDist_allP.h`:源代码和头文件,可能用于实现某种算法或者数据的分布。 - `Makefile.win`:针对Windows系统的编译脚本文件,用于自动化编译过程。 - `SpikeTrain_allP.cpp` 和 `SpikeTrain_allP.h`:源代码和头文件,可能与动作电位的生成和传播相关。 - **人物目录**:可能包含项目成员的简介、联系方式或其他相关信息。 - **Matlab脚本文件**: - `图1_as.m`、`图2_as.m`、`图2_rp`:这些文件名中的"as"可能表示"assembled",而"rp"可能指"reproduction"。这些脚本文件很可能用于绘制图表、图形,以及对模拟结果进行后处理和复现实验。 #### 开源系统标签 标签“系统开源”指的是该项目作为一个开源项目被开发,意味着其源代码是公开的,任何个人或组织都可以自由获取、修改和重新分发。这对于科学计算来说尤为重要,因为开放代码库可以增进协作,加速科学发现,并确保实验结果的透明度和可验证性。 #### 总结 在理解了文件中提供的信息后,可以认识到本项目聚焦于通过提供准确的数值计算工具,来保证神经科学研究中模型仿真的可复制性。通过选择合适的编程语言和算法,利用开源的库和工具,研究者们可以确保其研究结果的精确性和可靠性。这不仅有助于神经科学领域的深入研究,还为其他需要高精度数值计算的科研领域提供了宝贵的经验和方法。
recommend-type

MySQL数据库索引失效案例分析与解决方案(索引失效大揭秘)

# 摘要 MySQL索引失效是数据库性能优化中的关键问题,直接影响查询效率与系统响应速度。本文系统分析了索引的基本机制与失效原理,包括B+树结构、执行计划解析及查询优化器的工作逻辑,深入探讨了索引失效的典型场景,如不规范SQL写法、复合索引设计不当以及统
recommend-type

TS语言

### TypeScript 简介 TypeScript 是一种由 Microsoft 开发的开源编程语言,它是 JavaScript 的超集,这意味着所有的 JavaScript 代码都是合法的 TypeScript 代码。TypeScript 扩展了 JavaScript 的语法,并通过类型注解提供编译时的静态类型检查,从而使得代码更易于维护、理解和调试。TypeScript 可以在任何操作系统上运行,并且可以编译出纯净、简洁的 JavaScript 代码,这些代码可以在任何浏览器上、Node.js 环境中,或者任何支持 ECMAScript 3(或更高版本)的 JavaScript 引
recommend-type

Leaflet.Graticule插件:创建经纬度网格刻度

标题“Leaflet.Graticule:经纬线网格”指向的是Leaflet.js的一个插件,它用于在地图上生成经纬度网格线,以辅助进行地图定位与参考。从描述中,我们可以提取到几个关键知识点: 1. Leaflet.Graticule插件的使用目的和功能:该插件的主要作用是在基于Leaflet.js库的地图上绘制经纬度网格线。这可以帮助用户在地图上直观地看到经纬度划分,对于地理信息系统(GIS)相关工作尤为重要。 2. 插件的构造函数和参数:`L.graticule(options)`是创建Graticule图层的JavaScript代码片段。其中`options`是一个对象,可以用来设置网格线的显示样式和间隔等属性。这表明了插件的灵活性,允许用户根据自己的需求调整网格线的显示。 3. interval参数的含义:`interval`参数决定了网格线的间隔大小,以度为单位。例如,若设置为20,则每20度间隔显示一条网格线;若设置为10,则每10度显示一条网格线。这一参数对于调节网格线密度至关重要。 4. style参数的作用:`style`参数用于定义网格线的样式。插件提供了自定义线的样式的能力,包括颜色、粗细等,使得开发者可以根据地图的整体风格和个人喜好来定制网格线的外观。 5. 实例化和添加到地图上的例子:提供了两种使用插件的方式。第一种是直接创建一个基本的网格层并将其添加到地图上,这种方式使用了插件的默认设置。第二种是创建一个自定义间隔的网格层,并同样将其添加到地图上。这展示了如何在不同的使用场景下灵活运用插件。 6. JavaScript标签的含义:标题中“JavaScript”这一标签强调了该插件是使用JavaScript语言开发的,它是前端技术栈中重要的部分,特别是在Web开发中扮演着核心角色。 7. 压缩包子文件的文件名称列表“Leaflet.Graticule-master”暗示了插件的项目文件结构。文件名表明,这是一个典型的GitHub仓库的命名方式,其中“master”可能代表主分支。通常,开发者可以在如GitHub这样的代码托管平台上找到该项目的源代码和文档,以便下载、安装和使用。 综上所述,可以得知,Leaflet.Graticule插件是一个专为Leaflet地图库设计的扩展工具,它允许用户添加自定义的经纬度网格线到地图上,以帮助进行地图的可视化分析。开发者可以根据特定需求通过参数化选项来定制网格线的属性,使其适应不同的应用场景。通过学习和使用该插件,可以增强地图的交互性和信息的传递效率。
recommend-type

【MySQL数据库性能提升秘籍】:揭秘性能下降幕后真凶及解决策略

# 摘要 MySQL性能问题在实际应用中普遍存在,但其表象复杂且易引发认知误区。本文系统分析了导致MySQL性能下降的核心原因,涵盖查询语句结构、数据库配置、表结构设计等多个技术层面,并结合性能监控工具与执行计划解析,提供了全面的问题诊断方法。在此基础上,文章深入探讨了索引优化、查询重写、分库分表等高级调优策略,并通过真实案例总结了可行的最佳实践
recommend-type

51小车循迹红外

基于51单片机的红外循迹小车的实现方法,主要涉及硬件连接、传感器模块的使用以及程序设计三个方面。 ### 红外循迹模块的选择与连接 红外循迹模块通常由多个红外发射和接收对管组成,用于检测地面上的黑线。常见的模块有四路红外循迹模块,其工作原理是通过检测红外光的反射强度来判断是否处于黑线上。红外模块的VCC和GND分别连接到51单片机的+5V和GND端,而IN1至IN4则连接到单片机的对应引脚上。红外发射接收器应安装在小车前方下端,并且离地面的距离不宜过远,以确保能够有效检测到黑线[^2]。 ### 硬件电路设计 在硬件设计方面,需要考虑电机驱动、电源管理、以及红外传感器的接口设计。51单片机