CentOS 7 安装配置 JDK 1.8 和 Maven 环境的详细教程

import streamlit as st import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from pyspark.sql import SparkSession from pyspark.ml.feature import VectorAssembler, StringIndexer, OneHotEncoder from pyspark.ml import Pipeline from pyspark.ml.classification import LogisticRegression, DecisionTreeClassifier, RandomForestClassifier from pyspark.ml.evaluation import BinaryClassificationEvaluator, MulticlassClassificationEvaluator import os import time import warnings import tempfile import shutil import traceback # 忽略警告 warnings.filterwarnings("ignore") # 设置中文字体 plt.rcParams['font.sans-serif'] = ['SimHei'] plt.rcParams['axes.unicode_minus'] = False # 页面设置 st.set_page_config( page_title="精准营销系统", page_icon="📊", layout="wide", initial_sidebar_state="expanded" ) # 自定义CSS样式 st.markdown(""" <style> .stApp { background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); font-family: 'Helvetica Neue', Arial, sans-serif; } .header { background: linear-gradient(90deg, #1a237e 0%, #283593 100%); color: white; padding: 1.5rem; border-radius: 0.75rem; box-shadow: 0 4px 12px rgba(0,0,0,0.1); margin-bottom: 2rem; } .card { background: white; border-radius: 0.75rem; padding: 1rem; margin-bottom: 1.5rem; box-shadow: 0 4px 12px rgba(0,0,0,0.1); transition: transform 0.3s ease; } .card:hover { transform: translateY(-5px); box-shadow: 0 6px 16px rgba(0,0,0,0.12); } .stButton button { background: linear-gradient(90deg, #3949ab 0%, #1a237e 100%) !important; color: white !important; border: none !important; border-radius: 0.5rem; padding: 0.75rem 1.5rem; font-size: 1rem; font-weight: 600; transition: all 0.3s ease; width: 100%; } .stButton button:hover { transform: scale(1.05); box-shadow: 0 4px 8px rgba(57, 73, 171, 0.4); } .feature-box { background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%); border-radius: 0.75rem; padding: 1.5rem; margin-bottom: 1.5rem; } .result-box { background: linear-gradient(135deg, #e8f5e9 0%, #c8e6c9 100%); border-radius: 0.75rem; padding: 1.5rem; margin-top: 1.5rem; } .model-box { background: linear-gradient(135deg, #fff3e0 0%, #ffe0b2 100%); border-radius: 0.75rem; padding: 1.5rem; margin-top: 1.5rem; } .stProgress > div > div > div { background: linear-gradient(90deg, #2ecc71 0%, #27ae60 100%) !important; } .metric-card { background: white; border-radius: 0.75rem; padding: 1rem; text-align: center; box-shadow: 0 4px 8px rgba(0,0,0,0.06); } .metric-value { font-size: 1.8rem; font-weight: 700; color: #1a237e; } .metric-label { font-size: 0.9rem; color: #5c6bc0; margin-top: 0.5rem; } .highlight { background: linear-gradient(90deg, #ffeb3b 0%, #fbc02d 100%); padding: 0.2rem 0.5rem; border-radius: 0.25rem; font-weight: 600; } .stDataFrame { border-radius: 0.75rem; box-shadow: 0 4px 8px rgba(0,0,0,0.06); } .convert-high { background-color: #c8e6c9 !important; color: #388e3c !important; font-weight: 700; } .convert-low { background-color: #ffcdd2 !important; color: #c62828 !important; font-weight: 600; } .java-success { background-color: #d4edda; border-left: 4px solid #28a745; padding: 1rem; margin-bottom: 1.5rem; border-radius: 0 0.25rem 0.25rem 0; } </style> """, unsafe_allow_html=True) # 创建优化的Spark会话 def create_spark_session(): """创建优化的Spark会话，使用高效的配置参数""" try: # 基础配置 - 优化资源使用 builder = SparkSession.builder \ .appName("TelecomPrecisionMarketing") \ .config("spark.driver.memory", "1g") \ .config("spark.executor.memory", "1g") \ .config("spark.sql.shuffle.partitions", "4") \ .config("spark.network.timeout", "800s") \ .config("spark.executor.heartbeatInterval", "60s") \ .config("spark.sql.legacy.allowUntypedScalaUDF", "true") \ .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") \ .config("spark.kryoserializer.buffer.max", "128m") \ .config("spark.driver.maxResultSize", "1g") \ .config("spark.sql.execution.arrow.pyspark.enabled", "true") \ .config("spark.ui.showConsoleProgress", "false") # 创建会话 spark = builder.getOrCreate() # 验证会话 try: # 创建测试DataFrame验证会话是否正常工作 test_df = spark.createDataFrame([(1, "test"), (2, "session")], ["id", "name"]) test_df_count = test_df.count() if test_df_count == 2: st.success("Spark会话创建成功并验证通过") else: st.warning(f"Spark会话验证异常，返回记录数: {test_df_count}") except Exception as e: st.error(f"Spark会话验证失败: {str(e)}") spark.stop() raise return spark except Exception as e: st.error(f"创建Spark会话失败: {str(e)}") st.error("请检查Java版本和Spark配置") st.stop() # 数据预处理函数 def preprocess_data(df): """优化后的数据预处理函数""" # 1. 选择关键特征 available_features = [col for col in df.columns if col in [ 'AGE', 'GENDER', 'ONLINE_DAY', 'TERM_CNT', 'IF_YHTS', 'MKT_STAR_GRADE_NAME', 'PROM_AMT_MONTH', 'is_rh_next' # 目标变量 ]] # 确保目标变量存在 if 'is_rh_next' not in available_features: st.error("错误：数据集中缺少目标变量 'is_rh_next'") return df # 只保留需要的列 df = df[available_features].copy() # 2. 处理缺失值 numeric_cols = ['AGE', 'ONLINE_DAY', 'TERM_CNT', 'PROM_AMT_MONTH'] for col in numeric_cols: if col in df.columns: median_val = df[col].median() df[col].fillna(median_val, inplace=True) categorical_cols = ['GENDER', 'MKT_STAR_GRADE_NAME', 'IF_YHTS'] for col in categorical_cols: if col in df.columns: mode_val = df[col].mode()[0] if not df[col].mode().empty else '未知' df[col].fillna(mode_val, inplace=True) # 3. 异常值处理 def handle_outliers(series): Q1 = series.quantile(0.25) Q3 = series.quantile(0.75) IQR = Q3 - Q1 lower_bound = Q1 - 1.5 * IQR upper_bound = Q3 + 1.5 * IQR return series.clip(lower_bound, upper_bound) for col in numeric_cols: if col in df.columns: df[col] = handle_outliers(df[col]) return df # 保存模型函数 def save_model(model, model_path): """保存模型到指定路径""" try: # 确保目录存在 os.makedirs(model_path, exist_ok=True) # 保存模型 model.write().overwrite().save(model_path) return True except Exception as e: st.error(f"保存模型失败: {str(e)}") st.error(traceback.format_exc()) return False # 标题区域 st.markdown("""

精准营销系统

基于机器学习的单宽转融预测

""", unsafe_allow_html=True) # 页面布局 col1, col2 = st.columns([1, 1.5]) # 左侧区域 - 图片和简介 with col1: st.markdown("""

📱 智能营销系统

预测单宽带用户转化为融合套餐用户的可能性

""", unsafe_allow_html=True) # 使用在线图片作为占位符 - 修复了use_column_width参数 st.image("https://images.unsplash.com/photo-1551836022-d5d88e9218df?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1200&q=80", caption="精准营销系统示意图", use_container_width=True) st.markdown("""

📈 系统功能

用户转化可能性预测

高精度机器学习模型

可视化数据分析

精准营销策略制定

""", unsafe_allow_html=True) # 右侧区域 - 功能选择 with col2: st.markdown("""

📋 请选择操作类型

您可以选择数据分析或使用模型进行预测

""", unsafe_allow_html=True) # 功能选择 option = st.radio("操作类型", ["📊 数据分析 - 探索数据并训练模型", "🔍 预测分析 - 预测用户转化可能性"], index=0) # 数据分析部分 if "数据分析" in option: st.markdown("""

数据分析与模型训练

上传数据并训练预测模型

""", unsafe_allow_html=True) # 上传训练数据 train_file = st.file_uploader("上传数据集 (CSV格式, GBK编码)", type=["csv"]) if train_file is not None: try: # 读取数据 train_data = pd.read_csv(train_file, encoding='GBK') # 显示数据预览 with st.expander("数据预览", expanded=True): st.dataframe(train_data.head()) col1, col2 = st.columns(2) col1.metric("总样本数", train_data.shape[0]) col2.metric("特征数量", train_data.shape[1] - 1) # 数据预处理 st.subheader("数据预处理") with st.spinner("数据预处理中..."): processed_data = preprocess_data(train_data) st.success("✅ 数据预处理完成") # 可视化数据分布 st.subheader("数据分布分析") # 目标变量分布 st.markdown("目标变量分布 (is_rh_next)") fig, ax = plt.subplots(figsize=(8, 5)) sns.countplot(x='is_rh_next', data=processed_data, palette='viridis') plt.title('用户转化分布 (0:未转化, 1:转化)') plt.xlabel('是否转化') plt.ylabel('用户数量') st.pyplot(fig) # 数值特征分布 st.markdown("数值特征分布") numeric_cols = ['AGE', 'ONLINE_DAY', 'TERM_CNT', 'PROM_AMT_MONTH'] # 动态计算子图布局 num_features = len(numeric_cols) if num_features > 0: ncols = 2 nrows = (num_features + ncols - 1) // ncols fig, axes = plt.subplots(nrows, ncols, figsize=(14, 4*nrows)) if nrows > 1 or ncols > 1: axes = axes.flatten() else: axes = [axes] for i, col in enumerate(numeric_cols): if col in processed_data.columns and i < len(axes): sns.histplot(processed_data[col], kde=True, ax=axes[i], color='skyblue') axes[i].set_title(f'{col}分布') axes[i].set_xlabel('') for j in range(i+1, len(axes)): axes[j].set_visible(False) plt.tight_layout() st.pyplot(fig) else: st.warning("没有可用的数值特征") # 特征相关性分析 st.markdown("特征相关性热力图") corr_cols = numeric_cols + ['is_rh_next'] if len(corr_cols) > 1: corr_data = processed_data[corr_cols].corr() fig, ax = plt.subplots(figsize=(12, 8)) sns.heatmap(corr_data, annot=True, fmt=".2f", cmap='coolwarm', ax=ax) plt.title('特征相关性热力图') st.pyplot(fig) else: st.warning("特征不足，无法生成相关性热力图") # 模型训练 st.subheader("模型训练") # 训练参数设置 col1, col2 = st.columns(2) test_size = col1.slider("测试集比例", 0.1, 0.4, 0.2, 0.05) random_state = col2.number_input("随机种子", 0, 100, 42) # 开始训练按钮 if st.button("开始训练模型", use_container_width=True): # 创建临时目录用于存储模型 with tempfile.TemporaryDirectory() as tmp_dir: # 修复路径问题：使用绝对路径 model_path = os.path.abspath(os.path.join(tmp_dir, "best_model")) progress_bar = st.progress(0) status_text = st.empty() # 步骤1: 创建Spark会话 status_text.text("步骤1/7: 初始化Spark会话...") spark = create_spark_session() progress_bar.progress(15) # 步骤2: 转换为Spark DataFrame status_text.text("步骤2/7: 转换数据为Spark格式...") spark_df = spark.createDataFrame(processed_data) progress_bar.progress(30) # 步骤3: 划分训练集和测试集 status_text.text("步骤3/7: 划分训练集和测试集...") train_df, test_df = spark_df.randomSplit([1.0 - test_size, test_size], seed=random_state) progress_bar.progress(40) # 步骤4: 特征工程 status_text.text("步骤4/7: 特征工程处理...") categorical_cols = ['GENDER', 'MKT_STAR_GRADE_NAME', 'IF_YHTS'] existing_cat_cols = [col for col in categorical_cols if col in processed_data.columns] # 创建特征处理管道 indexers = [StringIndexer(inputCol=col, outputCol=col+"_index") for col in existing_cat_cols] encoders = [OneHotEncoder(inputCol=col+"_index", outputCol=col+"_encoded") for col in existing_cat_cols] numeric_cols = ['AGE', 'ONLINE_DAY', 'TERM_CNT', 'PROM_AMT_MONTH'] feature_cols = numeric_cols + [col+"_encoded" for col in existing_cat_cols] assembler = VectorAssembler(inputCols=feature_cols, outputCol="features") label_indexer = StringIndexer(inputCol="is_rh_next", outputCol="label") progress_bar.progress(50) # 步骤5: 构建模型 status_text.text("步骤5/7: 构建和训练模型...") # 使用优化的模型配置 rf = RandomForestClassifier( featuresCol="features", labelCol="label", numTrees=50, # 增加树的数量提高精度 maxDepth=5, # 适当增加深度 seed=random_state, featureSubsetStrategy="auto", # 自动选择特征子集策略 impurity="gini" # 使用基尼不纯度 ) pipeline = Pipeline(stages=indexers + encoders + [assembler, label_indexer, rf]) model = pipeline.fit(train_df) progress_bar.progress(80) # 步骤6: 评估模型 status_text.text("步骤6/7: 评估模型性能...") predictions = model.transform(test_df) evaluator_auc = BinaryClassificationEvaluator(labelCol="label", rawPredictionCol="rawPrediction") evaluator_acc = MulticlassClassificationEvaluator(labelCol="label", predictionCol="prediction", metricName="accuracy") evaluator_f1 = MulticlassClassificationEvaluator(labelCol="label", predictionCol="prediction", metricName="f1") auc = evaluator_auc.evaluate(predictions) acc = evaluator_acc.evaluate(predictions) f1 = evaluator_f1.evaluate(predictions) results = { "Random Forest": {"AUC": auc, "Accuracy": acc, "F1 Score": f1} } progress_bar.progress(95) # 步骤7: 保存结果 status_text.text("步骤7/7: 保存模型和结果...") # 保存模型 if save_model(model, model_path): st.session_state.model_results = results st.session_state.best_model = model st.session_state.model_path = model_path st.session_state.spark = spark progress_bar.progress(100) st.success("🎉 模型训练完成！") # 显示模型性能 st.subheader("模型性能评估") results_df = pd.DataFrame(results).T st.dataframe(results_df.style.format("{:.4f}").background_gradient(cmap='Blues')) # 特征重要性 st.subheader("特征重要性") rf_model = model.stages[-1] feature_importances = rf_model.featureImportances.toArray() importance_df = pd.DataFrame({ "Feature": feature_cols, "Importance": feature_importances }).sort_values("Importance", ascending=False).head(10) fig, ax = plt.subplots(figsize=(10, 6)) sns.barplot(x="Importance", y="Feature", data=importance_df, palette="viridis", ax=ax) plt.title('Top 10 重要特征') st.pyplot(fig) # 提供模型下载 st.subheader("模型持久化") st.info("模型已保存到临时目录，建议下载保存到本地") # 创建模型压缩包 with st.spinner("准备模型下载包..."): zip_path = shutil.make_archive( base_name=os.path.join(tmp_dir, "marketing_model"), format='zip', root_dir=model_path ) with open(zip_path, "rb") as f: st.download_button( label="下载完整模型", data=f, file_name="marketing_model.zip", mime="application/zip" ) else: st.error("❌ 模型保存失败，请查看错误日志") except Exception as e: st.error(f"模型训练错误: {str(e)}") st.error(traceback.format_exc()) st.error("提示：请检查数据格式和特征列名") # 预测分析部分 else: st.markdown("""

用户转化预测

预测单宽带用户转化为融合套餐的可能性

""", unsafe_allow_html=True) # 上传预测数据 predict_file = st.file_uploader("上传预测数据 (CSV格式, GBK编码)", type=["csv"]) if predict_file is not None: try: # 读取数据 predict_data = pd.read_csv(predict_file, encoding='GBK') # 显示数据预览 with st.expander("数据预览", expanded=True): st.dataframe(predict_data.head()) # 检查是否有模型 if "best_model" not in st.session_state: st.warning("⚠️ 未找到训练好的模型，请先训练模型") st.stop() # 开始预测按钮 if st.button("开始预测", use_container_width=True): progress_bar = st.progress(0) status_text = st.empty() # 步骤1: 数据预处理 status_text.text("步骤1/4: 数据预处理中...") processed_data = preprocess_data(predict_data) progress_bar.progress(25) # 步骤2: 创建Spark会话 status_text.text("步骤2/4: 初始化Spark会话...") if "spark" not in st.session_state: spark = create_spark_session() st.session_state.spark = spark else: spark = st.session_state.spark progress_bar.progress(50) # 步骤3: 预测 status_text.text("步骤3/4: 进行预测...") spark_df = spark.createDataFrame(processed_data) best_model = st.session_state.best_model predictions = best_model.transform(spark_df) progress_bar.progress(75) # 步骤4: 处理结果 status_text.text("步骤4/4: 处理预测结果...") predictions_df = predictions.select( "CCUST_ROW_ID", "probability", "prediction" ).toPandas() # 解析概率值 predictions_df['转化概率'] = predictions_df['probability'].apply(lambda x: float(x[1])) predictions_df['预测结果'] = predictions_df['prediction'].apply(lambda x: "可能转化" if x == 1.0 else "可能不转化") # 添加转化可能性等级 predictions_df['转化可能性'] = pd.cut( predictions_df['转化概率'], bins=[0, 0.3, 0.7, 1], labels=["低可能性", "中可能性", "高可能性"] ) # 保存结果 st.session_state.prediction_results = predictions_df progress_bar.progress(100) st.success("✅ 预测完成！") except Exception as e: st.error(f"预测错误: {str(e)}") st.error(traceback.format_exc()) # 显示预测结果 if "prediction_results" in st.session_state: st.markdown("""

预测结果

用户转化可能性评估报告

""", unsafe_allow_html=True) result_df = st.session_state.prediction_results # 转化可能性分布 st.subheader("转化可能性分布概览") col1, col2, col3 = st.columns(3) high_conv = (result_df["转化可能性"] == "高可能性").sum() med_conv = (result_df["转化可能性"] == "中可能性").sum() low_conv = (result_df["转化可能性"] == "低可能性").sum() col1.markdown(f"""

{high_conv}

高可能性用户

""", unsafe_allow_html=True) col2.markdown(f"""

{med_conv}

中可能性用户

""", unsafe_allow_html=True) col3.markdown(f"""

{low_conv}

低可能性用户

""", unsafe_allow_html=True) # 转化可能性分布图 fig, ax = plt.subplots(figsize=(8, 5)) conv_counts = result_df["转化可能性"].value_counts() conv_counts.plot(kind='bar', color=['#4CAF50', '#FFC107', '#F44336'], ax=ax) plt.title('用户转化可能性分布') plt.xlabel('可能性等级') plt.ylabel('用户数量') st.pyplot(fig) # 详细预测结果 st.subheader("详细预测结果") # 样式函数 def color_convert(val): if val == "高可能性": return "background-color: #c8e6c9; color: #388e3c;" elif val == "中可能性": return "background-color: #fff9c4; color: #f57f17;" else: return "background-color: #ffcdd2; color: #c62828;" # 格式化显示 display_df = result_df[["CCUST_ROW_ID", "转化概率", "预测结果", "转化可能性"]] styled_df = display_df.style.format({ "转化概率": "{:.2%}" }).applymap(color_convert, subset=["转化可能性"]) st.dataframe(styled_df, height=400) # 下载结果 csv = display_df.to_csv(index=False).encode("utf-8") st.download_button( label="下载预测结果", data=csv, file_name="用户转化预测结果.csv", mime="text/csv", use_container_width=True ) # 页脚 st.markdown("---") st.markdown("""
© 2023 精准营销系统 | 基于Spark和Streamlit开发 | 优化版Spark连接
""", unsafe_allow_html=True) 根据上述代码，给出需要配置的环境，具体步骤

-在Ubuntu上，可以使用以下命令安装OpenJDK11:```bashsudoapt updatesudo aptinstall openjdk-11-jdk```-安装后，设置`JAVA_HOME`环境变量。找到Java安装路径（通常为`/usr/lib/jvm/java-11-openjdk-amd64`），然后...

import streamlit as st import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from pyspark.sql import SparkSession from pyspark.ml.feature import VectorAssembler, StringIndexer, OneHotEncoder from pyspark.ml import Pipeline from pyspark.ml.classification import LogisticRegression, DecisionTreeClassifier, RandomForestClassifier from pyspark.ml.evaluation import BinaryClassificationEvaluator, MulticlassClassificationEvaluator import os import time import warnings import tempfile import subprocess import sys # 忽略警告 warnings.filterwarnings("ignore") # 检查Java版本 def check_java_version(): try: java_version = subprocess.check_output(['java', '-version'], stderr=subprocess.STDOUT, text=True) st.info(f"Java版本信息:\n{java_version}") if 'version "1.8' in java_version: st.error("检测到Java 8 (1.8.x)，但Spark需要Java 11或更高版本") st.warning("请升级您的Java环境或设置JAVA_HOME指向Java 11+") st.stop() elif 'version "11' in java_version or 'version "17' in java_version: st.success("Java版本兼容") else: st.warning(f"检测到未知Java版本: {java_version}") except Exception as e: st.error(f"无法检查Java版本: {str(e)}") st.stop() # 设置中文字体 plt.rcParams['font.sans-serif'] = ['SimHei'] plt.rcParams['axes.unicode_minus'] = False # 页面设置 st.set_page_config( page_title="精准营销系统", page_icon="📊", layout="wide", initial_sidebar_state="expanded" ) # 自定义CSS样式 st.markdown(""" <style> .stApp { background: linear-gradient(135deg, #f8极客时间fa 0%, #e9ecef 100%); font-family: 'Helvetica Neue', Arial, sans-serif; } .header { background: linear-gradient(90deg, #1a237e 0%, #283593 100%); color: white; padding: 1.5rem; border-radius: 0.75rem; box-shadow: 0 4px 12px rgba(0,0,0,0.1); margin-bottom: 2rem; } .card { background: white; border-radius: 0.75极客时间; padding: 1rem; margin-bottom: 1.5rem; box-shadow: 0 4px 12px rgba(0,0,0,0.08); transition: transform 0.3s ease; } .card:hover { transform: translateY(-5px); box-shadow: 0 6px 16px rgba(0,0,0,0.12); } .stButton button { background: linear-gradient(90deg, #3949ab 0%, #1a237e 100%) !important; color: white !important; border: none !important; border-radius: 0.5rem; padding: 0.75rem 1.5rem; font-size: 1rem; font-weight: 600; transition: all 0.3s ease; width: 100%; } .stButton button:hover { transform: scale(1.05); box-shadow: 0 4px 8px rgba(57, 73, 171, 0.4); } .feature-box { background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%); border-radius: 0.75rem; padding: 1.5rem; margin-bottom: 1.5rem; } .result-box { background: linear-gradient(135deg, #e8f5e9 0%, #c8e6c9 100%); border-radius: 0.75rem; padding: 1.5rem; margin-top: 1.5rem; } .model-box { background: linear-gradient(135deg, #fff3e0 0%, #ffe0b2 100%); border-radius: 0.75rem; padding: 1.5rem; margin-top: 1.5rem; } .stProgress > div > div > div { background: linear-gradient(90deg, #2ecc71 0%, #27ae60 100%) !important; } .metric-card { background: white; border-radius: 0.75rem; padding: 1rem; text-align: center; box-shadow: 0 4px 8px rgba(0,0,0,0.06); } .metric-value { font-size: 1.8rem; font-weight: 700; color: #1a237e; } .metric-label { font-size: 0.9rem; color: #5c6bc0; margin-top: 0.5rem; } .highlight { background: linear-gradient(90deg, #ffeb3b 0%, #fbc02d 100%); padding: 0.2rem 0.5rem; border-radius: 0.25rem; font-weight: 600; } .stDataFrame { border-radius: 0.75rem; box-shadow: 0 4px 8px rgba(0,0,0,0.06); } .convert-high { background-color: #c8e6c9 !important; color: #388e3c !important; font-weight: 700; } .convert-low { background-color: #ffcdd2 !important; color: #c62828 !important; font-weight: 600; } .java-warning { background-color: #fff3cd; border-left: 4px solid #ffc107; padding: 1rem; margin-bottom: 1.5rem; border-radius: 0 0.25rem 0.25rem 0; } </style> """, unsafe_allow_html=True) # 创建Spark会话 - 兼容Java 8 def create_spark_session(): # 使用更小的内存配置避免资源问题 return SparkSession.builder \ .appName("TelecomPrecisionMarketing") \ .config("spark.driver.memory", "2g") \ .config("spark.executor.memory", "2g") \ .config("spark.sql.shuffle.partitions", "8") \ .config("spark.driver.extraJavaOptions", "-Dio.netty.tryReflectionSetAccessible=true") \ .config("spark.executor.extraJavaOptions", "-Dio.netty.tryReflectionSetAccessible=true") \ .getOrCreate() # 数据预处理函数 - 优化版 def preprocess_data(df): """ 优化后的数据预处理函数参数: df: 原始数据 (DataFrame) 返回: 预处理后的数据 (DataFrame) """ # 1. 选择关键特征 available_features = [col for col in df.columns if col in [ 'AGE', 'GENDER', 'ONLINE_DAY', 'TERM_CNT', 'IF_YHTS', 'MKT_STAR_GRADE_NAME', 'PROM_AMT_MONTH', 'is_rh_next' # 目标变量 ]] # 确保目标变量存在 if 'is_rh_next' not in available_features: st.error("错误：数据集中缺少目标变量 'is_rh_next'") return df # 只保留需要的列 df = df[available_features].copy() # 2. 处理缺失值 # 数值特征用中位数填充（比均值更鲁棒） numeric_cols = ['AGE', 'ONLINE_DAY', 'TERM_CNT', 'PROM_AMT_MONTH'] for col in numeric_cols: if col in df.columns: median_val = df[col].median() df[col].fillna(median_val, inplace=True) # 分类特征用众数填充 categorical_cols = ['GENDER', 'MKT_STAR_GRADE_NAME', 'IF_YHTS'] for col in categorical_cols: if col in df.columns: mode_val = df[col].mode()[0] if not df[col].mode().empty else '未知' df[col].fillna(mode_val, inplace=True) # 3. 异常值处理（使用IQR方法） def handle_outliers(series): Q1 = series.quantile(0.25) Q3 = series.quantile(0.75) IQR = Q3 - Q1 lower_bound = Q1 - 1.5 * IQR upper_bound = Q3 + 1.5 * IQR return series.clip(lower_bound, upper_bound) for col in numeric_cols: if col in df.columns: df[col] = handle_outliers(df[col]) return df # 标题区域 st.markdown("""

精准营销系统

基于机器学习的单宽转融预测

""", unsafe_allow_html=True) # Java版本检查 st.markdown("""
Java版本兼容性检查
Spark需要Java 11或更高版本，检测到您当前使用的是Java 8

已启用兼容模式，但建议升级到Java 11+以获得最佳性能

""", unsafe_allow_html=True) # 页面布局 col1, col2 = st.columns([1, 1.5]) # 左侧区域 - 图片和简介 with col1: st.markdown("""

📱 智能营销系统

预测单宽带用户转化为融合套餐用户的可能性

""", unsafe_allow_html=True) # 使用在线图片作为占位符 st.image("https://images.unsplash.com/photo-1551836022-d5d88e9218df?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1200&q=80", caption="精准营销系统示意图", use_column_width=True) st.markdown("""

📈 系统功能

用户转化可能性预测

高精度机器学习模型

可视化数据分析

精准营销策略制定

""", unsafe_allow_html=True) # 右侧区域 - 功能选择 with col2: st.markdown("""

📋 请选择操作类型

您可以选择数据分析或使用模型进行预测

""", unsafe_allow_html=True) # 功能选择 option = st.radio("", ["📊 数据分析 - 探索数据并训练模型", "🔍 预测分析 - 预测用户转化可能性"], index=0, label_visibility="hidden") # 数据分析部分 if "数据分析" in option: st.markdown("""

数据分析与模型训练

上传数据并训练预测模型

""", unsafe_allow_html=True) # 上传训练数据 train_file = st.file_uploader("上传数据集 (CSV格式, GBK编码)", type=["csv"]) if train_file is not None: try: # 读取数据 train_data = pd.read_csv(train_file, encoding='GBK') # 显示数据预览 with st.expander("数据预览", expanded=True): st.dataframe(train_data.head()) col1, col2 = st.columns(2) col1.metric("总样本数", train_data.shape[0]) col2.metric("特征数量", train_data.shape[1] - 1) # 数据预处理 st.subheader("数据预处理") with st.spinner("数据预处理中..."): processed_data = preprocess_data(train_data) st.success("✅ 数据预处理完成") # 可视化数据分布 st.subheader("数据分布分析") # 目标变量分布 st.markdown("目标变量分布 (is_rh_next)") fig, ax = plt.subplots(figsize=(8, 5)) sns.countplot(x='is_rh_next', data=processed_data, palette='viridis') plt.title('用户转化分布 (0:未转化, 1:转化)') plt.xlabel('是否转化') plt.ylabel('用户数量') st.pyplot(fig) # 数值特征分布 st.markdown("数值特征分布") numeric_cols = ['AGE', 'ONLINE_DAY', 'TERM_CNT', 'PROM_AMT_MONTH'] # 动态计算子图布局 num_features = len(numeric_cols) if num_features > 0: ncols = 2 nrows = (num_features + ncols - 1) // ncols # 向上取整 fig, axes = plt.subplots(nrows, ncols, figsize=(14, 4*nrows)) # 将axes展平为一维数组 if nrows > 1 or ncols > 1: axes = axes.flatten() else: axes = [axes] # 单个子图时确保axes是列表 for i, col in enumerate(numeric_cols): if col in processed_data.columns and i < len(axes): sns.histplot(processed_data[col], kde=True, ax=axes[i], color='skyblue') axes[i].set_title(f'{col}分布') axes[i].set_xlabel('') # 隐藏多余的子图 for j in range(i+1, len(axes)): axes[j].set_visible(False) plt.tight_layout() st.pyplot(fig) else: st.warning("没有可用的数值特征") # 特征相关性分析 st.markdown("特征相关性热力图") corr_cols = numeric_cols + ['is_rh_next'] if len(corr_cols) > 1: corr_data = processed_data[corr_cols].corr() fig, ax = plt.subplots(figsize=(12, 8)) sns.heatmap(corr_data, annot=True, fmt=".2f", cmap='coolwarm', ax=ax) plt.title('特征相关性热力图') st.pyplot(fig) else: st.warning("特征不足，无法生成相关性热力图") # 模型训练 st.subheader("模型训练") # 训练参数设置 col1, col2 = st.columns(2) test_size = col1.slider("测试集比例", 0.1, 0.4, 0.2, 0.05) random_state = col2.number_input("随机种子", 0, 100, 42) # 开始训练按钮 if st.button("开始训练模型", use_container_width=True): # 创建临时目录用于存储模型 with tempfile.TemporaryDirectory() as tmp_dir: model_path = os.path.join(tmp_dir, "best_model") progress_bar = st.progress(0) status_text = st.empty() # 步骤1: 创建Spark会话 status_text.text("步骤1/7: 初始化Spark会话...") spark = create_spark_session() progress_bar.progress(15) # 步骤2: 转换为Spark DataFrame status_text.text("步骤2/7: 转换数据为Spark格式...") spark_df = spark.createDataFrame(processed_data) progress_bar.progress(30) # 步骤3: 划分训练集和测试集 status_text.text("步骤3/7: 划分训练集和测试集...") train_df, test_df = spark_df.randomSplit([1.0 - test_size, test_size], seed=random_state) progress_bar.progress(40) # 步骤4: 特征工程 status_text.text("步骤4/7: 特征工程处理...") categorical_cols = ['GENDER', 'MKT_STAR_GRADE_NAME', 'IF_YHTS'] existing_cat_cols = [col for col in categorical_cols if col in processed_data.columns] # 创建特征处理管道 indexers = [StringIndexer(inputCol=col, outputCol=col+"_index") for col in existing_cat_cols] encoders = [OneHotEncoder(inputCol=col+"_index", outputCol=col+"_encoded") for col in existing_cat_cols] numeric_cols = ['AGE', 'ONLINE_DAY', 'TERM_CNT', 'PROM_AMT_MONTH'] feature_cols = numeric_cols + [col+"_encoded" for col in existing_cat_cols] assembler = VectorAssembler(inputCols=feature_cols, outputCol="features") label_indexer = StringIndexer(inputCol="is_rh_next", outputCol="label") progress_bar.progress(50) # 步骤5: 构建模型 status_text.text("步骤5/7: 构建和训练模型...") # 使用更简单的模型配置 rf = RandomForestClassifier( featuresCol="features", labelCol="label", numTrees=50, # 减少树的数量 maxDepth=5, # 限制深度 seed=random_state ) pipeline = Pipeline(stages=indexers + encoders + [assembler, label_indexer, rf]) model = pipeline.fit(train_df) progress_bar.progress(80) # 步骤6: 评估模型 status_text.text("步骤6/7: 评估模型性能...") predictions = model.transform(test_df) evaluator_auc = BinaryClassificationEvaluator(labelCol="label", rawPredictionCol="rawPrediction") evaluator_acc = MulticlassClassificationEvaluator(labelCol="label", predictionCol="prediction", metricName="accuracy") auc = evaluator_auc.evaluate(predictions) acc = evaluator_acc.evaluate(predictions) results = { "Random Forest": {"AUC": auc, "Accuracy": acc} } progress_bar.progress(95) # 步骤7: 保存结果 status_text.text("步骤7/7: 保存模型和结果...") model.write().overwrite().save(model_path) st.session_state.model_results = results st.session_state.best_model = model st.session_state.model_path = model_path st.session_state.spark = spark progress_bar.progress(100) st.success("🎉 模型训练完成！") # 显示模型性能 st.subheader("模型性能评估") results_df = pd.DataFrame(results).T st.dataframe(results_df.style.format("{:.4f}").background_gradient(cmap='Blues')) # 特征重要性 st.subheader("特征重要性") rf_model = model.stages[-1] feature_importances = rf_model.featureImportances.toArray() importance_df = pd.DataFrame({ "Feature": feature_cols, "Importance": feature_importances }).sort_values("Importance", ascending=False).head(10) fig, ax = plt.subplots(figsize=(10, 6)) sns.barplot(x="Importance", y="Feature", data=importance_df, palette="viridis", ax=ax) plt.title('Top 10 重要特征') st.pyplot(fig) except Exception as e: st.error(f"模型训练错误: {str(e)}") # 预测分析部分 else: st.markdown("""

用户转化预测

预测单宽带用户转化为融合套餐的可能性

""", unsafe_allow_html=True) # 上传预测数据 predict_file = st.file_uploader("上传预测数据 (CSV格式, GBK编码)", type=["csv"]) if predict_file is not None: try: # 读取数据 predict_data = pd.read_csv(predict_file, encoding='GBK') # 显示数据预览 with st.expander("数据预览", expanded=True): st.dataframe(predict_data.head()) # 检查是否有模型 if "model_path" not in st.session_state: st.warning("⚠️ 未找到训练好的模型，请先训练模型") st.stop() # 开始预测按钮 if st.button("开始预测", use_container_width=True): progress_bar = st.progress(0) status_text = st.empty() # 步骤1: 数据预处理 status_text.text("步骤1/4: 数据预处理中...") processed_data = preprocess_data(predict_data) progress_bar.progress(25) # 步骤2: 创建Spark会话 status_text.text("步骤2/4: 初始化Spark会话...") if "spark" not in st.session_state: spark = create_spark_session() st.session_state.spark = spark else: spark = st.session_state.spark progress_bar.progress(50) # 步骤3: 预测 status_text.text("步骤3/4: 进行预测...") spark_df = spark.createDataFrame(processed_data) best_model = st.session_state.best_model predictions = best_model.transform(spark_df) progress_bar.progress(75) # 步骤4: 处理结果 status_text.text("步骤4/4: 处理预测结果...") predictions_df = predictions.select( "CCUST_ROW_ID", "probability", "prediction" ).toPandas() # 解析概率值 predictions_df['转化概率'] = predictions_df['probability'].apply(lambda x: float(x[1])) predictions_df['预测结果'] = predictions_df['prediction'].apply(lambda x: "可能转化" if x == 1.0 else "可能不转化") # 添加转化可能性等级 predictions_df['转化可能性'] = pd.cut( predictions_df['转化概率'], bins=[0, 0.3, 0.7, 1], labels=["低可能性", "中可能性", "高可能性"] ) # 保存结果 st.session_state.prediction_results = predictions_df progress_bar.progress(100) st.success("✅ 预测完成！") except Exception as e: st.error(f"预测错误: {str(e)}") # 显示预测结果 if "prediction_results" in st.session_state: st.markdown("""

预测结果

用户转化可能性评估报告

""", unsafe_allow_html=True) result_df = st.session_state.prediction_results # 转化可能性分布 st.subheader("转化可能性分布概览") col1, col2, col3 = st.columns(3) high_conv = (result_df["转化可能性"] == "高可能性").sum() med_conv = (result_df["转化可能性"] == "中可能性").sum() low_conv = (result_df["转化可能性"] == "低可能性").sum() col1.markdown(f"""

{high_conv}

高可能性用户

""", unsafe_allow_html=True) col2.markdown(f"""

{med_conv}

中可能性用户

""", unsafe_allow_html=True) col3.markdown(f"""

{low_conv}

低可能性用户

""", unsafe_allow_html=True) # 转化可能性分布图 fig, ax = plt.subplots(f极客时间size=(8, 5)) conv_counts = result_df["转化可能性"].value_counts() conv_counts.plot(kind='bar', color=['#4CAF50', '#FFC107', '#F44336'], ax=ax) plt.title('用户转化可能性分布') plt.xlabel('可能性等级') plt.ylabel('用户数量') st.pyplot(fig) # 详细预测结果 st.subheader("详细预测结果") # 样式函数 def color_convert(val): if val == "高可能性": return "background-color: #c8e6c9; color: #388e3c;" elif val == "中可能性": return "background-color: #fff9c4; color: #f57f17;" else: return "background-color: #ffcdd2; color: #c62828;" # 格式化显示 display_df = result_df[["CCUST_ROW_ID", "转化概率", "预测结果", "转化可能性"]] styled_df = display_df.style.format({ "转化概率": "{:.2%}" }).applymap(color_convert, subset=["转化可能性"]) st.dataframe(styled_df, height=400) # 下载结果 csv = display_df.to_csv(index=False).encode("utf-8") st.download_button( label="下载预测结果", data=csv, file_name="用户转化预测结果.csv", mime="text/csv", use_container_width=True ) # 页脚 st.markdown("---") st.markdown("""
© 2023 精准营销系统 | 基于Spark和Streamlit开发
""", unsafe_allow_html=True) 修改上述代码，使其spark兼容jdk1.8.0_261

-使用PySpark2.4.8（最后一个支持Java8的版本）但是，由于我们无法在代码中直接切换PySpark版本（需要安装），因此我们只能假设环境已经安装了PySpark2.4.x。因此，修改计划如下：1.修改Java版本检查：如果检测到...

import streamlit as st import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from pyspark.sql import SparkSession from pyspark.ml.feature import VectorAssembler, StringIndexer, OneHotEncoder from pyspark.ml import Pipeline from pyspark.ml.classification import LogisticRegression, DecisionTreeClassifier, RandomForestClassifier from pyspark.ml.evaluation import BinaryClassificationEvaluator, MulticlassClassificationEvaluator import os import time import warnings import tempfile import subprocess import sys import shutil # 忽略警告 warnings.filterwarnings("ignore") # 设置中文字体 plt.rcParams['font.sans-serif'] = ['SimHei'] plt.rcParams['axes.unicode_minus'] = False # 页面设置 st.set_page_config( page_title="精准营销系统", page_icon="📊", layout="wide", initial_sidebar_state="expanded" ) # 自定义CSS样式 st.markdown(""" <style> .stApp { background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); font-family: 'Helvetica Neue', Arial, sans-serif; } .header { background: linear-gradient(90deg, #1a237e 0%, #283593 100%); color: white; padding: 1.5rem; border-radius: 0.75rem; box-shadow: 0 4px 12px rgba(0,0,0,0.1); margin-bottom: 2rem; } .card { background: white; border-radius: 0.75rem; padding: 1rem; margin-bottom: 1.5rem; box-shadow: 0 4px 12px transition: transform 0.3s ease; } .card:hover { transform: translateY(-5px); box-shadow: 0 6px 16px rgba(0,0,0,0.12); } .stButton button { background: linear-gradient(90deg, #3949ab 0%, #1a237e 100%) !important; color: white !important; border: none !important; border-radius: 0.5rem; padding: 0.75rem 1.5rem; font-size: 1rem; font-weight: 600; transition: all 0.3s ease; width: 100%; } .stButton button:hover { transform: scale(1.05); box-shadow: 0 4px 8px rgba(57, 73, 171, 0.4); } .feature-box { background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%); border-radius: 0.75rem; padding: 1.5rem; margin-bottom: 1.5rem; } .result-box { background: linear-gradient(135deg, #e8f5e9 0%, #c8e6c9 100%); border-radius: 0.75rem; padding: 1.5rem; margin-top: 1.5rem; } .model-box { background: linear-gradient(135deg, #fff3e0 0%, #ffe0b2 100%); border-radius: 0.75rem; padding: 1.5rem; margin-top: 1.5rem; } .stProgress > div > div > div { background: linear-gradient(90deg, #2ecc71 0%, #27ae60 100%) !important; } .metric-card { background: white; border-radius: 0.75rem; padding: 1rem; text-align: center; box-shadow: 0 4px 8px rgba(0,0,0,0.06); } .metric-value { font-size: 1.8rem; font-weight: 700; color: #1a237e; } .metric-label { font-size: 0.9rem; color: #5c6bc0; margin-top: 0.5rem; } .highlight { background: linear-gradient(90deg, #ffeb3b 0%, #fbc02d 100%); padding: 0.2rem 0.5rem; border-radius: 0.25rem; font-weight: 600; } .stDataFrame { border-radius: 0.75rem; box-shadow: 0 4px 8px rgba(0,0,0,0.06); } .convert-high { background-color: #c8e6c9 !important; color: #388e3c !important; font-weight: 700; } .convert-low { background-color: #ffcdd2 !important; color: #c62828 !important; font-weight: 600; } .java-success { background-color: #d4edda; border-left: 4px solid #28a745; padding: 1rem; margin-bottom: 1.5rem; border-radius: 0 0.25rem 0.25rem 0; } </style> """, unsafe_allow_html=True) # 创建优化的Spark会话 def create_spark_session(): """创建优化的Spark会话，使用高效的配置参数""" try: # 基础配置 - 优化资源使用 builder = SparkSession.builder \ .appName("TelecomPrecisionMarketing") \ .config("spark.driver.memory", "1g") \ .config("spark.executor.memory", "1g") \ .config("spark.sql.shuffle.partitions", "4") \ .config("spark.network.timeout", "800s") \ .config("spark.executor.heartbeatInterval", "60s") \ .config("spark.sql.legacy.allowUntypedScalaUDF", "true") \ .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") \ .config("spark.kryoserializer.buffer.max", "128m") \ .config("spark.driver.maxResultSize", "1g") \ .config("spark.sql.execution.arrow.pyspark.enabled", "true") \ .config("spark.ui.showConsoleProgress", "false") # 创建会话 spark = builder.getOrCreate() # 验证会话 try: # 创建测试DataFrame验证会话是否正常工作 test_df = spark.createDataFrame([(1, "test"), (2, "session")], ["id", "name"]) test_df_count = test_df.count() if test_df_count == 2: st.success("Spark会话创建成功并验证通过") else: st.warning(f"Spark会话验证异常，返回记录数: {test_df_count}") except Exception as e: st.error(f"Spark会话验证失败: {str(e)}") spark.stop() raise return spark except Exception as e: st.error(f"创建Spark会话失败: {str(e)}") st.error("请检查Java版本和Spark配置") st.stop() # 数据预处理函数 def preprocess_data(df): """优化后的数据预处理函数""" # 1. 选择关键特征 available_features = [col for col in df.columns if col in [ 'AGE', 'GENDER', 'ONLINE_DAY', 'TERM_CNT', 'IF_YHTS', 'MKT_STAR_GRADE_NAME', 'PROM_AMT_MONTH', 'is_rh_next' # 目标变量 ]] # 确保目标变量存在 if 'is_rh_next' not in available_features: st.error("错误：数据集中缺少目标变量 'is_rh_next'") return df # 只保留需要的列 df = df[available_features].copy() # 2. 处理缺失值 numeric_cols = ['AGE', 'ONLINE_DAY', 'TERM_CNT', 'PROM_AMT_MONTH'] for col in numeric_cols: if col in df.columns: median_val = df[col].median() df[col].fillna(median_val, inplace=True) categorical_cols = ['GENDER', 'MKT_STAR_GRADE_NAME', 'IF_YHTS'] for col in categorical_cols: if col in df.columns: mode_val = df[col].mode()[0] if not df[col].mode().empty else '未知' df[col].fillna(mode_val, inplace=True) # 3. 异常值处理 def handle_outliers(series): Q1 = series.quantile(0.25) Q3 = series.quantile(0.75) IQR = Q3 - Q1 lower_bound = Q1 - 1.5 * IQR upper_bound = Q3 + 1.5 * IQR return series.clip(lower_bound, upper_bound) for col in numeric_cols: if col in df.columns: df[col] = handle_outliers(df[col]) return df # 标题区域 st.markdown("""

精准营销系统

基于机器学习的单宽转融预测

""", unsafe_allow_html=True) # 页面布局 col1, col2 = st.columns([1, 1.5]) # 左侧区域 - 图片和简介 with col1: st.markdown("""

📱 智能营销系统

预测单宽带用户转化为融合套餐用户的可能性

""", unsafe_allow_html=True) # 使用在线图片作为占位符 st.image("https://images.unsplash.com/photo-1551836022-d5d88e9218df?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1200&q=80", caption="精准营销系统示意图", use_column_width=True) st.markdown("""

📈 系统功能

用户转化可能性预测

高精度机器学习模型

可视化数据分析

精准营销策略制定

""", unsafe_allow_html=True) # 右侧区域 - 功能选择 with col2: st.markdown("""

📋 请选择操作类型

您可以选择数据分析或使用模型进行预测

""", unsafe_allow_html=True) # 功能选择 option = st.radio("操作类型", ["📊 数据分析 - 探索数据并训练模型", "🔍 预测分析 - 预测用户转化可能性"], index=0) # 数据分析部分 if "数据分析" in option: st.markdown("""

数据分析与模型训练

上传数据并训练预测模型

""", unsafe_allow_html=True) # 上传训练数据 train_file = st.file_uploader("上传数据集 (CSV格式, GBK编码)", type=["csv"]) if train_file is not None: try: # 读取数据 train_data = pd.read_csv(train_file, encoding='GBK') # 显示数据预览 with st.expander("数据预览", expanded=True): st.dataframe(train_data.head()) col1, col2 = st.columns(2) col1.metric("总样本数", train_data.shape[0]) col2.metric("特征数量", train_data.shape[1] - 1) # 数据预处理 st.subheader("数据预处理") with st.spinner("数据预处理中..."): processed_data = preprocess_data(train_data) st.success("✅ 数据预处理完成") # 可视化数据分布 st.subheader("数据分布分析") # 目标变量分布 st.markdown("目标变量分布 (is_rh_next)") fig, ax = plt.subplots(figsize=(8, 5)) sns.countplot(x='is_rh_next', data=processed_data, palette='viridis') plt.title('用户转化分布 (0:未转化, 1:转化)') plt.xlabel('是否转化') plt.ylabel('用户数量') st.pyplot(fig) # 数值特征分布 st.markdown("数值特征分布") numeric_cols = ['AGE', 'ONLINE_DAY', 'TERM_CNT', 'PROM_AMT_MONTH'] # 动态计算子图布局 num_features = len(numeric_cols) if num_features > 0: ncols = 2 nrows = (num_features + ncols - 1) // ncols fig, axes = plt.subplots(nrows, ncols, figsize=(14, 4*nrows)) if nrows > 1 or ncols > 1: axes = axes.flatten() else: axes = [axes] for i, col in enumerate(numeric_cols): if col in processed_data.columns and i < len(axes): sns.histplot(processed_data[col], kde=True, ax=axes[i], color='skyblue') axes[i].set_title(f'{col}分布') axes[i].set_xlabel('') for j in range(i+1, len(axes)): axes[j].set_visible(False) plt.tight_layout() st.pyplot(fig) else: st.warning("没有可用的数值特征") # 特征相关性分析 st.markdown("特征相关性热力图") corr_cols = numeric_cols + ['is_rh_next'] if len(corr_cols) > 1: corr_data = processed_data[corr_cols].corr() fig, ax = plt.subplots(figsize=(12, 8)) sns.heatmap(corr_data, annot=True, fmt=".2f", cmap='coolwarm', ax=ax) plt.title('特征相关性热力图') st.pyplot(fig) else: st.warning("特征不足，无法生成相关性热力图") # 模型训练 st.subheader("模型训练") # 训练参数设置 col1, col2 = st.columns(2) test_size = col1.slider("测试集比例", 0.1, 0.4, 0.2, 0.05) random_state = col2.number_input("随机种子", 0, 100, 42) # 开始训练按钮 if st.button("开始训练模型", use_container_width=True): # 创建临时目录用于存储模型 with tempfile.TemporaryDirectory() as tmp_dir: # 修复路径问题：使用绝对路径 model_path = os.path.abspath(os.path.join(tmp_dir, "best_model")) progress_bar = st.progress(0) status_text = st.empty() # 步骤1: 创建Spark会话 status_text.text("步骤1/7: 初始化Spark会话...") spark = create_spark_session() progress_bar.progress(15) # 步骤2: 转换为Spark DataFrame status_text.text("步骤2/7: 转换数据为Spark格式...") spark_df = spark.createDataFrame(processed_data) progress_bar.progress(30) # 步骤3: 划分训练集和测试集 status_text.text("步骤3/7: 划分训练集和测试集...") train_df, test_df = spark_df.randomSplit([1.0 - test_size, test_size], seed=random_state) progress_bar.progress(40) # 步骤4: 特征工程 status_text.text("步骤4/7: 特征工程处理...") categorical_cols = ['GENDER', 'MKT_STAR_GRADE_NAME', 'IF_YHTS'] existing_cat_cols = [col for col in categorical_cols if col in processed_data.columns] # 创建特征处理管道 indexers = [StringIndexer(inputCol=col, outputCol=col+"_index") for col in existing_cat_cols] encoders = [OneHotEncoder(inputCol=col+"_index", outputCol=col+"_encoded") for col in existing_cat_cols] numeric_cols = ['AGE', 'ONLINE_DAY', 'TERM_CNT', 'PROM_AMT_MONTH'] feature_cols = numeric_cols + [col+"_encoded" for col in existing_cat_cols] assembler = VectorAssembler(inputCols=feature_cols, outputCol="features") label_indexer = StringIndexer(inputCol="is_rh_next", outputCol="label") progress_bar.progress(50) # 步骤5: 构建模型 status_text.text("步骤5/7: 构建和训练模型...") # 使用优化的模型配置 rf = RandomForestClassifier( featuresCol="features", labelCol="label", numTrees=50, # 增加树的数量提高精度 maxDepth=5, # 适当增加深度 seed=random_state, featureSubsetStrategy="auto", # 自动选择特征子集策略 impurity="gini" # 使用基尼不纯度 ) pipeline = Pipeline(stages=indexers + encoders + [assembler, label_indexer, rf]) model = pipeline.fit(train_df) progress_bar.progress(80) # 步骤6: 评估模型 status_text.text("步骤6/7: 评估模型性能...") predictions = model.transform(test_df) evaluator_auc = BinaryClassificationEvaluator(labelCol="label", rawPredictionCol="rawPrediction") evaluator_acc = MulticlassClassificationEvaluator(labelCol="label", predictionCol="prediction", metricName="accuracy") evaluator_f1 = MulticlassClassificationEvaluator(labelCol="label", predictionCol="prediction", metricName="f1") auc = evaluator_auc.evaluate(predictions) acc = evaluator_acc.evaluate(predictions) f1 = evaluator_f1.evaluate(predictions) results = { "Random Forest": {"AUC": auc, "Accuracy": acc, "F1 Score": f1} } progress_bar.progress(95) # 步骤7: 保存结果 status_text.text("步骤7/7: 保存模型和结果...") # 确保目录存在 os.makedirs(model_path, exist_ok=True) model.write().overwrite().save(model_path) st.session_state.model_results = results st.session_state.best_model = model st.session_state.model_path = model_path st.session_state.spark = spark progress_bar.progress(100) st.success("🎉 模型训练完成！") # 显示模型性能 st.subheader("模型性能评估") results_df = pd.DataFrame(results).T st.dataframe(results_df.style.format("{:.4f}").background_gradient(cmap='Blues')) # 特征重要性 st.subheader("特征重要性") rf_model = model.stages[-1] feature_importances = rf_model.featureImportances.toArray() importance_df = pd.DataFrame({ "Feature": feature_cols, "Importance": feature_importances }).sort_values("Importance", ascending=False).head(10) fig, ax = plt.subplots(figsize=(10, 6)) sns.barplot(x="Importance", y="Feature", data=importance_df, palette="viridis", ax=ax) plt.title('Top 10 重要特征') st.pyplot(fig) except Exception as e: st.error(f"模型训练错误: {str(e)}") st.error("提示：请检查数据格式和特征列名") # 预测分析部分 else: st.markdown("""

用户转化预测

预测单宽带用户转化为融合套餐的可能性

""", unsafe_allow_html=True) # 上传预测数据 predict_file = st.file_uploader("上传预测数据 (CSV格式, GBK编码)", type=["csv"]) if predict_file is not None: try: # 读取数据 predict_data = pd.read_csv(predict_file, encoding='GBK') # 显示数据预览 with st.expander("数据预览", expanded=True): st.dataframe(predict_data.head()) # 检查是否有模型 if "model_path" not in st.session_state: st.warning("⚠️ 未找到训练好的模型，请先训练模型") st.stop() # 开始预测按钮 if st.button("开始预测", use_container_width=True): progress_bar = st.progress(0) status_text = st.empty() # 步骤1: 数据预处理 status_text.text("步骤1/4: 数据预处理中...") processed_data = preprocess_data(predict_data) progress_bar.progress(25) # 步骤2: 创建Spark会话 status_text.text("步骤2/4: 初始化Spark会话...") if "spark" not in st.session_state: spark = create_spark_session() st.session_state.spark = spark else: spark = st.session_state.spark progress_bar.progress(50) # 步骤3: 预测 status_text.text("步骤3/4: 进行预测...") spark_df = spark.createDataFrame(processed_data) best_model = st.session_state.best_model predictions = best_model.transform(spark_df) progress_bar.progress(75) # 步骤4: 处理结果 status_text.text("步骤4/4: 处理预测结果...") predictions_df = predictions.select( "CCUST_ROW_ID", "probability", "prediction" ).toPandas() # 解析概率值 predictions_df['转化概率'] = predictions_df['probability'].apply(lambda x: float(x[1])) predictions_df['预测结果'] = predictions_df['prediction'].apply(lambda x: "可能转化" if x == 1.0 else "可能不转化") # 添加转化可能性等级 predictions_df['转化可能性'] = pd.cut( predictions_df['转化概率'], bins=[0, 0.3, 0.7, 1], labels=["低可能性", "中可能性", "高可能性"] ) # 保存结果 st.session_state.prediction_results = predictions_df progress_bar.progress(100) st.success("✅ 预测完成！") except Exception as e: st.error(f"预测错误: {str(e)}") # 显示预测结果 if "prediction_results" in st.session_state: st.markdown("""

预测结果

用户转化可能性评估报告

""", unsafe_allow_html=True) result_df = st.session_state.prediction_results # 转化可能性分布 st.subheader("转化可能性分布概览") col1, col2, col3 = st.columns(3) high_conv = (result_df["转化可能性"] == "高可能性").sum() med_conv = (result_df["转化可能性"] == "中可能性").sum() low_conv = (result_df["转化可能性"] == "低可能性").sum() col1.markdown(f"""

{high_conv}

高可能性用户

""", unsafe_allow_html=True) col2.markdown(f"""

{med_conv}

中可能性用户

""", unsafe_allow_html=True) col3.markdown(f"""

{low_conv}

低可能性用户

""", unsafe_allow_html=True) # 转化可能性分布图 fig, ax = plt.subplots(figsize=(8, 5)) conv_counts = result_df["转化可能性"].value_counts() conv_counts.plot(kind='bar', color=['#4CAF50', '#FFC107', '#F44336'], ax=ax) plt.title('用户转化可能性分布') plt.xlabel('可能性等级') plt.ylabel('用户数量') st.pyplot(fig) # 详细预测结果 st.subheader("详细预测结果") # 样式函数 def color_convert(val): if val == "高可能性": return "background-color: #c8e6c9; color: #388e3c;" elif val == "中可能性": return "background-color: #fff9c4; color: #f57f17;" else: return "background-color: #ffcdd2; color: #c62828;" # 格式化显示 display_df = result_df[["CCUST_ROW_ID", "转化概率", "预测结果", "转化可能性"]] styled_df = display_df.style.format({ "转化概率": "{:.2%}" }).applymap(color_convert, subset=["转化可能性"]) st.dataframe(styled_df, height=400) # 下载结果 csv = display_df.to_csv(index=False).encode("utf-8") st.download_button( label="下载预测结果", data=csv, file_name="用户转化预测结果.csv", mime="text/csv", use_container_width=True ) # 页脚 st.markdown("---") st.markdown("""
© 2023 精准营销系统 | 基于Spark和Streamlit开发 | 优化版Spark连接
""", unsafe_allow_html=True) 执行上述代码，streamlit-spark无反应，提示找不到指定路径 (base) D:\2035946879>streamlit run 111.py --server.maxUploadSize=500 You can now view your Streamlit app in your browser. Local URL: http://localhost:8502 Network URL: http://10.73.24.34:8502 系统找不到指定的路径。

- 明确设置`JAVA_HOME`环境变量指向JDK安装路径 - 使用`findspark`包正确初始化Spark环境 2. **Windows特定配置**： - 添加`spark.local.dir`配置指定临时目录 - 设置Java临时目录参数`-Djava.io.tmpdir` - ...

import streamlit as st import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from pyspark.sql import SparkSession from pyspark.ml.feature import VectorAssembler, StringIndexer, OneHotEncoder from pyspark.ml import Pipeline from pyspark.ml.classification import LogisticRegression, DecisionTreeClassifier, RandomForestClassifier from pyspark.ml.evaluation import BinaryClassificationEvaluator, MulticlassClassificationEvaluator import os import time import warnings import tempfile # 忽略警告 warnings.filterwarnings("ignore") # 设置中文字体 plt.rcParams['font.sans-serif'] = ['SimHei'] plt.rcParams['axes.unicode_minus'] = False # 页面设置 st.set_page_config( page_title="精准营销系统", page_icon="📊", layout="wide", initial_sidebar_state="expanded" ) # 自定义CSS样式 st.markdown(""" <style> .stApp { background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); font-family: 'Helvetica Neue', Arial, sans-serif; } .header { background: linear-gradient(90deg, #1a237e 0%, #283593 100%); color: white; padding: 1.5rem; border-radius: 0.75rem; box-shadow: 0 4极客时间12px rgba(0,0,0,0.1); margin-bottom: 2rem; } .card { background: white; border-radius: 0.75rem; padding: 1rem; margin-bottom: 1.5rem; box-shadow: 0 4px 12px rgba(0,0,0,0.08); transition: transform 0.3s ease; } .card:hover { transform: translateY(-5px); box-shadow: 0 6px 16px rgba(0,0,0,0.12); } .stButton button { background: linear-gradient(90deg, #3949ab 0%, #1a237e 100%) !important; color: white !important; border: none !important; border-radius: 0.5rem; padding: 0.75rem 1.5rem; font-size: 1rem; font-weight: 600; transition: all 0.3s ease; width: 100%; } .stButton button:hover { transform: scale(1.05); box-shadow: 0 4px 8px rgba(57, 73, 171, 0.4); } .feature-box { background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%); border-radius: 0.75rem; padding: 1.5rem; margin-bottom: 1.5rem; } .result-box { background: linear-gradient(135deg, #e8f5e9 0%, #c8e6c9 100%); border-radius: 0.75rem; padding: 1.5rem; margin-top: 1.5rem; } .model-box { background: linear-gradient(135deg, #fff3e0 0%, #ffe0b2 100%); border-radius: 0.75rem; padding: 1.5rem; margin-top: 1.5rem; } .stProgress > div > div > div { background: linear-gradient(90deg, #2ecc71 0%, #27ae60 100%) !important; } .metric-card { background: white; border-radius: 0.75rem; padding: 1rem; text-align: center; box-shadow: 0 4px 8px rgba(0,0,0,0.06); } .metric-value { font-size: 1.8rem; font-weight: 700; color: #1a237e; } .metric-label { font-size: 0.9rem; color: #5c6bc0; margin-top: 0.5rem; } .highlight { background: linear-gradient(90deg, #ffeb3b 0%, #fbc02d 100%); padding: 0.2rem 0.5rem; border-radius: 0.25rem; font-weight: 600; } .stDataFrame { border-radius: 0.75rem; box-shadow: 0 4px 8px rgba(0,0,0,0.06); } .convert-high { background-color: #c8e6c9 !important; color: #388e3c !important; font-weight: 700; } .convert-low { background-color: #ffcdd2 !important; color: #c62828 !important; font-weight: 600; } </style> """, unsafe_allow_html=True) # 创建Spark会话 def create_spark_session(): # 使用更小的内存配置避免资源问题 return SparkSession.builder \ .appName("TelecomPrecisionMarketing") \ .config("spark.driver.memory", "2g") \ .config("spark.executor.memory", "2g") \ .config("spark.sql.shuffle.partitions", "8") \ .getOrCreate() # 数据预处理函数 - 优化版 def preprocess_data(df): """ 优化后的数据预处理函数参数: df: 原始数据 (DataFrame) 返回: 预处理后的数据 (DataFrame) """ # 1. 选择关键特征 available_features = [col for col in df.columns if col in [ 'AGE', 'GENDER', 'ONLINE_DAY', 'TERM_CNT', 'IF_YHTS', 'MKT_STAR_GRADE_NAME', 'PROM_AMT_MONTH', 'is_rh_next' # 目标变量 ]] # 确保目标变量存在 if 'is_rh_next' not in available_features: st.error("错误：数据集中缺少目标变量 'is_rh_next'") return df # 只保留需要的列 df = df[available_features].copy() # 2. 处理缺失值 # 数值特征用中位数填充（比均值更鲁棒） numeric_cols = ['AGE', 'ONLINE_DAY', 'TERM_CNT', 'PROM_AMT_MONTH'] for col in numeric_cols: if col in df.columns: median_val = df[col].median() df[col].fillna(median_val, inplace=True) # 分类特征用众数填充 categorical_cols = ['GENDER', 'MKT_STAR_GRADE_NAME', 'IF_YHTS'] for col in categorical_cols: if col in df.columns: mode_val = df[col].mode()[0] if not df[col].mode().empty else '未知' df[col].fillna(mode_val, inplace=True) # 3. 异常值处理（使用IQR方法） def handle_outliers(series): Q1 = series.quantile(0.25) Q3 = series.quantile(0.75) IQR = Q3 - Q1 lower_bound = Q1 - 1.5 * IQR upper_bound = Q3 + 1.5 * IQR return series.clip(lower_bound, upper_bound) for col in numeric_cols: if col in df.columns: df[col] = handle_outliers(df[col]) return df # 标题区域 st.markdown("""

精准营销系统

基于机器学习的单宽转融预测

""", unsafe_allow_html=True) # 页面布局 col1, col2 = st.columns([1, 1.5]) # 左侧区域 - 图片和简介 with col1: st.markdown("""

📱 智能营销系统

预测单宽带用户转化为融合套餐用户的可能性

""", unsafe_allow_html=True) # 使用在线图片作为占位符 st.image("https://images.unsplash.com/photo-1551836022-d5d88e9218df?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1200&q=80", caption="精准营销系统示意图", use_column_width=True) st.markdown("""

📈 系统功能

用户转化可能性预测
<极客时间>高精度机器学习模型
可视化数据分析

精准营销策略制定

""", unsafe_allow_html=True) # 右侧区域 - 功能选择 with col2: st.markdown("""

📋 请选择操作类型

您可以选择数据分析或使用模型进行预测

""", unsafe_allow_html=True) # 功能选择 option = st.radio("", ["📊 数据分析 - 探索数据并训练模型", "🔍 预测分析 - 预测用户转化可能性"], index=0, label_visibility="hidden") # 数据分析部分 if "数据分析" in option: st.markdown("""

数据分析与模型训练

上传数据并训练预测模型

""", unsafe_allow_html=True) # 上传训练数据 train_file = st.file_uploader("上传数据集 (CSV格式, GBK编码)", type=["csv"]) if train_file is not None: try: # 读取数据 train_data = pd.read_csv(train_file, encoding='GBK') # 显示数据预览 with st.expander("数据预览", expanded=True): st.dataframe(train_data.head()) col1, col2 = st.columns(2) col1.metric("总样本数", train_data.shape[0]) col2.metric("特征数量", train_data.shape[1] - 1) # 数据预处理 st.subheader("数据预处理") with st.spinner("数据预处理中..."): processed_data = preprocess_data(train_data) st.success("✅ 数据预处理完成") # 可视化数据分布 st.subheader("数据分布分析") # 目标变量分布 st.markdown("目标变量分布 (is_rh_next)") fig, ax = plt.subplots(figsize=(8, 5)) sns.countplot(x='is_rh_next', data=processed_data, palette='viridis') plt.title('用户转化分布 (0:未转化, 1:转化)') plt.xlabel('是否转化') plt.ylabel('用户数量') st.pyplot(fig) # 数值特征分布 st.markdown("数值特征分布") numeric_cols = ['AGE', 'ONLINE_DAY', 'TERM_CNT', 'PROM_AMT_MONTH'] # 动态计算子图布局 num_features = len(numeric_cols) if num_features > 0: ncols = 2 nrows = (num_features + ncols - 1) // ncols # 向上取整 fig, axes = plt.subplots(nrows, ncols, figsize=(14, 4*nrows)) # 将axes展平为一维数组 if nrows > 1 or ncols > 1: axes = axes.flatten() else: axes = [axes] # 单个子图时确保axes是列表 for i, col in enumerate(numeric_cols): if col in processed_data.columns and i < len(axes): sns.histplot(processed_data[col], kde=True, ax=axes[i], color='skyblue') axes[i].set_title(f'{col}分布') axes[i].set_xlabel('') # 隐藏多余的子图 for j in range(i+1, len(axes)): axes[j].set_visible(False) plt.tight_layout() st.pyplot(fig) else: st.warning("没有可用的数值特征") # 特征相关性分析 st.markdown("特征相关性热力图") corr_cols = numeric_cols + ['is_rh_next'] if len(corr_cols) > 1: corr_data = processed_data[corr_cols].corr() fig, ax = plt.subplots(figsize=(12, 8)) sns.heatmap(corr_data, annot=True, fmt=".2f", cmap='coolwarm', ax=ax) plt.title('特征相关性热力图') st.pyplot(fig) else: st.warning("特征不足，无法生成相关性热力图") # 模型训练 st.subheader("模型训练") # 训练参数设置 col1, col2 = st.columns(2) test_size = col1.slider("测试集比例", 0.1, 0.4, 0.2, 0.05) random_state = col2.number_input("随机种子", 0, 100, 42) # 开始训练按钮 if st.button("开始训练模型", use_container_width=True): # 创建临时目录用于存储模型 with tempfile.TemporaryDirectory() as tmp_dir: model_path = os.path.join(tmp_dir, "best_model") progress_bar = st.progress(0) status_text = st.empty() # 步骤1: 创建Spark会话 status_text.text("步骤1/7: 初始化Spark会话...") spark = create_spark_session() progress_bar.progress(15) # 步骤2: 转换为Spark DataFrame status_text.text("步骤2/7: 转换数据为Spark格式...") spark_df = spark.createDataFrame(processed_data) progress_bar.progress(30) # 步骤3: 划分训练集和测试集 status_text.text("步骤3/7: 划分训练集和测试集...") train_df, test_df = spark_df.randomSplit([1.0 - test_size, test_size], seed=random_state) progress_bar.progress(40) # 步骤4: 特征工程 status_text.text("步骤4/7: 特征工程处理...") categorical_cols = ['GENDER', 'MKT_STAR_GRADE_NAME', 'IF_YHTS'] existing_cat_cols = [col for col in categorical_cols if col in processed_data.columns] # 创建特征处理管道 indexers = [StringIndexer(inputCol=col, outputCol=col+"_index") for col in existing_cat_cols] encoders = [OneHotEncoder(inputCol=col+"_index", outputCol=col+"_encoded") for col in existing_cat_cols] numeric_cols = ['AGE', 'ONLINE_DAY', 'TERM_CNT', 'PROM_AMT_MONTH'] feature_cols = numeric_cols + [col+"_encoded" for col in existing_cat_cols] assembler = VectorAssembler(inputCols=feature_cols, outputCol="features") label_indexer = StringIndexer(inputCol="is_rh_next", outputCol="label") progress_bar.progress(50) # 步骤5: 构建模型 status_text.text("步骤5/7: 构建和训练模型...") # 使用更简单的模型配置 rf = RandomForestClassifier( featuresCol="features", labelCol="label", numTrees=50, # 减少树的数量 maxDepth=5, # 限制深度 seed=random_state ) pipeline = Pipeline(stages=indexers + encoders + [assembler, label_indexer, rf]) model = pipeline.fit(train_df) progress_bar.progress(80) # 步骤6: 评估模型 status_text.text("步骤6/7: 评估模型性能...") predictions = model.transform(test_df) evaluator_auc = BinaryClassificationEvaluator(labelCol="label", rawPredictionCol="rawPrediction") evaluator_acc = MulticlassClassificationEvaluator(labelCol="label", predictionCol="prediction", metricName="accuracy") auc = evaluator_auc.evaluate(predictions) acc = evaluator_acc.evaluate(predictions) results = { "Random Forest": {"AUC": auc, "Accuracy": acc} } progress_bar.progress(95) # 步骤7: 保存结果 status_text.text("步骤7/7: 保存模型和结果...") model.write().overwrite().save(model_path) st.session_state.model_results = results st.session_state.best_model = model st.session_state.model_path = model_path st.session_state.spark = spark progress_bar.progress(100) st.success("🎉 模型训练完成！") # 显示模型性能 st.subheader("模型性能评估") results_df = pd.DataFrame(results).T st.dataframe(results_df.style.format("{:.4f}").background_gradient(cmap='Blues')) # 特征重要性 st.subheader("特征重要性") rf_model = model.stages[-1] feature_importances = rf_model.featureImportances.toArray() importance_df = pd.DataFrame({ "Feature": feature_cols, "Importance": feature_importances }).sort_values("Importance", ascending=False).head(10) fig, ax = plt.subplots(figsize=(10, 6)) sns.barplot(x="Importance", y="Feature", data=importance_df, palette="viridis", ax=ax) plt.title('Top 10 重要特征') st.pyplot(fig) except Exception as e: st.error(f"模型训练错误: {str(e)}") # 预测分析部分 else: st.markdown("""

用户转化预测

预测单宽带用户转化为融合套餐的可能性

""", unsafe_allow_html=True) # 上传预测数据 predict_file = st.file_uploader("上传预测数据 (CSV格式, GBK编码)", type=["csv"]) if predict_file is not None: try: # 读取数据 predict_data = pd.read_csv(predict_file, encoding='GBK') # 显示数据预览 with st.expander("数据预览", expanded=True): st.dataframe(predict_data.head()) # 检查是否有模型 if "model_path" not in st.session_state: st.warning("⚠️ 未找到训练好的模型，请先训练模型") st.stop() # 开始预测按钮 if st.button("开始预测", use_container_width=True): progress_bar = st.progress(0) status_text = st.empty() # 步骤1: 数据预处理 status_text.text("步骤1/4: 数据预处理中...") processed_data = preprocess_data(predict_data) progress_bar.progress(25) # 步骤2: 创建Spark会话 status_text.text("步骤2/4: 初始化Spark会话...") if "spark" not in st.session_state: spark = create_spark_session() st.session_state.spark = spark else: spark = st.session_state.spark progress_bar.progress(50) # 步骤3: 预测 status_text.text("步骤3/4: 进行预测...") spark_df = spark.createDataFrame(processed_data) best_model = st.session_state.best_model predictions = best_model.transform(spark_df) progress_bar.progress(75) # 步骤4: 处理结果 status_text.text("步骤4/4: 处理预测结果...") predictions_df = predictions.select( "CCUST_ROW_ID", "probability", "prediction" ).toPandas() # 解析概率值 predictions_df['转化概率'] = predictions_df['probability'].apply(lambda x: float(x[1])) predictions_df['预测结果'] = predictions_df['prediction'].apply(lambda x: "可能转化" if x == 1.0 else "可能不转化") # 添加转化可能性等级 predictions_df['转化可能性'] = pd.cut( predictions_df['转化概率'], bins=[0, 0.3, 0.7, 1], labels=["低可能性", "中可能性", "高可能性"] ) # 保存结果 st.session_state.prediction_results = predictions_df progress_bar.progress(100) st.success("✅ 预测完成！") except Exception as e: st.error(f"预测错误: {str(e)}") # 显示预测结果 if "prediction_results" in st.session_state: st.markdown("""

预测结果

用户转化可能性评估报告

""", unsafe_allow_html=True) result_df = st.session_state.prediction_results # 转化可能性分布 st.subheader("转化可能性分布概览") col1, col2, col3 = st.columns(3) high_conv = (result_df["转化可能性"] == "高可能性").sum() med_conv = (result_df["转化可能性"] == "中可能性").sum() low_conv = (result_df["转化可能性"] == "低可能性").sum() col1.markdown(f"""

{high_conv}

高可能性用户

""", unsafe_allow_html=True) col2.markdown(f"""

{med_conv}

中可能性用户

""", unsafe_allow_html=True) col3.markdown(f"""

{low_conv}

低可能性用户

""", unsafe_allow_html=True) # 转化可能性分布图 fig, ax = plt.subplots(figsize=(8, 5)) conv_counts = result_df["转化可能性"].value_counts() conv_counts.plot(kind='bar', color=['#4CAF50', '#FFC107', '#F44336'], ax=ax) plt.title('用户转化可能性分布') plt.xlabel('可能性等级') plt.ylabel('用户数量') st.pyplot(fig) # 详细预测结果 st.subheader("详细预测结果") # 样式函数 def color_convert(val): if val == "高可能性": return "background-color: #c8e6c9; color: #388e3c;" elif val == "中可能性": return "background-color: #fff9c4; color: #f57f17;" else: return "background-color: #ffcdd2; color: #c62828;" # 格式化显示 display_df = result_df[["CCUST_ROW_ID", "转化概率", "预测结果", "转化可能性"]] styled_df = display_df.style.format({ "转化概率": "{:.2%}" }).applymap(color_convert, subset=["转化可能性"]) st.dataframe(styled_df, height=400) # 下载结果 csv = display_df.to_csv(index=False).encode("utf-8") st.download_button( label="下载预测结果", data=csv, file_name="用户转化预测结果.csv", mime="text/csv", use_container_width=True ) # 页脚 st.markdown("---") st.markdown("""
© 2023 精准营销系统 | 基于Spark和Streamlit开发
""", unsafe_allow_html=True) 执行上述代码出现如下报错，给出修改后完整代码 Error: A JNI error has occurred, please check your installation and try again Exception in thread "main" java.lang.UnsupportedClassVersionError: org/apache/spark/launcher/Main has been compiled by a more recent version of the Java Runtime (class file version 61.0), this version of the Java Runtime only recognizes class file versions up to 52.0 at java.lang.ClassLoader.defineClass1(Native Method) at java.lang.ClassLoader.defineClass(ClassLoader.java:756) at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142) at java.net.URLClassLoader.defineClass(URLClassLoader.java:468) at java.net.URLClassLoader.access$100(URLClassLoader.java:74) at java.net.URLClassLoader$1.run(URLClassLoader.java:369) at java.net.URLClassLoader$1.run(URLClassLoader.java:363) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:362) at java.lang.ClassLoader.loadClass(ClassLoader.java:418) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:355) at java.lang.ClassLoader.loadClass(ClassLoader.java:351) at sun.launcher.LauncherHelper.checkAndLoadMain(LauncherHelper.java:601)

由于我们无法在代码中直接修改已安装的包，因此这里不提供代码修改，而是提供环境配置的修改。但是，如果问题仍然存在，我们可以尝试在代码中不启动Spark会话，而是使用Pandas和scikit-learn进行建模，但这将改变...

contactList.jsp：<%@ page contentType="text/html;charset=UTF-8" language="java" %> <%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c" %> <!DOCTYPE html> <html lang="zh-CN"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no"> <title>${isBlacklist ? '黑名单' : '联系人列表'}</title> <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet"> <link href="https://cdn.jsdelivr.net/npm/[email protected]/font/bootstrap-icons.css" rel="stylesheet"> <style> :root { --primary-color: #4361ee; --secondary-color: #3f37c9; --success-color: #4cc9f0; --danger-color: #f72585; --warning-color: #f8961e; --light-color: #f8f9fa; --dark-color: #212529; } body { background-color: #f5f7fa; font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; } .container { max-width: 1400px; padding: 20px; } .header-section { background: linear-gradient(135deg, var(--primary-color), var(--secondary-color)); color: white; border-radius: 15px; padding: 2rem; margin-bottom: 2rem; box-shadow: 0 10px 20px rgba(67, 97, 238, 0.15); } .page-title { font-weight: 700; margin: 0; display: flex; align-items: center; } .page-title i { font-size: 2rem; margin-right: 1rem; } .action-buttons .btn { border-radius: 50px; padding: 0.5rem 1.5rem; font-weight: 500; margin-left: 10px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); transition: all 0.3s ease; } .action-buttons .btn:hover { transform: translateY(-2px); box-shadow: 0 7px 14px rgba(0, 0, 0, 0.15); } .nav-tabs { border-bottom: none; margin: 1.5rem 0; } .nav-tabs .nav-link { border: none; color: #6c757d; font-weight: 500; padding: 0.75rem 1.5rem; border-radius: 50px; margin-right: 0.5rem; transition: all 0.3s ease; } .nav-tabs .nav-link.active { background-color: var(--primary-color); color: white; box-shadow: 0 4px 6px rgba(67, 97, 238, 0.3); } .nav-tabs .nav-link:not(.active):hover { color: var(--primary-color); background-color: rgba(67, 97, 238, 0.1); } .search-box { background: white; border-radius: 50px; padding: 0.5rem; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05); } .search-box input { border: none; border-radius: 50px 0 0 50px; padding: 0.75rem 1.5rem; } .search-box input:focus { box-shadow: none; } .search-box button { border-radius: 0 50px 50px 0; background-color: var(--primary-color); color: white; border: none; padding: 0.75rem 1.5rem; transition: all 0.3s ease; } .search-box button:hover { background-color: var(--secondary-color); } .contact-card { background: white; border-radius: 15px; box-shadow: 0 5px 15px rgba(0, 0, 0, 0.05); transition: all 0.3s ease; margin-bottom: 1.5rem; overflow: hidden; } .contact-card:hover { transform: translateY(-5px); box-shadow: 0 15px 30px rgba(0, 0, 0, 0.1); } .contact-avatar { width: 80px; height: 80px; border-radius: 50%; object-fit: cover; border: 3px solid white; box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1); } .contact-info { padding: 1.5rem; } .contact-name { font-weight: 600; color: var(--dark-color); margin-bottom: 0.5rem; } .contact-phone { color: #6c757d; font-size: 0.9rem; } .status-badge { font-size: 0.8rem; padding: 0.35rem 0.75rem; border-radius: 50px; font-weight: 500; } .action-btn { min-width: 100px; border-radius: 50px; font-size: 0.85rem; font-weight: 600; padding: 0.5rem 1rem; margin: 0.25rem; transition: all 0.3s ease; } .action-btn:hover { transform: translateY(-2px); box-shadow: 0 5px 10px rgba(0, 0, 0, 0.1); } .pagination { margin-top: 3rem; justify-content: center; } .page-item .page-link { border-radius: 50px !important; margin: 0 0.25rem; border: none; color: var(--dark-color); font-weight: 500; min-width: 40px; text-align: center; } .page-item.active .page-link { background: linear-gradient(135deg, var(--primary-color), var(--secondary-color)); color: white; box-shadow: 0 4px 6px rgba(67, 97, 238, 0.3); } .no-contacts { text-align: center; padding: 3rem; color: #6c757d; } .no-contacts i { font-size: 3rem; color: #dee2e6; margin-bottom: 1rem; } /* 天气预报样式 / .weather-widget { background: linear-gradient(135deg, #1e3c72, #2a5298); color: white; border-radius: 15px; padding: 2rem; margin-bottom: 2rem; box-shadow: 0 10px 20px rgba(0, 0, 0, 0.15); position: relative; overflow: hidden; } .weather-widget::before { content: ""; position: absolute; top: -50%; right: -50%; width: 200%; height: 200%; background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0) 70%); z-index: 0; } .weather-content { position: relative; z-index: 1; } .weather-title { font-weight: 700; margin: 0; display: flex; align-items: center; font-size: 1.5rem; } .weather-title i { font-size: 1.8rem; margin-right: 1rem; } .weather-details { display: flex; align-items: center; justify-content: space-between; margin-top: 1.5rem; } .weather-info { flex: 1; } .weather-city { font-size: 1.8rem; font-weight: 600; margin-bottom: 0.5rem; } .weather-desc { font-size: 1.2rem; opacity: 0.9; margin-bottom: 0.5rem; text-transform: capitalize; } .weather-temp { font-size: 3rem; font-weight: 700; margin: 0; line-height: 1; } .weather-icon-container { width: 100px; height: 100px; display: flex; align-items: center; justify-content: center; } .weather-icon { width: 100%; height: 100%; object-fit: contain; filter: drop-shadow(0 0 10px rgba(255, 255, 255, 0.3)); } .weather-extra { display: flex; justify-content: space-between; margin-top: 1.5rem; padding-top: 1rem; border-top: 1px solid rgba(255, 255, 255, 0.2); } .weather-extra-item { text-align: center; flex: 1; } .weather-extra-label { font-size: 0.9rem; opacity: 0.8; margin-bottom: 0.3rem; } .weather-extra-value { font-size: 1.2rem; font-weight: 600; } / 天气动画 / @keyframes fadeIn { from { opacity: 0; transform: translateY(10px); } to { opacity: 1; transform: translateY(0); } } .weather-animate { animation: fadeIn 0.5s ease-out forwards; } / 移动端特定样式 / @media (max-width: 768px) { .header-section { padding: 1.5rem; } .action-buttons { margin-top: 1rem; } .contact-avatar { width: 60px; height: 60px; } .action-btn { min-width: auto; padding: 0.4rem 0.8rem; font-size: 0.8rem; } .search-box input, .search-box button { padding: 0.5rem 1rem; } .nav-tabs .nav-link { padding: 0.5rem 1rem; margin-right: 0.25rem; } .weather-widget { padding: 1.5rem; } .weather-temp { font-size: 2rem; } .weather-city { font-size: 1.2rem; } } / 触摸友好元素 */ .btn, .form-control, .nav-link, .page-link { touch-action: manipulation; } </style> </head> <body>

${isBlacklist ? '黑名单管理' : '联系人中心'}

新增联系人  查看事项

实时天气

唐山

--

--°C

湿度

--%

风速

-- m/s

气压

-- hPa

正常联系人

黑名单

筛选条件
<form class="row g-3" method="get" action="${pageContext.request.contextPath}/contact"> <input type="hidden" name="action" value="${isBlacklist ? 'blacklist' : ''}">

<input type="text" class="form-control" name="search" placeholder="搜索姓名/电话" value="${param.search}" aria-label="Search"> <button class="btn btn-primary" type="submit"> 搜索了一个 </button>

<select class="form-select" name="gender" onchange="this.form.submit()"> <option value="">所有性别</option> <option value="男" ${param.gender == '男' ? 'selected' : ''}>男</option> <option value="女" ${param.gender == '女' ? 'selected' : ''}>女</option> </select>

重置
</form>
<c:if test="${empty contacts}">

暂无联系人数据

点击"新增联系人"按钮添加您的第一个联系人

</c:if>
<c:forEach var="contact" items="${contacts}">

${contact.ctName} <c:if test="${contact.ctDelete == 1}"> 已屏蔽 </c:if>

${contact.ctPhone}
${isBlacklist ? '黑名单' : '正常'}

详情  事项 <c:choose> <c:when test="${isBlacklist}"> 恢复 </c:when> <c:otherwise> <c:if test="${contact.ctDelete == 0}"> 屏蔽 </c:if> </c:otherwise> </c:choose>

</c:forEach>
<c:if test="${not empty contacts}">

<c:if test="${currentPage > 1}">

</c:if> <c:forEach begin="1" end="${totalPages}" var="i">
${i}
</c:forEach> <c:if test="${currentPage < totalPages}">

</c:if>

</c:if>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script> <script> // 获取天气数据 function getWeather() { const apiKey = "d9bd1b0c215b38f1ceba3a8b06818789"; // OpenWeatherMap API密钥 const city = "Tangshan"; // 唐山的英文名 // 获取唐山天气 fetch(`https://api.openweathermap.org/data/2.5/weather?q=${city}&appid=${apiKey}&units=metric&lang=zh_cn`) .then(response => { if (!response.ok) { throw new Error("网络响应错误：" + response.statusText); } return response.json(); }) .then(data => { updateWeatherUI(data); }) .catch(error => { console.error('获取天气数据失败:', error); showWeatherError(); }); } // 更新天气UI function updateWeatherUI(data) { document.getElementById('weatherTemp').textContent = `${Math.round(data.main.temp)}°C`; document.getElementById('weatherDesc').textContent = data.weather[0]?.description || "--"; document.getElementById('weatherIcon').src = `https://openweathermap.org/img/wn/${data.weather.icon}@2x.png`; document.getElementById('weatherHumidity').textContent = `${data.main.humidity}%`; document.getElementById('weatherWind').textContent = `${data.wind.speed} m/s`; document.getElementById('weatherPressure').textContent = `${data.main.pressure} hPa`; } // 显示天气错误 function showWeatherError() { document.getElementById('weatherCity').textContent = "天气数据获取失败"; document.getElementById('weatherDesc').textContent = "请检查网络连接"; document.getElementById('weatherTemp').textContent = "--°C"; document.getElementById('weatherHumidity').textContent = "--%"; document.getElementById('weatherWind').textContent = "-- m/s"; document.getElementById('weatherPressure').textContent = "-- hPa"; } // 页面加载完成后获取天气数据 document.addEventListener('DOMContentLoaded', getWeather); </script> </body> </html>控制台日志：D:\apache-tomcat-8.5.99\bin\catalina.bat run [2025-06-10 09:39:00,390] Artifact firstweb2:war: Waiting for server connection to start artifact deployment... Using CATALINA_BASE: "C:\Users\DELL\AppData\Local\JetBrains\IntelliJIdea2022.2\tomcat\673dafd6-7580-4107-962c-bab19e459aa1" Using CATALINA_HOME: "D:\apache-tomcat-8.5.99" Using CATALINA_TMPDIR: "D:\apache-tomcat-8.5.99\temp" Using JRE_HOME: "D:\java" Using CLASSPATH: "D:\apache-tomcat-8.5.99\bin\bootstrap.jar;D:\apache-tomcat-8.5.99\bin\tomcat-juli.jar" Using CATALINA_OPTS: "" NOTE: Picked up JDK_JAVA_OPTIONS: --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.rmi/sun.rmi.transport=ALL-UNNAMED 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: Server.服务器版本: Apache Tomcat/8.5.99 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 服务器构建: Feb 14 2024 22:52:13 UTC 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 服务器版本号: 8.5.99.0 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 操作系统名称: Windows 11 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: OS.版本: 10.0 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 架构: amd64 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: Java 环境变量: D:\java 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: Java虚拟机版本: 18.0.2.1+1-1 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: JVM.供应商: Oracle Corporation 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: CATALINA_BASE: C:\Users\DELL\AppData\Local\JetBrains\IntelliJIdea2022.2\tomcat\673dafd6-7580-4107-962c-bab19e459aa1 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: CATALINA_HOME: D:\apache-tomcat-8.5.99 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： --add-opens=java.base/java.lang=ALL-UNNAMED 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： --add-opens=java.base/java.io=ALL-UNNAMED 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： --add-opens=java.base/java.util=ALL-UNNAMED 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： --add-opens=java.base/java.util.concurrent=ALL-UNNAMED 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： --add-opens=java.rmi/sun.rmi.transport=ALL-UNNAMED 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： -Djava.util.logging.config.file=C:\Users\DELL\AppData\Local\JetBrains\IntelliJIdea2022.2\tomcat\673dafd6-7580-4107-962c-bab19e459aa1\conf\logging.properties 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： -Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： -Dcom.sun.management.jmxremote= 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： -Dcom.sun.management.jmxremote.port=1099 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： -Dcom.sun.management.jmxremote.ssl=false 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： -Dcom.sun.management.jmxremote.password.file=C:\Users\DELL\AppData\Local\JetBrains\IntelliJIdea2022.2\tomcat\673dafd6-7580-4107-962c-bab19e459aa1\jmxremote.password 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： -Dcom.sun.management.jmxremote.access.file=C:\Users\DELL\AppData\Local\JetBrains\IntelliJIdea2022.2\tomcat\673dafd6-7580-4107-962c-bab19e459aa1\jmxremote.access 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： -Djava.rmi.server.hostname=127.0.0.1 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： -Djdk.tls.ephemeralDHKeySize=2048 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： -Djava.protocol.handler.pkgs=org.apache.catalina.webresources 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： -Dignore.endorsed.dirs= 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： -Dcatalina.base=C:\Users\DELL\AppData\Local\JetBrains\IntelliJIdea2022.2\tomcat\673dafd6-7580-4107-962c-bab19e459aa1 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： -Dcatalina.home=D:\apache-tomcat-8.5.99 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.VersionLoggerListener log 信息: 命令行参数： -Djava.io.tmpdir=D:\apache-tomcat-8.5.99\temp 6月 10, 2025 9:39:01 下午 org.apache.catalina.core.AprLifecycleListener lifecycleEvent 信息: 使用APR版本[1.7.4]加载了基于APR的Apache Tomcat本机库[1.2.39]。 6月 10, 2025 9:39:01 下午 org.apache.catalina.core.AprLifecycleListener lifecycleEvent 信息: APR功能：IPv6[true]、sendfile[true]、accept filters[false]、random[true]、UDS [{4}]。 6月 10, 2025 9:39:01 下午 org.apache.catalina.core.AprLifecycleListener lifecycleEvent 信息: APR/OpenSSL配置：useAprConnector[false]，useOpenSSL[true] 6月 10, 2025 9:39:01 下午 org.apache.catalina.core.AprLifecycleListener initializeSSL 信息: OpenSSL成功初始化 [OpenSSL 3.0.11 19 Sep 2023] 6月 10, 2025 9:39:01 下午 org.apache.coyote.AbstractProtocol init 信息: 初始化协议处理器 ["http-nio-8080"] 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.Catalina load 信息: Initialization processed in 370 ms 6月 10, 2025 9:39:01 下午 org.apache.catalina.core.StandardService startInternal 信息: 正在启动服务[Catalina] 6月 10, 2025 9:39:01 下午 org.apache.catalina.core.StandardEngine startInternal 信息: 正在启动 Servlet 引擎：[Apache Tomcat/8.5.99] 6月 10, 2025 9:39:01 下午 org.apache.coyote.AbstractProtocol start 信息: 开始协议处理句柄["http-nio-8080"] 6月 10, 2025 9:39:01 下午 org.apache.catalina.startup.Catalina start 信息: Server startup in 82 ms Connected to server [2025-06-10 09:39:01,626] Artifact firstweb2:war: Artifact is being deployed, please wait... 6月 10, 2025 9:39:02 下午 org.apache.jasper.servlet.TldScanner scanJars 信息: 至少有一个JAR被扫描用于TLD但尚未包含TLD。为此记录器启用调试日志记录，以获取已扫描但未在其中找到TLD的完整JAR列表。在扫描期间跳过不需要的JAR可以缩短启动时间和JSP编译时间。 ־ ʼ === ݿ === û (user_message) ϵ ˱ (contact_message) ϵ ͼƬ (contact_picture_message) ϵ (contact_matter_message) === === ־ ʼ === ݿ === û (user_message) ϵ ˱ (contact_message) ϵ ͼƬ (contact_picture_message) ϵ (contact_matter_message) === === [2025-06-10 09:39:02,537] Artifact firstweb2:war: Artifact is deployed successfully [2025-06-10 09:39:02,537] Artifact firstweb2:war: Deploy took 911 milliseconds === Ϣ === URL: http://localhost:8080/firstweb2_war/ 󷽷 : GET : null û ỰID: === Ϣ === URL: http://localhost:8080/firstweb2_war/ 󷽷 : GET : null û ỰID: === Ӧ Ϣ === Ӧ״̬: 200 ʱ : 2ms === === === Ӧ Ϣ === Ӧ״̬: 200 ʱ : 3ms === === === Ϣ === URL: http://localhost:8080/firstweb2_war/login 󷽷 : POST : null û ỰID: === Ϣ === URL: http://localhost:8080/firstweb2_war/login 󷽷 : POST : null û ỰID: === Ӧ Ϣ === Ӧ״̬: 302 ʱ : 29ms === === === Ӧ Ϣ === Ӧ״̬: 302 ʱ : 29ms === === === Ϣ === URL: http://localhost:8080/firstweb2_war/contact 󷽷 : GET : null û ỰID: EDB570CA9B045C3BD0946687C2639008 === Ϣ === URL: http://localhost:8080/firstweb2_war/contact 󷽷 : GET : null û ỰID: EDB570CA9B045C3BD0946687C2639008 6月 10, 2025 9:39:11 下午 org.apache.catalina.startup.HostConfig deployDirectory 信息: 把web 应用程序部署到目录 [D:\apache-tomcat-8.5.99\webapps\manager] 6月 10, 2025 9:39:11 下午 org.apache.catalina.startup.HostConfig deployDirectory 信息: Web应用程序目录[D:\apache-tomcat-8.5.99\webapps\manager]的部署已在[153]毫秒内完成 === Ϣ === URL: http://localhost:8080/firstweb2_war/contact 󷽷 : GET : action=detail&ctId=0000000002 û ỰID: EDB570CA9B045C3BD0946687C2639008 === Ϣ === URL: http://localhost:8080/firstweb2_war/contact 󷽷 : GET : action=detail&ctId=0000000002 û ỰID: EDB570CA9B045C3BD0946687C2639008 === Ӧ Ϣ === Ӧ״̬: 200 ʱ : 180ms === === === Ӧ Ϣ === Ӧ״̬: 200 ʱ : 180ms === === === Ϣ === URL: http://localhost:8080/firstweb2_war/contact 󷽷 : POST : action=update&ctId=0000000002 û ỰID: EDB570CA9B045C3BD0946687C2639008 === Ϣ === URL: http://localhost:8080/firstweb2_war/contact 󷽷 : POST : action=update&ctId=0000000002 û ỰID: EDB570CA9B045C3BD0946687C2639008 === Ӧ Ϣ === Ӧ״̬: 302 ʱ : 93ms === === === Ӧ Ϣ === Ӧ״̬: 302 ʱ : 93ms === === === Ϣ === URL: http://localhost:8080/firstweb2_war/contact 󷽷 : GET : action=detail&ctId=0000000002 û ỰID: EDB570CA9B045C3BD0946687C2639008 === Ϣ === URL: http://localhost:8080/firstweb2_war/contact 󷽷 : GET : action=detail&ctId=0000000002 û ỰID: EDB570CA9B045C3BD0946687C2639008 === Ӧ Ϣ === Ӧ״̬: 200 ʱ : 31ms === === === Ӧ Ϣ === Ӧ״̬: 200 ʱ : 32ms === === === Ϣ === URL: http://localhost:8080/firstweb2_war/contact 󷽷 : GET : null û ỰID: EDB570CA9B045C3BD0946687C2639008 === Ϣ === URL: http://localhost:8080/firstweb2_war/contact 󷽷 : GET : null û ỰID: EDB570CA9B045C3BD0946687C2639008 还是无法查看天气情况，无法更新，请基于我的代码给出具体的解决措施和代码，代码要完完整整的，要全部的

- **API key无效或过期**：天气服务API key未正确配置或未更新。 - **网络连接问题**：服务器无法访问外部API（如防火墙限制）。 - **数据解析错误**：API响应（如JSON）解析代码逻辑错误。 - **编码问题**：字符集...

linux_jdk1.8

JDK是 Java 语言的软件开发工具包，JDK（TM）8 64位简而言之就是一款针对java编程的软件工具，是学习java编程的第一步。JDK作为Java语言的软件开发工具包.

jdk-1.8 linux版本 centos rpm安装版本

java jdk 1.8 linux 资源包新的特性： Lambda表达式函数式接口方法引用和构造器调用 Stream API 接口中的默认方法和静态方法新时间日期API

jdk1.8.0_121_20171108-01

centos 64位 jdk1.8.0_121 linux环境请注意。jdk1.8.0_121_20171108.zip.001

center os7 安装gcc/make所需要的rpm包

center os7 安装gcc/make所需要的rpm包，经过验证安装没有问题

centos7下使用的jdk1.8

linux下的jdk的压缩包当然也可以在官网下载,下载路径:http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html centos7配置环境可以参考:https://blog.csdn.net/ezbuy/article/details/80347329

Echarts - 鼠标 hover 移入去显示数据

效果图实现方法官方文档(tooltip)：https://echarts.apache.org/zh/option.html#grid.tooltip 在配置项中加入以下代码： // 具体配置请查阅配置文档(上方链接) tooltip : { trigger: 'axis', axisPointer: { type: 'cross',//指示类型 label: { // 横纵坐标指示区块颜色 backgrou

项目管理专责安全责任制.doc

CentOS 7 安装配置 JDK 1.8 和 Maven 环境的详细教程

jdk-8u202-linux-x64-2020.zip

centos jdk 1.8

linux（center OS7）安装JDK、tomcat、mysql 搭建java web项目运行环境

linux – CenterOS 搞定 java 环境 （jdk1.8）

hadoop2.7.5下载 centeros6.7环境jdk1.8编译

Hadoop2.7.5在CentOS6.7与JDK1.8环境下的编译指南

Flutter在生产环境的部署与监控：架构师必备知识

HarmonyOS应用框架深度解析：揭秘架构设计内幕

VMWARE centerOs8.5 配置jdk

精准营销系统

📱 智能营销系统

📈 系统功能

📋 请选择操作类型

数据分析与模型训练

用户转化预测

预测结果

精准营销系统

📱 智能营销系统

📈 系统功能

📋 请选择操作类型

数据分析与模型训练

用户转化预测

预测结果

精准营销系统

📱 智能营销系统

📈 系统功能

📋 请选择操作类型

数据分析与模型训练

用户转化预测

预测结果

精准营销系统

📱 智能营销系统

📈 系统功能

📋 请选择操作类型

数据分析与模型训练

用户转化预测

预测结果

${isBlacklist ? '黑名单管理' : '联系人中心'}

实时天气

筛选条件

暂无联系人数据

${contact.ctName} <c:if test="${contact.ctDelete == 1}"> 已屏蔽 </c:if>

linux_jdk1.8

jdk-1.8 linux版本 centos rpm安装版本

jdk1.8.0_121_20171108-01

center os7 安装gcc/make所需要的rpm包

centos7下使用的jdk1.8

Echarts - 鼠标 hover 移入去显示数据

项目管理专责安全责任制.doc

最新资源

linux – CenterOS 搞定 java 环境（jdk1.8）