| | |
| | | import plotly.graph_objects as go |
| | | import pandas as pd |
| | | import numpy as np |
| | | import joblib |
| | | import os |
| | | from datetime import datetime, timedelta |
| | | from app.services.extruder_service import ExtruderService |
| | | from app.services.main_process_service import MainProcessService |
| | |
| | | from sklearn.svm import SVR |
| | | from sklearn.neural_network import MLPRegressor |
| | | |
| | | |
| | | |
# Steady-state detection functionality (defined inline here rather than imported)
class SteadyStateDetector:
    """Flags steady-state segments in a metered-weight (米重) time series.

    A sample is considered steady when its rolling coefficient of variation
    (rolling std / rolling mean, in percent) is below ``std_threshold`` and
    the weight itself is at least 0.1.  Runs of consecutive steady samples
    lasting at least ``duration_threshold`` seconds are reported as segments.
    """

    def __init__(self):
        # Stateless detector; nothing to configure at construction time.
        pass

    def detect_steady_state(self, df, weight_col='米重', window_size=20,
                            std_threshold=0.5, duration_threshold=60):
        """Mark steady-state rows of ``df`` and collect steady segments.

        :param df: DataFrame with a ``time`` column and a weight column.
                   Modified in place (helper columns ``rolling_std``,
                   ``rolling_mean``, ``fluctuation_range``, ``is_steady``
                   are added) and also returned.
        :param weight_col: name of the metered-weight column.
        :param window_size: rolling window size in rows (only equals seconds
                   when data is sampled at 1 Hz — TODO confirm sampling rate).
        :param std_threshold: threshold (percent) on the rolling coefficient
                   of variation below which a sample counts as steady.
        :param duration_threshold: minimum segment duration in seconds.
        :return: ``(df, steady_segments)``; each segment dict carries start/end
                 time and index, duration, weight statistics and a confidence
                 score clamped to [50, 100].

        NOTE(review): the index arithmetic below (``i - 1``, ``len(df) - 1``
        used with label-based ``df.loc``) assumes a default RangeIndex —
        confirm callers never pass a filtered frame with index gaps.
        """
        if df is None or df.empty:
            return df, []

        # Ensure the time column is datetime so timedelta arithmetic works.
        df['time'] = pd.to_datetime(df['time'])

        # Rolling statistics; min_periods=5 leaves the first few rows NaN
        # rather than producing noisy near-single-sample estimates.
        df['rolling_std'] = df[weight_col].rolling(window=window_size, min_periods=5).std()
        df['rolling_mean'] = df[weight_col].rolling(window=window_size, min_periods=5).mean()

        # Fluctuation range = rolling coefficient of variation, in percent.
        # Warm-up NaNs are treated as perfectly steady (0).
        df['fluctuation_range'] = (df['rolling_std'] / df['rolling_mean']) * 100
        df['fluctuation_range'] = df['fluctuation_range'].fillna(0)

        # Per-sample steadiness flag; weights below 0.1 never count as steady.
        df['is_steady'] = 0
        steady_condition = (
            (df['fluctuation_range'] < std_threshold) &
            (df[weight_col] >= 0.1)
        )
        df.loc[steady_condition, 'is_steady'] = 1

        # Walk the series, grouping consecutive steady samples into segments.
        steady_segments = []
        current_segment = {}

        for i, row in df.iterrows():
            if row['is_steady'] == 1:
                if not current_segment:
                    current_segment = {
                        'start_time': row['time'],
                        'start_idx': i,
                        'weights': [row[weight_col]],
                    }
                else:
                    current_segment['weights'].append(row[weight_col])
            elif current_segment:
                # Segment ended on the previous row; close and maybe keep it.
                end_time = df.loc[i - 1, 'time'] if i > 0 else df.loc[i, 'time']
                finalized = self._finalize_segment(
                    current_segment, end_time, i - 1,
                    std_threshold, duration_threshold,
                )
                if finalized is not None:
                    steady_segments.append(finalized)
                current_segment = {}

        # Close a segment still open at the end of the data.
        if current_segment:
            finalized = self._finalize_segment(
                current_segment, df['time'].iloc[-1], len(df) - 1,
                std_threshold, duration_threshold,
            )
            if finalized is not None:
                steady_segments.append(finalized)

        # Re-mark accepted segments end-to-end (per-sample flags are already 1
        # inside a segment, so this is mainly defensive).
        for segment in steady_segments:
            df.loc[segment['start_idx']:segment['end_idx'], 'is_steady'] = 1

        return df, steady_segments

    @staticmethod
    def _finalize_segment(segment, end_time, end_idx, std_threshold, duration_threshold):
        """Close ``segment`` at ``end_time``/``end_idx``; return it enriched
        with statistics, or None when shorter than ``duration_threshold``."""
        segment['end_time'] = end_time
        segment['end_idx'] = end_idx
        duration = (segment['end_time'] - segment['start_time']).total_seconds()
        if duration < duration_threshold:
            return None

        weights_array = np.array(segment['weights'])
        segment['duration'] = duration
        segment['mean_weight'] = np.mean(weights_array)
        segment['std_weight'] = np.std(weights_array)
        segment['min_weight'] = np.min(weights_array)
        segment['max_weight'] = np.max(weights_array)
        segment['fluctuation_range'] = (segment['std_weight'] / segment['mean_weight']) * 100

        # Confidence: 100 at zero fluctuation, losing 50 points per
        # std_threshold of fluctuation, clamped to [50, 100].
        confidence = 100 - (segment['fluctuation_range'] / std_threshold) * 50
        segment['confidence'] = max(50, min(100, confidence))
        return segment
| | | |
def show_metered_weight_advanced() -> None:
    """Streamlit page: advanced metered-weight (米重) analysis and prediction.

    NOTE(review): this chunk is incomplete.  Names this body relies on are
    defined in parts of the file not visible here: the streamlit import
    (``st``), ``model_options``, ``df_analysis``, the training code that
    produces ``model``, ``model_type``, ``scaler_X``, ``scaler_y``,
    ``feature_columns`` and the metrics ``r2``/``mse``/``mae``/``rmse``,
    plus ``fig_importance``.  Comments below describe only the visible code.
    """
    # Initialize services

    # NOTE(review): pre-seeding a session key that is also used as a widget
    # `key` below can trigger Streamlit's "value set via Session State"
    # warning — confirm this is intended.
    st.session_state['ma_model_type'] = 'RandomForest'
    if 'ma_sequence_length' not in st.session_state:
        st.session_state['ma_sequence_length'] = 10

    if 'ma_use_steady_data' not in st.session_state:
        st.session_state['ma_use_steady_data'] = True
    if 'ma_steady_window' not in st.session_state:
        st.session_state['ma_steady_window'] = 20
    if 'ma_steady_threshold' not in st.session_state:
        st.session_state['ma_steady_threshold'] = 0.5

    # Default feature list (user selection is no longer allowed)
    default_features = ['螺杆转速', '机头压力', '流程主速', '螺杆温度',
                        '后机筒温度', '前机筒温度', '机头温度']

    # NOTE(review): dangling fragment — the opening of this widget call
    # (presumably st.selectbox for the model type) is missing from this chunk.
        options=model_options,
        key="ma_model_type",
        help="选择用于预测的模型类型"
    )

    # Steady-state detection configuration
    st.markdown("---")
    steady_cols = st.columns(3)
    with steady_cols[0]:
        st.write("⚖️ **稳态识别配置**")
        st.checkbox(
            "仅使用稳态数据进行训练",
            value=st.session_state['ma_use_steady_data'],
            key="ma_use_steady_data",
            help="启用后,只使用米重稳态时段的数据进行模型训练"
        )

    with steady_cols[1]:
        st.write("📏 **稳态参数**")
        st.slider(
            "滑动窗口大小 (秒)",
            min_value=5,
            max_value=60,
            value=st.session_state['ma_steady_window'],
            step=5,
            key="ma_steady_window",
            help="用于稳态识别的滑动窗口大小"
        )

    with steady_cols[2]:
        st.write("📊 **稳态阈值**")
        st.slider(
            "波动阈值 (%)",
            min_value=0.1,
            max_value=2.0,
            value=st.session_state['ma_steady_threshold'],
            step=0.1,
            key="ma_steady_threshold",
            help="稳态识别的波动范围阈值"
        )

    # Rename the metered-weight column
    # NOTE(review): df_analysis is created in code missing from this chunk.
    df_analysis.rename(columns={'metered_weight': '米重'}, inplace=True)

    # Steady-state detection
    steady_detector = SteadyStateDetector()

    # Fetch steady-state detection parameters from session state
    use_steady_data = st.session_state.get('ma_use_steady_data', True)
    steady_window = st.session_state.get('ma_steady_window', 20)
    steady_threshold = st.session_state.get('ma_steady_threshold', 0.5)

    # Run steady-state detection (duration_threshold left at its default)
    df_analysis_with_steady, steady_segments = steady_detector.detect_steady_state(
        df_analysis,
        weight_col='米重',
        window_size=steady_window,
        std_threshold=steady_threshold
    )

    # Replace df_analysis with the steady-state-annotated frame
    df_analysis = df_analysis_with_steady

    # Steady-state data visualization
    st.subheader("📈 稳态数据分布")

    # Build the steady-state distribution chart
    fig_steady = go.Figure()

    # Raw metered-weight curve (light gray background trace)
    fig_steady.add_trace(go.Scatter(
        x=df_analysis['time'],
        y=df_analysis['米重'],
        name='原始米重',
        mode='lines',
        line=dict(color='lightgray', width=1)
    ))

    # Steady data points (green markers)
    steady_data_points = df_analysis[df_analysis['is_steady'] == 1]
    fig_steady.add_trace(go.Scatter(
        x=steady_data_points['time'],
        y=steady_data_points['米重'],
        name='稳态米重',
        mode='markers',
        marker=dict(color='green', size=3, opacity=0.6)
    ))

    # Non-steady data points (red markers)
    non_steady_data_points = df_analysis[df_analysis['is_steady'] == 0]
    fig_steady.add_trace(go.Scatter(
        x=non_steady_data_points['time'],
        y=non_steady_data_points['米重'],
        name='非稳态米重',
        mode='markers',
        marker=dict(color='red', size=3, opacity=0.6)
    ))

    # Chart layout
    fig_steady.update_layout(
        title="米重数据稳态分布",
        xaxis=dict(title="时间"),
        yaxis=dict(title="米重 (Kg/m)"),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        height=500
    )

    # Render the chart
    st.plotly_chart(fig_steady, use_container_width=True)

    # Steady-state statistics
    total_data = len(df_analysis)
    steady_data = len(df_analysis[df_analysis['is_steady'] == 1])
    steady_ratio = (steady_data / total_data * 100) if total_data > 0 else 0

    stats_cols = st.columns(3)
    stats_cols[0].metric("总数据量", total_data)
    stats_cols[1].metric("稳态数据量", steady_data)
    stats_cols[2].metric("稳态数据比例", f"{steady_ratio:.1f}%")

    # --- Raw data trend chart ---
    st.subheader("📈 原始数据趋势图")

    # NOTE(review): the `if` matching this `else:` is missing from this chunk.
    else:
        # NOTE(review): no `except`/`finally` for this `try:` is visible in
        # this chunk — presumably it follows after the visible range.
        try:
            # Prepare the data
            # Decide from configuration whether to use only steady data
            use_steady_data = st.session_state.get('ma_use_steady_data', True)
            if use_steady_data:
                df_filtered = df_analysis[df_analysis['is_steady'] == 1]
                st.info(f"已过滤非稳态数据,使用 {len(df_filtered)} 条稳态数据进行训练")
            else:
                df_filtered = df_analysis.copy()

            # First make sure there are no NaN values
            # NOTE(review): the first assignment below is dead — it is
            # immediately overwritten by the df_filtered-based one.
            df_analysis_clean = df_analysis.dropna(subset=default_features + ['米重'])
            df_analysis_clean = df_filtered.dropna(subset=default_features + ['米重'])

            # Check the remaining amount of data after cleaning
            # NOTE(review): the body of this `if` (presumably a warning and an
            # early exit) is missing from this chunk.
            if len(df_analysis_clean) < 30:

            # New DataFrame holding all features plus the target variable
            all_features = df_analysis_clean[default_features + ['米重']].copy()

            # Drop any remaining NaN values
            all_features_clean = all_features.dropna()
            # NOTE(review): stray fragment — the call this `)` closes
            # (presumably the fig_importance construction) is missing here.
            )
            st.plotly_chart(fig_importance, width='stretch')

            # --- Prediction ---
            st.subheader("🔮 米重预测")

            # Prediction input form
            st.write("输入特征值进行米重预测:")
            predict_cols = st.columns(2)
            input_features = {}

            for i, feature in enumerate(default_features):
                with predict_cols[i % 2]:
                    # Per-feature statistics to bound and seed the input widget
                    min_val = df_analysis_clean[feature].min()
                    max_val = df_analysis_clean[feature].max()
                    mean_val = df_analysis_clean[feature].mean()

                    input_features[feature] = st.number_input(
                        f"{feature}",
                        key=f"ma_pred_{feature}",
                        value=float(mean_val),
                        min_value=float(min_val),
                        max_value=float(max_val),
                        step=0.1
                    )

            if st.button("预测米重"):
                # Assemble the prediction input
                input_df = pd.DataFrame([input_features])

                # Combine features
                # NOTE(review): concat of a single frame is a no-op copy —
                # possibly a leftover from a removed second feature set.
                input_combined = pd.concat([input_df], axis=1)

                # Predict; SVR/MLP were trained on scaled data, so scale the
                # input and inverse-transform the output
                if model_type in ['SVR', 'MLP']:
                    input_scaled = scaler_X.transform(input_combined)
                    prediction_scaled = model.predict(input_scaled)
                    predicted_weight = scaler_y.inverse_transform(prediction_scaled.reshape(-1, 1)).ravel()[0]

                else:
                    predicted_weight = model.predict(input_combined)[0]

                # Show the prediction result
                st.success(f"预测米重: {predicted_weight:.4f} Kg/m")

            # --- Model saving ---
            # NOTE(review): the "�" below is a mojibake'd emoji in the
            # original source — left untouched (runtime string).
            st.subheader("� 模型保存")

            # Create the model directory if it does not exist
            model_dir = "saved_models"
            os.makedirs(model_dir, exist_ok=True)

            # Bundle the model together with its metadata; scalers are only
            # stored for the model types that need them
            model_info = {
                'model': model,
                'features': feature_columns,
                'scaler_X': scaler_X if model_type in ['SVR', 'MLP'] else None,
                'scaler_y': scaler_y if model_type in ['SVR', 'MLP'] else None,
                'model_type': model_type,
                'created_at': datetime.now(),
                'r2_score': r2,
                'mse': mse,
                'mae': mae,
                'rmse': rmse,
                'use_steady_data': use_steady_data
            }

            # Timestamped model filename
            model_filename = f"advanced_{model_type.lower()}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.joblib"
            model_path = os.path.join(model_dir, model_filename)

            # Persist the model bundle
            joblib.dump(model_info, model_path)

            st.success(f"模型已成功保存: {model_filename}")
            st.info(f"保存路径: {model_path}")

            # --- Data preview ---
            st.subheader("🔍 数据预览")
            st.dataframe(df_analysis.head(20), width='stretch')