baoshiwei
2026-03-13 6628f663b636675bcaea316f2deaddf337de480e
app/pages/metered_weight_advanced.py
@@ -3,6 +3,8 @@
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import joblib
import os
from datetime import datetime, timedelta
from app.services.extruder_service import ExtruderService
from app.services.main_process_service import MainProcessService
@@ -13,8 +15,107 @@
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
# 导入稳态识别功能
class SteadyStateDetector:
    """Detect steady-state segments in a metered-weight time series.

    A sample is considered "steady" when the rolling coefficient of
    variation (std / mean, expressed as a percent) is below
    ``std_threshold`` and the weight is at least 0.1.  Consecutive steady
    samples that span at least ``duration_threshold`` seconds form a
    steady segment.
    """

    def __init__(self):
        pass

    @staticmethod
    def _finalize_segment(segment, end_time, end_idx, duration_threshold, std_threshold):
        """Close a candidate segment and attach its statistics.

        Returns the enriched segment dict when it lasts at least
        ``duration_threshold`` seconds, otherwise ``None``.
        (Extracted to remove the duplicated mid-loop / end-of-data logic.)
        """
        segment['end_time'] = end_time
        segment['end_idx'] = end_idx
        duration = (segment['end_time'] - segment['start_time']).total_seconds()
        if duration < duration_threshold:
            return None
        weights_array = np.array(segment['weights'])
        segment['duration'] = duration
        segment['mean_weight'] = np.mean(weights_array)
        segment['std_weight'] = np.std(weights_array)
        segment['min_weight'] = np.min(weights_array)
        segment['max_weight'] = np.max(weights_array)
        segment['fluctuation_range'] = (segment['std_weight'] / segment['mean_weight']) * 100
        # Confidence degrades linearly with relative fluctuation, clamped to [50, 100].
        confidence = 100 - (segment['fluctuation_range'] / std_threshold) * 50
        segment['confidence'] = max(50, min(100, confidence))
        return segment

    def detect_steady_state(self, df, weight_col='米重', window_size=20, std_threshold=0.5, duration_threshold=60):
        """
        Tag steady-state segments in the metered-weight data.

        NOTE: mutates ``df`` in place — converts 'time' to datetime and adds
        the helper columns 'rolling_std', 'rolling_mean', 'fluctuation_range'
        and 'is_steady'.

        :param df: DataFrame with a 'time' column and the weight column.
            Index arithmetic below assumes a contiguous integer index
            (e.g. a default RangeIndex) — TODO confirm at call sites.
        :param weight_col: name of the metered-weight column
        :param window_size: rolling window size in rows (documented upstream
            as seconds, which presumably assumes ~1 sample/second — verify)
        :param std_threshold: threshold (%) on the rolling coefficient of variation
        :param duration_threshold: minimum steady duration in seconds
        :return: (df with steady markers, list of steady-segment dicts)
        """
        if df is None or df.empty:
            return df, []
        # Ensure the time column is datetime so duration arithmetic works.
        df['time'] = pd.to_datetime(df['time'])
        # Rolling statistics over the weight signal.
        df['rolling_std'] = df[weight_col].rolling(window=window_size, min_periods=5).std()
        df['rolling_mean'] = df[weight_col].rolling(window=window_size, min_periods=5).mean()
        # Relative fluctuation (%); warm-up NaNs are filled with 0 and thus
        # count as steady until the rolling window has enough samples.
        df['fluctuation_range'] = (df['rolling_std'] / df['rolling_mean']) * 100
        df['fluctuation_range'] = df['fluctuation_range'].fillna(0)
        # Point-wise steady flag: low fluctuation and a physically meaningful weight.
        df['is_steady'] = 0
        steady_condition = (
            (df['fluctuation_range'] < std_threshold) &
            (df[weight_col] >= 0.1)
        )
        df.loc[steady_condition, 'is_steady'] = 1
        # Group consecutive steady points into candidate segments.
        steady_segments = []
        current_segment = {}
        for i, row in df.iterrows():
            if row['is_steady'] == 1:
                if not current_segment:
                    current_segment = {
                        'start_time': row['time'],
                        'start_idx': i,
                        'weights': [row[weight_col]],
                    }
                else:
                    current_segment['weights'].append(row[weight_col])
            elif current_segment:
                # Segment ended on the previous row.
                end_time = df.loc[i - 1, 'time'] if i > 0 else df.loc[i, 'time']
                finished = self._finalize_segment(
                    current_segment, end_time, i - 1, duration_threshold, std_threshold
                )
                if finished is not None:
                    steady_segments.append(finished)
                current_segment = {}
        # Close a segment that runs to the end of the data.
        if current_segment:
            finished = self._finalize_segment(
                current_segment, df['time'].iloc[-1], len(df) - 1,
                duration_threshold, std_threshold
            )
            if finished is not None:
                steady_segments.append(finished)
        # Re-mark accepted segments in full; point-wise flags on short runs
        # that did not qualify as segments are left untouched (as before).
        for segment in steady_segments:
            df.loc[segment['start_idx']:segment['end_idx'], 'is_steady'] = 1
        return df, steady_segments
def show_metered_weight_advanced():
    # 初始化服务
@@ -35,7 +136,13 @@
        st.session_state['ma_model_type'] = 'RandomForest'
    if 'ma_sequence_length' not in st.session_state:
        st.session_state['ma_sequence_length'] = 10
    if 'ma_use_steady_data' not in st.session_state:
        st.session_state['ma_use_steady_data'] = True
    if 'ma_steady_window' not in st.session_state:
        st.session_state['ma_steady_window'] = 20
    if 'ma_steady_threshold' not in st.session_state:
        st.session_state['ma_steady_threshold'] = 0.5
    # 默认特征列表(不再允许用户选择)
    default_features = ['螺杆转速', '机头压力', '流程主速', '螺杆温度', 
                       '后机筒温度', '前机筒温度', '机头温度']
@@ -126,6 +233,42 @@
                options=model_options,
                key="ma_model_type",
                help="选择用于预测的模型类型"
            )
        # 稳态识别配置
        st.markdown("---")
        steady_cols = st.columns(3)
        with steady_cols[0]:
            st.write("⚖️ **稳态识别配置**")
            st.checkbox(
                "仅使用稳态数据进行训练",
                value=st.session_state['ma_use_steady_data'],
                key="ma_use_steady_data",
                help="启用后,只使用米重稳态时段的数据进行模型训练"
            )
        with steady_cols[1]:
            st.write("📏 **稳态参数**")
            st.slider(
                "滑动窗口大小 (秒)",
                min_value=5,
                max_value=60,
                value=st.session_state['ma_steady_window'],
                step=5,
                key="ma_steady_window",
                help="用于稳态识别的滑动窗口大小"
            )
        with steady_cols[2]:
            st.write("📊 **稳态阈值**")
            st.slider(
                "波动阈值 (%)",
                min_value=0.1,
                max_value=2.0,
                value=st.session_state['ma_steady_threshold'],
                step=0.1,
                key="ma_steady_threshold",
                help="稳态识别的波动范围阈值"
            )
@@ -249,6 +392,82 @@
            # 重命名米重列
            df_analysis.rename(columns={'metered_weight': '米重'}, inplace=True)
            # 稳态识别
            steady_detector = SteadyStateDetector()
            # 获取稳态识别参数
            use_steady_data = st.session_state.get('ma_use_steady_data', True)
            steady_window = st.session_state.get('ma_steady_window', 20)
            steady_threshold = st.session_state.get('ma_steady_threshold', 0.5)
            # 执行稳态识别
            df_analysis_with_steady, steady_segments = steady_detector.detect_steady_state(
                df_analysis,
                weight_col='米重',
                window_size=steady_window,
                std_threshold=steady_threshold
            )
            # 更新df_analysis为包含稳态标记的数据
            df_analysis = df_analysis_with_steady
            # 稳态数据可视化
            st.subheader("📈 稳态数据分布")
            # 创建稳态数据可视化图表
            fig_steady = go.Figure()
            # 添加原始米重曲线
            fig_steady.add_trace(go.Scatter(
                x=df_analysis['time'],
                y=df_analysis['米重'],
                name='原始米重',
                mode='lines',
                line=dict(color='lightgray', width=1)
            ))
            # 添加稳态数据点
            steady_data_points = df_analysis[df_analysis['is_steady'] == 1]
            fig_steady.add_trace(go.Scatter(
                x=steady_data_points['time'],
                y=steady_data_points['米重'],
                name='稳态米重',
                mode='markers',
                marker=dict(color='green', size=3, opacity=0.6)
            ))
            # 添加非稳态数据点
            non_steady_data_points = df_analysis[df_analysis['is_steady'] == 0]
            fig_steady.add_trace(go.Scatter(
                x=non_steady_data_points['time'],
                y=non_steady_data_points['米重'],
                name='非稳态米重',
                mode='markers',
                marker=dict(color='red', size=3, opacity=0.6)
            ))
            # 配置图表布局
            fig_steady.update_layout(
                title="米重数据稳态分布",
                xaxis=dict(title="时间"),
                yaxis=dict(title="米重 (Kg/m)"),
                legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
                height=500
            )
            # 显示图表
            st.plotly_chart(fig_steady, use_container_width=True)
            # 显示稳态统计
            total_data = len(df_analysis)
            steady_data = len(df_analysis[df_analysis['is_steady'] == 1])
            steady_ratio = (steady_data / total_data * 100) if total_data > 0 else 0
            stats_cols = st.columns(3)
            stats_cols[0].metric("总数据量", total_data)
            stats_cols[1].metric("稳态数据量", steady_data)
            stats_cols[2].metric("稳态数据比例", f"{steady_ratio:.1f}%")
            # --- 原始数据趋势图 ---
            st.subheader("📈 原始数据趋势图")
@@ -381,8 +600,16 @@
            else:
                try:
                    # 准备数据
                    # 根据配置决定是否只使用稳态数据
                    use_steady_data = st.session_state.get('ma_use_steady_data', True)
                    if use_steady_data:
                        df_filtered = df_analysis[df_analysis['is_steady'] == 1]
                        st.info(f"已过滤非稳态数据,使用 {len(df_filtered)} 条稳态数据进行训练")
                    else:
                        df_filtered = df_analysis.copy()
                    # 首先确保df_analysis中没有NaN值
                    df_analysis_clean = df_analysis.dropna(subset=default_features + ['米重'])
                    df_analysis_clean = df_filtered.dropna(subset=default_features + ['米重'])
                    
                    # 检查清理后的数据量
                    if len(df_analysis_clean) < 30:
@@ -391,8 +618,8 @@
                        # 创建一个新的DataFrame来存储所有特征和目标变量
                        all_features = df_analysis_clean[default_features + ['米重']].copy()
                        
                  
                        # 清理所有NaN值
                        all_features_clean = all_features.dropna()
@@ -568,49 +795,38 @@
                                    )
                                    st.plotly_chart(fig_importance, width='stretch')
                                # --- 预测功能 ---
                                st.subheader("🔮 米重预测")
                                # 创建预测表单
                                st.write("输入特征值进行米重预测:")
                                predict_cols = st.columns(2)
                                input_features = {}
                                for i, feature in enumerate(default_features):
                                    with predict_cols[i % 2]:
                                        # 获取特征的统计信息
                                        min_val = df_analysis_clean[feature].min()
                                        max_val = df_analysis_clean[feature].max()
                                        mean_val = df_analysis_clean[feature].mean()
                                        input_features[feature] = st.number_input(
                                            f"{feature}",
                                            key=f"ma_pred_{feature}",
                                            value=float(mean_val),
                                            min_value=float(min_val),
                                            max_value=float(max_val),
                                            step=0.1
                                        )
                                if st.button("预测米重"):
                                    # 准备预测数据
                                    input_df = pd.DataFrame([input_features])
                                    # 合并特征
                                    input_combined = pd.concat([input_df], axis=1)
                                    # 预测
                                    if model_type in ['SVR', 'MLP']:
                                        input_scaled = scaler_X.transform(input_combined)
                                        prediction_scaled = model.predict(input_scaled)
                                        predicted_weight = scaler_y.inverse_transform(prediction_scaled.reshape(-1, 1)).ravel()[0]
                                    else:
                                        predicted_weight = model.predict(input_combined)[0]
                                    # 显示预测结果
                                    st.success(f"预测米重: {predicted_weight:.4f} Kg/m")
                                # --- 模型保存 ---
                                st.subheader("� 模型保存")
                                # 创建模型目录(如果不存在)
                                model_dir = "saved_models"
                                os.makedirs(model_dir, exist_ok=True)
                                # 准备模型信息
                                model_info = {
                                    'model': model,
                                    'features': feature_columns,
                                    'scaler_X': scaler_X if model_type in ['SVR', 'MLP'] else None,
                                    'scaler_y': scaler_y if model_type in ['SVR', 'MLP'] else None,
                                    'model_type': model_type,
                                    'created_at': datetime.now(),
                                    'r2_score': r2,
                                    'mse': mse,
                                    'mae': mae,
                                    'rmse': rmse,
                                    'use_steady_data': use_steady_data
                                }
                                # 生成模型文件名
                                model_filename = f"advanced_{model_type.lower()}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.joblib"
                                model_path = os.path.join(model_dir, model_filename)
                                # 保存模型
                                joblib.dump(model_info, model_path)
                                st.success(f"模型已成功保存: {model_filename}")
                                st.info(f"保存路径: {model_path}")
                                # --- 数据预览 ---
                                st.subheader("🔍 数据预览")
                                st.dataframe(df_analysis.head(20), width='stretch')