baoshiwei
2026-03-13 6628f663b636675bcaea316f2deaddf337de480e
app/pages/metered_weight_regression.py
@@ -3,12 +3,117 @@
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import joblib
import os
from datetime import datetime, timedelta
from app.services.extruder_service import ExtruderService
from app.services.main_process_service import MainProcessService
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
# Steady-state detection (defined inline in this module rather than imported)
class SteadyStateDetector:
    """Flag steady-state stretches of a weight signal in a time-indexed frame."""

    def __init__(self):
        pass

    def detect_steady_state(self, df, weight_col='米重', window_size=20, std_threshold=0.5, duration_threshold=60):
        """
        Mark the steady-state segments of the weight column.

        A row is a steady *candidate* when its rolling coefficient of variation
        (rolling std / rolling mean * 100) is below ``std_threshold`` and its
        weight is at least 0.1. Consecutive candidates form a run; a run is kept
        as a steady segment only when the time between its first and last row
        is at least ``duration_threshold`` seconds. Rows of runs that are too
        short are reported as not steady.

        The frame is modified in place: ``time`` is converted to datetime and the
        columns ``rolling_std``, ``rolling_mean``, ``fluctuation_range`` and
        ``is_steady`` (0/1) are added. The same object is returned.

        :param df: data frame with a ``time`` column and the weight column
        :param weight_col: name of the weight column
        :param window_size: rolling window length in rows (this equals seconds
            only if the data holds one sample per second)
        :param std_threshold: upper bound for the rolling coefficient of
            variation, in percent
        :param duration_threshold: minimum segment duration in seconds
        :return: ``(df, steady_segments)``; each segment is a dict with the keys
            ``start_time``, ``start_idx``, ``weights``, ``end_time``, ``end_idx``,
            ``duration``, ``mean_weight``, ``std_weight``, ``min_weight``,
            ``max_weight``, ``fluctuation_range`` and ``confidence``.
            ``start_idx``/``end_idx`` are inclusive row positions.
        """
        if df is None or df.empty:
            return df, []

        # Durations are computed by subtracting timestamps, so force datetime.
        df['time'] = pd.to_datetime(df['time'])

        # Rolling statistics. min_periods is capped by the window size because
        # pandas rejects min_periods > window.
        rolling = df[weight_col].rolling(window=window_size, min_periods=min(5, window_size))
        df['rolling_std'] = rolling.std()
        df['rolling_mean'] = rolling.mean()

        # Coefficient of variation in percent.
        # NOTE: warm-up rows (fewer samples than min_periods) have a NaN std and
        # are filled with 0 here, so they count as steady candidates.
        df['fluctuation_range'] = (df['rolling_std'] / df['rolling_mean']) * 100
        df['fluctuation_range'] = df['fluctuation_range'].fillna(0)

        candidate = (
            (df['fluctuation_range'] < std_threshold) &
            (df[weight_col] >= 0.1)
        ).to_numpy(dtype=bool)

        # Locate runs of consecutive candidates by position, so the result does
        # not depend on the frame's index labels. Padding with False on both
        # sides turns every run into one +1 edge (start) and one -1 edge (end).
        padded = np.concatenate(([False], candidate, [False])).astype(np.int8)
        edges = np.diff(padded)
        run_starts = np.flatnonzero(edges == 1).tolist()
        run_ends = (np.flatnonzero(edges == -1) - 1).tolist()

        times = df['time']
        weights = df[weight_col]
        steady_segments = []
        is_steady = np.zeros(len(df), dtype=np.int64)
        for start, end in zip(run_starts, run_ends):
            segment = self._summarize_run(
                times, weights, start, end, std_threshold, duration_threshold
            )
            if segment is None:
                continue
            steady_segments.append(segment)
            # Only runs that passed the duration check end up flagged.
            is_steady[start:end + 1] = 1

        df['is_steady'] = is_steady
        return df, steady_segments

    @staticmethod
    def _summarize_run(times, weights, start, end, std_threshold, duration_threshold):
        """Build the segment dict for rows ``start..end`` (inclusive positions), or None if the run is too short."""
        start_time = times.iloc[start]
        end_time = times.iloc[end]
        duration = (end_time - start_time).total_seconds()
        # Written as "not >=" so that a NaN duration (NaT timestamps) is rejected.
        if not duration >= duration_threshold:
            return None

        values = weights.iloc[start:end + 1].to_numpy()
        mean_weight = np.mean(values)
        # np.std is the population std (ddof=0), unlike the rolling sample std.
        std_weight = np.std(values)
        fluctuation = (std_weight / mean_weight) * 100
        # Confidence falls linearly from 100 (no fluctuation) and is clamped to [50, 100].
        confidence = 100 - (fluctuation / std_threshold) * 50
        confidence = max(50, min(100, confidence))

        return {
            'start_time': start_time,
            'start_idx': start,
            'weights': values.tolist(),
            'end_time': end_time,
            'end_idx': end,
            'duration': duration,
            'mean_weight': mean_weight,
            'std_weight': std_weight,
            'min_weight': np.min(values),
            'max_weight': np.max(values),
            'fluctuation_range': fluctuation,
            'confidence': confidence,
        }
def show_metered_weight_regression():
@@ -33,6 +138,12 @@
            '螺杆转速', '机头压力', '流程主速', '螺杆温度', 
            '后机筒温度', '前机筒温度', '机头温度'
        ]
    if 'mr_use_steady_data' not in st.session_state:
        st.session_state['mr_use_steady_data'] = True
    if 'mr_steady_window' not in st.session_state:
        st.session_state['mr_steady_window'] = 20
    if 'mr_steady_threshold' not in st.session_state:
        st.session_state['mr_steady_threshold'] = 0.5
    # 定义回调函数
    def update_dates(qs):
@@ -123,6 +234,42 @@
            st.session_state['mr_time_offset'] = time_offset
        with offset_cols[2]:
            st.write(f"当前偏移: {time_offset} 分钟")
        # 稳态识别配置
        st.markdown("---")
        steady_cols = st.columns(3)
        with steady_cols[0]:
            st.write("⚖️ **稳态识别配置**")
            st.checkbox(
                "仅使用稳态数据进行训练",
                value=st.session_state['mr_use_steady_data'],
                key="mr_use_steady_data",
                help="启用后,只使用米重稳态时段的数据进行模型训练"
            )
        with steady_cols[1]:
            st.write("📏 **稳态参数**")
            st.slider(
                "滑动窗口大小 (秒)",
                min_value=5,
                max_value=60,
                value=st.session_state['mr_steady_window'],
                step=5,
                key="mr_steady_window",
                help="用于稳态识别的滑动窗口大小"
            )
        with steady_cols[2]:
            st.write("📊 **稳态阈值**")
            st.slider(
                "波动阈值 (%)",
                min_value=0.1,
                max_value=2.0,
                value=st.session_state['mr_steady_threshold'],
                step=0.1,
                key="mr_steady_threshold",
                help="稳态识别的波动范围阈值"
            )
        # 特征选择
        st.markdown("---")
@@ -305,6 +452,82 @@
            # 重命名米重列
            df_analysis.rename(columns={'metered_weight': '米重'}, inplace=True)
            # 稳态识别
            steady_detector = SteadyStateDetector()
            # 获取稳态识别参数
            use_steady_data = st.session_state.get('mr_use_steady_data', True)
            steady_window = st.session_state.get('mr_steady_window', 20)
            steady_threshold = st.session_state.get('mr_steady_threshold', 0.5)
            # 执行稳态识别
            df_analysis_with_steady, steady_segments = steady_detector.detect_steady_state(
                df_analysis,
                weight_col='米重',
                window_size=steady_window,
                std_threshold=steady_threshold
            )
            # 更新df_analysis为包含稳态标记的数据
            df_analysis = df_analysis_with_steady
            # 稳态数据可视化
            st.subheader("📈 稳态数据分布")
            # 创建稳态数据可视化图表
            fig_steady = go.Figure()
            # 添加原始米重曲线
            fig_steady.add_trace(go.Scatter(
                x=df_analysis['time'],
                y=df_analysis['米重'],
                name='原始米重',
                mode='lines',
                line=dict(color='lightgray', width=1)
            ))
            # 添加稳态数据点
            steady_data_points = df_analysis[df_analysis['is_steady'] == 1]
            fig_steady.add_trace(go.Scatter(
                x=steady_data_points['time'],
                y=steady_data_points['米重'],
                name='稳态米重',
                mode='markers',
                marker=dict(color='green', size=3, opacity=0.6)
            ))
            # 添加非稳态数据点
            non_steady_data_points = df_analysis[df_analysis['is_steady'] == 0]
            fig_steady.add_trace(go.Scatter(
                x=non_steady_data_points['time'],
                y=non_steady_data_points['米重'],
                name='非稳态米重',
                mode='markers',
                marker=dict(color='red', size=3, opacity=0.6)
            ))
            # 配置图表布局
            fig_steady.update_layout(
                title="米重数据稳态分布",
                xaxis=dict(title="时间"),
                yaxis=dict(title="米重 (Kg/m)"),
                legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
                height=500
            )
            # 显示图表
            st.plotly_chart(fig_steady, use_container_width=True)
            # 显示稳态统计
            total_data = len(df_analysis)
            steady_data = len(df_analysis[df_analysis['is_steady'] == 1])
            steady_ratio = (steady_data / total_data * 100) if total_data > 0 else 0
            stats_cols = st.columns(3)
            stats_cols[0].metric("总数据量", total_data)
            stats_cols[1].metric("稳态数据量", steady_data)
            stats_cols[2].metric("稳态数据比例", f"{steady_ratio:.1f}%")
            # --- 原始数据趋势图 ---
            st.subheader("📈 原始数据趋势图")
@@ -440,8 +663,16 @@
                    st.warning(f"数据中缺少以下特征: {', '.join(missing_features)}")
                else:
                    # 准备数据
                    X = df_analysis[st.session_state['mr_selected_features']]
                    y = df_analysis['米重']
                    # 根据配置决定是否只使用稳态数据
                    use_steady_data = st.session_state.get('mr_use_steady_data', True)
                    if use_steady_data:
                        df_filtered = df_analysis[df_analysis['is_steady'] == 1]
                        st.info(f"已过滤非稳态数据,使用 {len(df_filtered)} 条稳态数据进行训练")
                    else:
                        df_filtered = df_analysis.copy()
                    X = df_filtered[st.session_state['mr_selected_features']]
                    y = df_filtered['米重']
                    # 清理数据中的NaN值
                    combined = pd.concat([X, y], axis=1)
@@ -454,7 +685,7 @@
                        # 重新分离X和y
                        X_clean = combined_clean[st.session_state['mr_selected_features']]
                        y_clean = combined_clean['米重']
                        # 分割训练集和测试集
                        X_train, X_test, y_train, y_test = train_test_split(X_clean, y_clean, test_size=0.2, random_state=42)
@@ -580,37 +811,42 @@
                        })
                        st.dataframe(coef_df, use_container_width=True)
                        # --- 预测功能 ---
                        st.subheader("🔮 米重预测")
                        # --- 模型保存功能 ---
                        st.subheader("💾 模型保存")
                        # 创建预测表单
                        st.write("输入特征值进行米重预测:")
                        predict_cols = st.columns(2)
                        input_features = {}
                        # 创建模型保存表单
                        st.write("保存训练好的模型权重:")
                        model_name = st.text_input(
                            "模型名称",
                            value=f"linear_regression_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
                            help="请输入模型名称,模型将保存为该名称的.joblib文件"
                        )
                        for i, feature in enumerate(st.session_state['mr_selected_features']):
                            with predict_cols[i % 2]:
                                # 获取特征的统计信息
                                min_val = df_analysis[feature].min()
                                max_val = df_analysis[feature].max()
                                mean_val = df_analysis[feature].mean()
                                input_features[feature] = st.number_input(
                                    f"{feature}",
                                    key=f"pred_{feature}",
                                    value=float(mean_val),
                                    min_value=float(min_val),
                                    max_value=float(max_val),
                                    step=0.1
                                )
                        if st.button("预测米重"):
                            # 准备预测数据
                            input_data = [[input_features[feature] for feature in st.session_state['mr_selected_features']]]
                            # 预测
                            predicted_weight = model.predict(input_data)[0]
                            # 显示预测结果
                            st.success(f"预测米重: {predicted_weight:.4f} Kg/m")
                        if st.button("保存模型"):
                            # 确保模型目录存在
                            model_dir = "saved_models"
                            os.makedirs(model_dir, exist_ok=True)
                            # 保存模型
                            model_path = os.path.join(model_dir, f"{model_name}.joblib")
                            try:
                                # 保存模型权重和相关信息
                                model_info = {
                                    'model': model,
                                    'features': st.session_state['mr_selected_features'],
                                    'scaler': None,  # 线性回归不需要标化器
                                    'model_type': 'linear_regression',
                                    'created_at': datetime.now(),
                                    'r2_score': r2,
                                    'mse': mse,
                                    'mae': mae,
                                    'rmse': rmse,
                                    'use_steady_data': use_steady_data
                                }
                                joblib.dump(model_info, model_path)
                                st.success(f"模型已成功保存到: {model_path}")
                            except Exception as e:
                                st.error(f"模型保存失败: {e}")
                        # --- 数据预览 ---
                        st.subheader("🔍 数据预览")