baoshiwei
2026-02-02 4048393750de17cfa2ae59fec1380a81ea2b2a6b
app/services/data_processing_service.py
@@ -18,24 +18,15 @@
            # 处理缺失值
            cleaned_df = cleaned_df.fillna(0)
            
            # 确保数据类型正确
            cleaned_df['count_under'] = cleaned_df['count_under'].astype(int)
            cleaned_df['count_in_range'] = cleaned_df['count_in_range'].astype(int)
            cleaned_df['count_over'] = cleaned_df['count_over'].astype(int)
            # 确保数据类型正确(仅当列存在时)
            for col in ['count_under', 'count_in_range', 'count_over']:
                if col in cleaned_df.columns:
                    cleaned_df[col] = cleaned_df[col].astype(int)
            
            # 确保time是datetime类型并处理时区
            if 'time' in cleaned_df.columns:
                # 转换为datetime类型
                cleaned_df['time'] = pd.to_datetime(cleaned_df['time'])
                # 处理时区
                # 检查是否已经有时区信息
                if cleaned_df['time'].dt.tz is None:
                    # 如果没有时区信息,假设是UTC时间并添加时区
                    cleaned_df['time'] = cleaned_df['time'].dt.tz_localize('UTC')
                # 转换为上海时区(UTC+8)
                cleaned_df['time'] = cleaned_df['time'].dt.tz_convert('Asia/Shanghai')
            
            return cleaned_df
        except Exception as e:
@@ -53,26 +44,18 @@
        
        try:
            stats = {
                'total_records': len(df),
                'count_under': {
                    'mean': df['count_under'].mean(),
                    'sum': df['count_under'].sum(),
                    'max': df['count_under'].max(),
                    'min': df['count_under'].min()
                },
                'count_in_range': {
                    'mean': df['count_in_range'].mean(),
                    'sum': df['count_in_range'].sum(),
                    'max': df['count_in_range'].max(),
                    'min': df['count_in_range'].min()
                },
                'count_over': {
                    'mean': df['count_over'].mean(),
                    'sum': df['count_over'].sum(),
                    'max': df['count_over'].max(),
                    'min': df['count_over'].min()
                }
                'total_records': len(df)
            }
            # 仅当列存在时计算统计信息
            for col in ['count_under', 'count_in_range', 'count_over']:
                if col in df.columns:
                    stats[col] = {
                        'mean': df[col].mean(),
                        'sum': df[col].sum(),
                        'max': df[col].max(),
                        'min': df[col].min()
                    }
            return stats
        except Exception as e:
            print(f"计算统计信息失败: {e}")
@@ -228,20 +211,21 @@
        try:
            # 识别极值点
            extreme_points = self.identify_local_maxima(df)
            # print("识别极值点:", extreme_points)
            # 识别阶段最大值
            phase_maxima = self.identify_phase_maxima(df)
            # phase_maxima = self.identify_phase_maxima(df)
            # print("识别阶段最大值:", phase_maxima)
            
            # 计算每个极值点的合格率
            if not extreme_points.empty:
                extreme_points['pass_rate'] = extreme_points.apply(self.calculate_pass_rate, axis=1)
            
            # 计算整体合格率
            overall_pass_rate = self.calculate_overall_pass_rate(df)
            overall_pass_rate = self.calculate_overall_pass_rate(extreme_points)
            
            return {
                'extreme_points': extreme_points,
                'phase_maxima': phase_maxima,
                'phase_maxima': pd.DataFrame(),
                'overall_pass_rate': overall_pass_rate
            }
        except Exception as e: