baoshiwei
2026-01-20 faa25a85c10aa0fa2df824318a4bfa542f6a5a46
app/services/data_processing_service.py
@@ -18,24 +18,15 @@
            # 处理缺失值
            cleaned_df = cleaned_df.fillna(0)
            
            # 确保数据类型正确
            cleaned_df['count_under'] = cleaned_df['count_under'].astype(int)
            cleaned_df['count_in_range'] = cleaned_df['count_in_range'].astype(int)
            cleaned_df['count_over'] = cleaned_df['count_over'].astype(int)
            # 确保数据类型正确(仅当列存在时)
            for col in ['count_under', 'count_in_range', 'count_over']:
                if col in cleaned_df.columns:
                    cleaned_df[col] = cleaned_df[col].astype(int)
            
            # 确保time是datetime类型并处理时区
            if 'time' in cleaned_df.columns:
                # 转换为datetime类型
                cleaned_df['time'] = pd.to_datetime(cleaned_df['time'])
                # 处理时区
                # 检查是否已经有时区信息
                if cleaned_df['time'].dt.tz is None:
                    # 如果没有时区信息,假设是UTC时间并添加时区
                    cleaned_df['time'] = cleaned_df['time'].dt.tz_localize('UTC')
                # 转换为上海时区(UTC+8)
                cleaned_df['time'] = cleaned_df['time'].dt.tz_convert('Asia/Shanghai')
            
            return cleaned_df
        except Exception as e:
@@ -53,25 +44,17 @@
        
        try:
            stats = {
                'total_records': len(df),
                'count_under': {
                    'mean': df['count_under'].mean(),
                    'sum': df['count_under'].sum(),
                    'max': df['count_under'].max(),
                    'min': df['count_under'].min()
                },
                'count_in_range': {
                    'mean': df['count_in_range'].mean(),
                    'sum': df['count_in_range'].sum(),
                    'max': df['count_in_range'].max(),
                    'min': df['count_in_range'].min()
                },
                'count_over': {
                    'mean': df['count_over'].mean(),
                    'sum': df['count_over'].sum(),
                    'max': df['count_over'].max(),
                    'min': df['count_over'].min()
                'total_records': len(df)
                }
            # 仅当列存在时计算统计信息
            for col in ['count_under', 'count_in_range', 'count_over']:
                if col in df.columns:
                    stats[col] = {
                        'mean': df[col].mean(),
                        'sum': df[col].sum(),
                        'max': df[col].max(),
                        'min': df[col].min()
            }
            return stats
        except Exception as e: