| | |
| | | # 处理缺失值 |
| | | cleaned_df = cleaned_df.fillna(0) |
| | | |
| | | # 确保数据类型正确 |
| | | cleaned_df['count_under'] = cleaned_df['count_under'].astype(int) |
| | | cleaned_df['count_in_range'] = cleaned_df['count_in_range'].astype(int) |
| | | cleaned_df['count_over'] = cleaned_df['count_over'].astype(int) |
| | | # 确保数据类型正确(仅当列存在时) |
| | | for col in ['count_under', 'count_in_range', 'count_over']: |
| | | if col in cleaned_df.columns: |
| | | cleaned_df[col] = cleaned_df[col].astype(int) |
| | | |
| | | # 确保time是datetime类型并处理时区 |
| | | if 'time' in cleaned_df.columns: |
| | | # 转换为datetime类型 |
| | | cleaned_df['time'] = pd.to_datetime(cleaned_df['time']) |
| | | |
| | | # 处理时区 |
| | | # 检查是否已经有时区信息 |
| | | if cleaned_df['time'].dt.tz is None: |
| | | # 如果没有时区信息,假设是UTC时间并添加时区 |
| | | cleaned_df['time'] = cleaned_df['time'].dt.tz_localize('UTC') |
| | | |
| | | # 转换为上海时区(UTC+8) |
| | | cleaned_df['time'] = cleaned_df['time'].dt.tz_convert('Asia/Shanghai') |
| | | |
| | | return cleaned_df |
| | | except Exception as e: |
| | |
| | | |
| | | try: |
| | | stats = { |
| | | 'total_records': len(df), |
| | | 'count_under': { |
| | | 'mean': df['count_under'].mean(), |
| | | 'sum': df['count_under'].sum(), |
| | | 'max': df['count_under'].max(), |
| | | 'min': df['count_under'].min() |
| | | }, |
| | | 'count_in_range': { |
| | | 'mean': df['count_in_range'].mean(), |
| | | 'sum': df['count_in_range'].sum(), |
| | | 'max': df['count_in_range'].max(), |
| | | 'min': df['count_in_range'].min() |
| | | }, |
| | | 'count_over': { |
| | | 'mean': df['count_over'].mean(), |
| | | 'sum': df['count_over'].sum(), |
| | | 'max': df['count_over'].max(), |
| | | 'min': df['count_over'].min() |
| | | } |
| | | 'total_records': len(df) |
| | | } |
| | | |
| | | # 仅当列存在时计算统计信息 |
| | | for col in ['count_under', 'count_in_range', 'count_over']: |
| | | if col in df.columns: |
| | | stats[col] = { |
| | | 'mean': df[col].mean(), |
| | | 'sum': df[col].sum(), |
| | | 'max': df[col].max(), |
| | | 'min': df[col].min() |
| | | } |
| | | return stats |
| | | except Exception as e: |
| | | print(f"计算统计信息失败: {e}") |