1from collections import defaultdict
2from typing import Any
3
4class DataProcessor:
5 def __init__(self, records: list[dict[str, Any]]) -> None:
6 self.records = records
7 self._cache: dict[str, list[dict]] = {}
8
9 def group_by(self, key: str) -> dict[str, list[dict]]:
10 if key in self._cache:
11 return self._cache[key]
12 groups: dict[str, list[dict]] = defaultdict(list)
13 for record in self.records:
14 value = str(record.get(key, "unknown"))
15 groups[value].append(record)
16 self._cache[key] = dict(groups)
17 return self._cache[key]
18
19 def aggregate(self, field: str) -> dict[str, float]:
20 values = [r[field] for r in self.records if field in r]
21 if not values:
22 return {"min": 0.0, "max": 0.0, "avg": 0.0}
23 return {
24 "min": float(min(values)),
25 "max": float(max(values)),
26 "avg": sum(values) / len(values),
27 }
28
29 def filter_records(self, **kwargs: Any) -> list[dict]:
30 result = self.records
31 for key, value in kwargs.items():
32 result = [r for r in result if r.get(key) == value]
33 return result
34
35 def to_summary(self) -> dict[str, int | str]:
36 fields = {k for r in self.records for k in r.keys()}
37 return {
38 "total_records": len(self.records),
39 "fields": ", ".join(sorted(fields)),
40 }