Aggregations¶
DictDB provides SQL-like aggregation functions for computing statistics on your data.
Aggregation Classes¶
Import the aggregation classes directly from dictdb:
| Class | Description | Requires Field |
|---|---|---|
Count |
Count records or non-None values | Optional |
Sum |
Sum of numeric values | Yes |
Avg |
Average of numeric values | Yes |
Min |
Minimum value | Yes |
Max |
Maximum value | Yes |
Basic Usage¶
Use table.aggregate() to compute aggregations:
from dictdb import DictDB, Count, Sum, Avg, Min, Max
db = DictDB()
db.create_table("employees")
employees = db.get_table("employees")
# Insert sample data
employees.insert({"id": 1, "name": "Alice", "department": "IT", "salary": 75000})
employees.insert({"id": 2, "name": "Bob", "department": "HR", "salary": 65000})
employees.insert({"id": 3, "name": "Charlie", "department": "IT", "salary": 85000})
employees.insert({"id": 4, "name": "Diana", "department": "IT", "salary": 70000})
# Count all employees
result = employees.aggregate(count=Count())
# {"count": 4}
Count¶
Count() can be used in two ways:
# Count all records
employees.aggregate(total=Count())
# {"total": 4}
# Count non-None values in a specific field
employees.aggregate(with_salary=Count("salary"))
# {"with_salary": 4}
Count Without Field vs With Field
Count()counts all records, including those with None valuesCount("field")counts only records where the field is not None
Sum¶
Sum("field") calculates the sum of numeric values:
Returns None if no records match or all values are None.
Avg¶
Avg("field") calculates the average of numeric values:
Returns None if no records match or all values are None.
Min and Max¶
Min("field") and Max("field") find extreme values:
employees.aggregate(
lowest=Min("salary"),
highest=Max("salary")
)
# {"lowest": 65000, "highest": 85000}
These work with any comparable type (numbers, strings, dates, etc.):
employees.aggregate(
first_name=Min("name"),
last_name=Max("name")
)
# {"first_name": "Alice", "last_name": "Diana"}
Multiple Aggregations¶
Compute multiple aggregations in a single call:
result = employees.aggregate(
count=Count(),
total=Sum("salary"),
average=Avg("salary"),
minimum=Min("salary"),
maximum=Max("salary")
)
# {
# "count": 4,
# "total": 295000,
# "average": 73750.0,
# "minimum": 65000,
# "maximum": 85000
# }
Filtering with WHERE¶
Use the where parameter to filter records before aggregating:
from dictdb import Condition
# Count IT employees
result = employees.aggregate(
where=Condition(employees.department == "IT"),
count=Count()
)
# {"count": 3}
# Average salary in IT department
result = employees.aggregate(
where=Condition(employees.department == "IT"),
avg_salary=Avg("salary")
)
# {"avg_salary": 76666.67}
GROUP BY¶
Use group_by to compute aggregations for each unique value of a field:
# Count employees by department
results = employees.aggregate(
group_by="department",
count=Count()
)
# [
# {"department": "IT", "count": 3},
# {"department": "HR", "count": 1}
# ]
Return Type Changes with GROUP BY
- Without
group_by: Returns a single dictionary - With
group_by: Returns a list of dictionaries (one per group)
Multiple Aggregations per Group¶
results = employees.aggregate(
group_by="department",
count=Count(),
total_salary=Sum("salary"),
avg_salary=Avg("salary"),
max_salary=Max("salary")
)
# [
# {"department": "IT", "count": 3, "total_salary": 230000, "avg_salary": 76666.67, "max_salary": 85000},
# {"department": "HR", "count": 1, "total_salary": 65000, "avg_salary": 65000.0, "max_salary": 65000}
# ]
Multiple GROUP BY Fields¶
Group by multiple fields using a list:
db.create_table("sales")
sales = db.get_table("sales")
sales.insert({"region": "East", "product": "A", "amount": 100})
sales.insert({"region": "East", "product": "A", "amount": 150})
sales.insert({"region": "East", "product": "B", "amount": 200})
sales.insert({"region": "West", "product": "A", "amount": 120})
results = sales.aggregate(
group_by=["region", "product"],
count=Count(),
total=Sum("amount")
)
# [
# {"region": "East", "product": "A", "count": 2, "total": 250},
# {"region": "East", "product": "B", "count": 1, "total": 200},
# {"region": "West", "product": "A", "count": 1, "total": 120}
# ]
Combining GROUP BY with WHERE¶
Filter records before grouping:
results = employees.aggregate(
where=Condition(employees.salary >= 70000),
group_by="department",
count=Count(),
avg_salary=Avg("salary")
)
# [
# {"department": "IT", "count": 3, "avg_salary": 76666.67}
# ]
# HR employee (salary 65000) was excluded by the WHERE clause
Complete Example¶
from dictdb import DictDB, Condition, Count, Sum, Avg, Min, Max
db = DictDB()
db.create_table("orders", primary_key="order_id")
orders = db.get_table("orders")
# Insert sample orders
orders.insert({"order_id": 1, "customer": "Alice", "product": "Widget", "quantity": 5, "price": 10.00})
orders.insert({"order_id": 2, "customer": "Bob", "product": "Gadget", "quantity": 2, "price": 25.00})
orders.insert({"order_id": 3, "customer": "Alice", "product": "Gadget", "quantity": 1, "price": 25.00})
orders.insert({"order_id": 4, "customer": "Charlie", "product": "Widget", "quantity": 10, "price": 10.00})
orders.insert({"order_id": 5, "customer": "Alice", "product": "Widget", "quantity": 3, "price": 10.00})
# Overall statistics
stats = orders.aggregate(
total_orders=Count(),
total_quantity=Sum("quantity"),
avg_price=Avg("price")
)
# {"total_orders": 5, "total_quantity": 21, "avg_price": 16.0}
# Orders per customer
by_customer = orders.aggregate(
group_by="customer",
order_count=Count(),
total_quantity=Sum("quantity")
)
# [
# {"customer": "Alice", "order_count": 3, "total_quantity": 9},
# {"customer": "Bob", "order_count": 1, "total_quantity": 2},
# {"customer": "Charlie", "order_count": 1, "total_quantity": 10}
# ]
# Revenue by product (Widget orders only)
widget_stats = orders.aggregate(
where=Condition(orders.product == "Widget"),
group_by="customer",
order_count=Count(),
total_qty=Sum("quantity")
)
# [
# {"customer": "Alice", "order_count": 2, "total_qty": 8},
# {"customer": "Charlie", "order_count": 1, "total_qty": 10}
# ]
Handling None Values¶
Aggregations handle None values consistently:
Count()without field: Counts all records (including those with None)Count("field"): Counts only non-None valuesSum,Avg,Min,Max: Ignore None values in calculations- If all values are None:
Sum,Avg,Min,MaxreturnNone
db.create_table("data")
data = db.get_table("data")
data.insert({"id": 1, "value": 10})
data.insert({"id": 2, "value": None})
data.insert({"id": 3, "value": 20})
result = data.aggregate(
total_records=Count(),
non_null_values=Count("value"),
sum_values=Sum("value"),
avg_values=Avg("value")
)
# {
# "total_records": 3,
# "non_null_values": 2,
# "sum_values": 30,
# "avg_values": 15.0
# }