Skip to main content

Aggregation & Grouping

Learn how to summarize data using aggregate functions and GROUP BY clauses.


Aggregate Functions Overview

FunctionDescriptionExample
COUNT(*)Count all rowsCOUNT(*)
COUNT(column)Count non-NULL valuesCOUNT(email)
COUNT(DISTINCT col)Count unique valuesCOUNT(DISTINCT customer)
SUM(column)Sum of valuesSUM(amount)
AVG(column)Average of valuesAVG(quantity)
MIN(column)Minimum valueMIN(trandate)
MAX(column)Maximum valueMAX(amount)

Basic Aggregation (No GROUP BY)

When used without GROUP BY, aggregates apply to ALL rows.

Test Case: Overall Statistics

-- Company-wide sales statistics
SELECT
COUNT(*) AS total_invoices,
COUNT(DISTINCT entity) AS unique_customers,
SUM(amount) AS total_revenue,
AVG(amount) AS average_invoice,
MIN(amount) AS smallest_invoice,
MAX(amount) AS largest_invoice,
MIN(trandate) AS first_invoice,
MAX(trandate) AS last_invoice
FROM transaction
WHERE type = 'CustInvc'
AND trandate >= TO_DATE('2024-01-01', 'YYYY-MM-DD')

Sample Result:

total_invoicesunique_customerstotal_revenueaverage_invoicesmallest_invoicelargest_invoice
1,2503422,450,000.001,960.0050.00125,000.00

GROUP BY Basics

GROUP BY splits data into groups and applies aggregates to each group.

Illustration

Test Case: Sales by Customer

-- Total sales per customer
SELECT
c.companyname,
COUNT(*) AS order_count,
SUM(t.amount) AS total_sales,
AVG(t.amount) AS avg_order,
MAX(t.trandate) AS last_order
FROM transaction t
INNER JOIN customer c ON t.entity = c.id
WHERE t.type = 'SalesOrd'
GROUP BY c.id, c.companyname
ORDER BY total_sales DESC
LIMIT 20

Sample Result:

companynameorder_counttotal_salesavg_orderlast_order
Acme Corp45125,000.002,777.782024-12-15
Global Tech3898,500.002,592.112024-12-14
Beta Industries3287,250.002,726.562024-12-10

Multiple GROUP BY Columns

Group by more than one column for detailed breakdowns.

Test Case: Sales by Customer and Month

-- Monthly sales breakdown by customer
SELECT
c.companyname,
TO_CHAR(t.trandate, 'YYYY-MM') AS month,
COUNT(*) AS order_count,
SUM(t.amount) AS monthly_sales
FROM transaction t
INNER JOIN customer c ON t.entity = c.id
WHERE t.type = 'SalesOrd'
AND t.trandate >= TO_DATE('2024-01-01', 'YYYY-MM-DD')
GROUP BY c.id, c.companyname, TO_CHAR(t.trandate, 'YYYY-MM')
ORDER BY c.companyname, month

Sample Result:

companynamemonthorder_countmonthly_sales
Acme Corp2024-01512,500.00
Acme Corp2024-02718,200.00
Acme Corp2024-0349,800.00
Beta Industries2024-0138,400.00

Test Case: Sales by Region and Category

-- Sales breakdown by state and item category
SELECT
c.state,
i.class AS category,
COUNT(DISTINCT t.id) AS order_count,
SUM(tl.amount) AS total_sales
FROM transaction t
INNER JOIN customer c ON t.entity = c.id
INNER JOIN transactionline tl ON t.id = tl.transaction
INNER JOIN item i ON tl.item = i.id
WHERE t.type = 'SalesOrd'
GROUP BY c.state, i.class
ORDER BY c.state, total_sales DESC

HAVING Clause

HAVING filters AFTER aggregation (WHERE filters BEFORE).

Illustration

Test Case: High-Value Customers Only

-- Customers with more than $50,000 in sales
SELECT
c.companyname,
COUNT(*) AS order_count,
SUM(t.amount) AS total_sales
FROM transaction t
INNER JOIN customer c ON t.entity = c.id
WHERE t.type = 'SalesOrd'
GROUP BY c.id, c.companyname
HAVING SUM(t.amount) > 50000
ORDER BY total_sales DESC

Test Case: Active Customers (Multiple Orders)

-- Customers with more than 5 orders
SELECT
c.companyname,
COUNT(*) AS order_count,
SUM(t.amount) AS total_sales,
AVG(t.amount) AS avg_order
FROM transaction t
INNER JOIN customer c ON t.entity = c.id
WHERE t.type = 'SalesOrd'
GROUP BY c.id, c.companyname
HAVING COUNT(*) > 5
ORDER BY order_count DESC

Test Case: Multiple HAVING Conditions

-- High-value, active customers
SELECT
c.companyname,
COUNT(*) AS order_count,
SUM(t.amount) AS total_sales
FROM transaction t
INNER JOIN customer c ON t.entity = c.id
WHERE t.type = 'SalesOrd'
GROUP BY c.id, c.companyname
HAVING COUNT(*) >= 5
AND SUM(t.amount) >= 25000
ORDER BY total_sales DESC

COUNT Variations

COUNT(*) vs COUNT(column) vs COUNT(DISTINCT)

SELECT
-- Count all rows
COUNT(*) AS total_rows,

-- Count rows where email is not NULL
COUNT(email) AS with_email,

-- Count unique states
COUNT(DISTINCT state) AS unique_states,

-- Count unique sales reps
COUNT(DISTINCT salesrep) AS unique_reps

FROM customer
WHERE isinactive = 'F'

Sample Result:

total_rowswith_emailunique_statesunique_reps
1,2501,1804815

Test Case: Data Quality Report

-- Check data completeness
SELECT
COUNT(*) AS total_customers,
COUNT(email) AS has_email,
COUNT(phone) AS has_phone,
COUNT(CASE WHEN email IS NULL THEN 1 END) AS missing_email,
ROUND(COUNT(email) * 100.0 / COUNT(*), 1) AS email_pct
FROM customer
WHERE isinactive = 'F'

Real-World Aggregation Examples

Example 1: Monthly Sales Trend

-- Monthly sales trend for the year
SELECT
TO_CHAR(trandate, 'YYYY-MM') AS month,
COUNT(*) AS invoice_count,
SUM(amount) AS revenue,
AVG(amount) AS avg_invoice,
COUNT(DISTINCT entity) AS unique_customers
FROM transaction
WHERE type = 'CustInvc'
AND EXTRACT(YEAR FROM trandate) = 2024
GROUP BY TO_CHAR(trandate, 'YYYY-MM')
ORDER BY month

Sample Result:

monthinvoice_countrevenueavg_invoiceunique_customers
2024-01125245,0001,96078
2024-02142278,0001,95885
2024-03138265,0001,92082

Example 2: Top Selling Items

-- Top 10 items by revenue
SELECT
i.itemid,
i.displayname,
COUNT(DISTINCT t.id) AS order_count,
SUM(tl.quantity) AS total_qty,
SUM(tl.amount) AS total_revenue,
AVG(tl.rate) AS avg_price
FROM transactionline tl
INNER JOIN transaction t ON tl.transaction = t.id
INNER JOIN item i ON tl.item = i.id
WHERE t.type = 'SalesOrd'
AND t.trandate >= TO_DATE('2024-01-01', 'YYYY-MM-DD')
GROUP BY i.id, i.itemid, i.displayname
ORDER BY total_revenue DESC
LIMIT 10

Example 3: Sales Rep Performance

-- Sales rep performance comparison
SELECT
e.entityid AS sales_rep,
COUNT(DISTINCT t.entity) AS customers_served,
COUNT(*) AS orders,
SUM(t.amount) AS total_sales,
AVG(t.amount) AS avg_order,
MAX(t.trandate) AS last_sale
FROM transaction t
INNER JOIN employee e ON t.salesrep = e.id
WHERE t.type = 'SalesOrd'
AND t.trandate >= TO_DATE('2024-01-01', 'YYYY-MM-DD')
GROUP BY e.id, e.entityid
ORDER BY total_sales DESC

Example 4: Customer Aging Report

-- Accounts receivable aging
SELECT
c.companyname,
SUM(CASE WHEN SYSDATE - t.duedate <= 0 THEN t.amountremaining ELSE 0 END) AS current,
SUM(CASE WHEN SYSDATE - t.duedate BETWEEN 1 AND 30 THEN t.amountremaining ELSE 0 END) AS days_1_30,
SUM(CASE WHEN SYSDATE - t.duedate BETWEEN 31 AND 60 THEN t.amountremaining ELSE 0 END) AS days_31_60,
SUM(CASE WHEN SYSDATE - t.duedate BETWEEN 61 AND 90 THEN t.amountremaining ELSE 0 END) AS days_61_90,
SUM(CASE WHEN SYSDATE - t.duedate > 90 THEN t.amountremaining ELSE 0 END) AS over_90,
SUM(t.amountremaining) AS total_ar
FROM transaction t
INNER JOIN customer c ON t.entity = c.id
WHERE t.type = 'CustInvc'
AND t.amountremaining > 0
GROUP BY c.id, c.companyname
HAVING SUM(t.amountremaining) > 0
ORDER BY total_ar DESC

Example 5: Inventory Summary

-- Inventory levels by location
SELECT
l.name AS location,
COUNT(DISTINCT ib.item) AS unique_items,
SUM(ib.quantityonhand) AS total_qty,
SUM(ib.quantityonhand * i.averagecost) AS inventory_value
FROM inventorybalance ib
INNER JOIN item i ON ib.item = i.id
INNER JOIN location l ON ib.location = l.id
WHERE ib.quantityonhand > 0
GROUP BY l.id, l.name
ORDER BY inventory_value DESC

Common Aggregation Patterns

Percentage of Total

-- Each customer's percentage of total sales
WITH TotalSales AS (
SELECT SUM(amount) AS grand_total
FROM transaction
WHERE type = 'SalesOrd'
)
SELECT
c.companyname,
SUM(t.amount) AS customer_sales,
ROUND(SUM(t.amount) * 100.0 / ts.grand_total, 2) AS pct_of_total
FROM transaction t
INNER JOIN customer c ON t.entity = c.id
CROSS JOIN TotalSales ts
WHERE t.type = 'SalesOrd'
GROUP BY c.id, c.companyname, ts.grand_total
ORDER BY customer_sales DESC
LIMIT 10

Year-Over-Year Comparison

-- Compare this year vs last year by month
SELECT
EXTRACT(MONTH FROM trandate) AS month,
SUM(CASE WHEN EXTRACT(YEAR FROM trandate) = 2024 THEN amount ELSE 0 END) AS sales_2024,
SUM(CASE WHEN EXTRACT(YEAR FROM trandate) = 2023 THEN amount ELSE 0 END) AS sales_2023,
ROUND(
(SUM(CASE WHEN EXTRACT(YEAR FROM trandate) = 2024 THEN amount ELSE 0 END) -
SUM(CASE WHEN EXTRACT(YEAR FROM trandate) = 2023 THEN amount ELSE 0 END)) /
NULLIF(SUM(CASE WHEN EXTRACT(YEAR FROM trandate) = 2023 THEN amount ELSE 0 END), 0) * 100,
2) AS growth_pct
FROM transaction
WHERE type = 'CustInvc'
AND EXTRACT(YEAR FROM trandate) IN (2023, 2024)
GROUP BY EXTRACT(MONTH FROM trandate)
ORDER BY month

Running Count Check

-- Verify data completeness
SELECT
TO_CHAR(trandate, 'YYYY-MM') AS month,
COUNT(*) AS total,
COUNT(entity) AS has_entity,
COUNT(salesrep) AS has_salesrep,
COUNT(amount) AS has_amount
FROM transaction
WHERE type = 'SalesOrd'
GROUP BY TO_CHAR(trandate, 'YYYY-MM')
ORDER BY month

GROUP BY Rules

Rule 1: Non-Aggregated Columns Must Be in GROUP BY

-- WRONG: companyname not in GROUP BY
SELECT companyname, SUM(amount)
FROM transaction t
JOIN customer c ON t.entity = c.id
GROUP BY t.entity -- Missing companyname!

-- CORRECT:
SELECT c.companyname, SUM(t.amount)
FROM transaction t
JOIN customer c ON t.entity = c.id
GROUP BY c.id, c.companyname

Rule 2: WHERE vs HAVING

-- WHERE: Filter rows BEFORE grouping
-- HAVING: Filter groups AFTER aggregation

SELECT entity, SUM(amount) AS total
FROM transaction
WHERE type = 'SalesOrd' -- Before grouping (row filter)
GROUP BY entity
HAVING SUM(amount) > 10000 -- After grouping (aggregate filter)

Quick Reference

Execution Order

  1. FROM / JOIN - Get data from tables
  2. WHERE - Filter individual rows
  3. GROUP BY - Create groups
  4. Aggregate functions - Calculate per group
  5. HAVING - Filter groups
  6. SELECT - Choose columns
  7. ORDER BY - Sort results
  8. LIMIT - Restrict output

Common Patterns

-- Basic grouping
SELECT category, COUNT(*), SUM(amount)
FROM table
GROUP BY category

-- Grouping with filter
SELECT category, SUM(amount)
FROM table
WHERE status = 'Active'
GROUP BY category
HAVING SUM(amount) > 1000

-- Multiple group levels
SELECT year, month, category, SUM(amount)
FROM table
GROUP BY year, month, category
ORDER BY year, month, category