Jatin Madaan
- Mar 18, 2020
- 7 min read

SQL Group Functions

Let's say we have below student data

select * from student;

id. student_name student_gender student_age
1	Sally		 Female	                14
2	Edward		 Male			12
3	Jon		 Male			13
4	Liana		 Female			10
5	Ben	         Male			11
6	Elice		 Female			12
7	Nick		 Male			9
8	Josh		 Male			12
9	Liza		 Female			10
10	Wick		 Male			15

Query 1 : Get running sum of all ages based on id .

For this we will be using sum() over () function which works as below :

Similarly we can total sum of revenue (aggregating sum) for each month.

select id,student_name,student_gender,student_age , sum(student_age) over (order by id ) as running_sum from student


1	Sally	Female	14	14
2	Edward	Male	12	26
3	Jon	Male	13	39
4	Liana	Female	10	49
5	Ben	Male	11	60
6	Elice	Female	12	72
7	Nick	Male	9	81
8	Josh	Male	12	93
9	Liza	Female	10	103
10	Wick	Male	15	118

Query 2 : Get running average (Similar to sum)

select id,student_name,student_gender,student_age , trunc(avg(student_age) over (order by id )) as running_avg from student


1	Sally	Female	14	14
2	Edward	Male	12	13
3	Jon	Male	13	13
4	Liana	Female	10	12
5	Ben	Male	11	12
6	Elice	Female	12	12
7	Nick	Male	9	11
8	Josh	Male	12	11
9	Liza	Female	10	11
10	Wick	Male	15	11

Query 3 : Partitioning running total by column values

Partitioning here means grouping of data base on column value specified .

select id, student_name,student_gender,student_age , sum(student_age) over (partition by student_gender order by id ) as running_group_sum from student;

1	Sally	Female	14	14
4	Liana	Female	10	24
6	Elice	Female	12	36
9	Liza	Female	10	46
2	Edward	Male	12	12
3	Jon	Male	13	25
5	Ben	Male	11	36
7	Nick	Male	9	45
8	Josh	Male	12	57
10	Wick	Male	15	72

eg 2 : Partition based on 2 columns

select id, student_name,student_gender,student_age , sum(student_age) over (partition by student_gender,student_name order by id ) as running_group_sum from student;

6	Elice	Female	12	12
4	Liana	Female	10	10
9	Liza	Female	10	10
1	Sally	Female	14	14
5	Ben	Male	11	11
2	Edward	Male	12	12
3	Jon	Male	13	13
8	Josh	Male	12	12
7	Nick	Male	9	9
10	Wick	Male	15	15
11	Wick	Male	100	115

eg 3 : We use order by on key column to maintain integrity but it can be done on any column .

select id, student_name,student_gender,student_age , sum(student_age) over (order by student_name ) as running_sum from student;

5	Ben	Male	11	11
2	Edward	Male	12	23
6	Elice	Female	12	35
3	Jon	Male	13	48
8	Josh	Male	12	60
4	Liana	Female	10	70
9	Liza	Female	10	80
7	Nick	Male	9	89
1	Sally	Female	14	103
10	Wick	Male	15	118

eg 4 : Issues in case of duplicates

select id, student_name,student_gender,student_age , sum(student_age) over (order by id ) as running_sum from student;

1	Sally	Female	14	14
2	Edward	Male	12	26
3	Jon	Male	13	39
4	Liana	Female	10	49
5	Ben	Male	11	60
6	Elice	Female	12	72
7	Nick	Male	9	81
8	Josh	Male	12	93
9	Liza	Female	10	103
10	Wick	Male	15	133
10	duplicat Female	15	133

Duplicate value is summed in starting only . To avoid this use "rows between unbounded preceding and current row" post order by clause .

select id, student_name,student_gender,student_age , sum(student_age) over (order by id rows between unbounded preceding and current row) as running_sum from student;


1	Sally	Female	14	14
2	Edward	Male	12	26
3	Jon	Male	13	39
4	Liana	Female	10	49
5	Ben	Male	11	60
6	Elice	Female	12	72
7	Nick	Male	9	81
8	Josh	Male	12	93
9	Liza	Female	10	103
10	Wick	Male	15	118
10	duplicate	Female	15	133

Query 4 : Using Rank and dense_rank()

- dense_rank() : No skipping of ranks (results are dense)


select s.*,dense_rank() over (order by student_age) as rank from student s

7	Nick	Male	9	1
9	Liza	Female	10	2
4	Liana	Female	10	2
5	Ben	Male	11	3
2	Edward	Male	12	4
6	Elice	Female	12	4
8	Josh	Male	12	4
3	Jon	Male	13	5
1	Sally	Female	14	6
10	Wick	Male	15	7
10	duplicat Female	15	7

eg 2 : Getting 2nd highest age using dense_rank ()

select student_age from 
(select s.*,dense_rank() over (order by student_age desc) as rank from student s) where rank=2;

o/
14

- rank () function -- it will skip the ranks which are used and increase counts/counter accordingly .

select s.*,rank() over (order by student_age desc) as rank from student s


10	duplicat Female	15	1
10	Wick	Male	15	1
1	Sally	Female	14	3
3	Jon	Male	13	4
2	Edward	Male	12	5
6	Elice	Female	12	5
8	Josh	Male	12	5
5	Ben	Male	11	8
4	Liana	Female	10	9
9	Liza	Female	10	9
7	Nick	Male	9	11

Query 5 : Get unique id to each row (use row_nuber() functions).

select s.*,row_number() over (order by student_age desc) as row_num from student s

10	duplicate	Female	15	1
10	Wick	Male	15	2
1	Sally	Female	14	3
3	Jon	Male		13	4
2	Edward	Male	12	5
6	Elice	Female	12	6
8	Josh	Male	12	7
5	Ben	Male		11	8
4	Liana	Female	10	9
9	Liza	Female	10	10
7	Nick	Male	9	11

eg 2 : selecting only unique values (non - duplicate on 1 column )

Select student_age FROM Student WHERE ROWID <>

(Select max(rowid) from Student s where student_age=s.student_age);

14
12
13
10
11
12
9
12
10
15

Note : Replace select with delete to remove duplicate values .

Query 6 : percent rank ()

select s.*,percent_rank() over (order by student_age desc) as rank from student s


10	duplicat Female	15	0
10	Wick	Male	15	0
1	Sally	Female	14	0.2
3	Jon	Male	13	0.3
2	Edward	Male	12	0.4
6	Elice	Female	12	0.4
8	Josh	Male	12	0.4
5	Ben	Male	11	0.7
4	Liana	Female	10	0.8
9	Liza	Female	10	0.8
7	Nick	Male	9	1

Query 7 : Display rank in 1 and 2 ie there should not be any rank 3 or forward. We can use ntile function for same -

select s.*,ntile(2) over (order by student_age desc) as rank from student s

10	duplicate	Female	15	1
10	Wick	Male	15	1
1	Sally	Female	14	1
3	Jon	Male	13	1
2	Edward	Male	12	1
6	Elice	Female	12	1
8	Josh	Male	12	2
5	Ben	Male	11	2
4	Liana	Female	10	2
9	Liza	Female	10	2
7	Nick	Male	9	2

Query 8 : Lag function

select s.*,lag(student_age,2) over (order by student_age desc) as lag from student s


10	duplicat Female	15	null
10	Wick	Male	15	null
1	Sally	Female	14	15
3	Jon	Male	13	15
2	Edward	Male	12	14
6	Elice	Female	12	13
8	Josh	Male	12	12
5	Ben	Male	11	12
4	Liana	Female	10	12
9	Liza	Female	10	11
7	Nick	Male	9	10

Query 9 : Lead function

select s.*,lead(student_age,2) over (order by student_age desc ) as lag from student s


10	duplicat Female	15	14
10	Wick	Male	15	13
1	Sally	Female	14	12
3	Jon	Male	13	12
2	Edward	Male	12	12
6	Elice	Female	12	11
8	Josh	Male	12	10
5	Ben	Male	11	10
4	Liana	Female	10	9
9	Liza	Female	10	null 
7	Nick	Male	9	null

- In above Query 2 is offset ie how many rows lead/lag is required.

Query 10 : Grouping Functions :

Structure of table :

desc fact_table


Name        Null?    Type         
----------- -------- ------------ 
FACT_1_ID   NOT NULL NUMBER       
FACT_2_ID   NOT NULL NUMBER       
FACT_3_ID   NOT NULL NUMBER       
FACT_4_ID   NOT NULL NUMBER       
SALES_VALUE NOT NULL NUMBER(10,2)

Sample data :

select * from fact_table
2	2	3	6	93.53
1	1	10	9	65.29
1	1	9	2	10.9
1	5	6	5	66.05
2	3	3	10	34.61
2	5	2	7	92.16
2	5	2	2	89.47
2	2	7	9	33.4
1	4	2	1	69
1	4	2	2	56.35
1	2	5	10	96.85
1	2	2	8	59.72
2	5	9	10	77.97
1	5	3	8	96.29
2	2	3	6	75.65
1	3	10	1	51.28
1	2	5	1	89.33
2	1	5	4	65.22

Now let do a normal Group by on Data  

SELECT fact_1_id,
       fact_2_id,
       COUNT(*) AS num_rows,
       SUM(sales_value) AS sales_value
FROM   fact_table
GROUP BY fact_1_id, fact_2_id
ORDER BY fact_1_id, fact_2_id;


1	1	79	3850.64
1	2	115	5613.9
1	3	118	5772.59
1	4	108	5686.19
1	5	102	5377.03
2	1	86	4688.92
2	2	104	5177.06
2	3	84	3766.84
2	4	114	5426.78
2	5	90	4820.52

ROLLUP extension produces group subtotals from right to left and a grand total. If "n" is the number of columns listed in the ROLLUP, there will be n+1 levels of subtotals.

SELECT fact_1_id,
       fact_2_id,
       SUM(sales_value) AS sales_value
FROM   fact_table
GROUP BY ROLLUP (fact_1_id, fact_2_id)
ORDER BY fact_1_id, fact_2_id;

1	1	3850.64
1	2	5613.9
1	3	5772.59
1	4	5686.19
1	5	5377.03
1		26300.35
2	1	4688.92
2	2	5177.06
2	3	3766.84
2	4	5426.78
2	5	4820.52
2		23880.12
		50180.47

CUBE extension will generate subtotals for all combinations of the dimensions specified. If "n" is the number of columns listed in the CUBE, there will be 2n subtotal combinations.

SELECT fact_1_id,
       fact_2_id,
       SUM(sales_value) AS sales_value
FROM   fact_table
GROUP BY CUBE (fact_1_id, fact_2_id)
ORDER BY fact_1_id, fact_2_id;


1	1	3850.64
1	2	5613.9
1	3	5772.59
1	4	5686.19
1	5	5377.03
1		26300.35
2	1	4688.92
2	2	5177.06
2	3	3766.84
2	4	5426.78
2	5	4820.52
2		23880.12
	1	8539.56
	2	10790.96
	3	9539.43
	4	11112.97
	5	10197.55
		50180.47

GROUPING It accepts a single column as a parameter and returns "1" if the column contains a null value generated as part of a subtotal by a ROLLUP or CUBE operation or "0" for any other value, including stored null values.

 SELECT fact_1_id,
       fact_2_id,
       SUM(sales_value) AS sales_value,
       GROUPING(fact_1_id) AS f1g, 
       GROUPING(fact_2_id) AS f2g
FROM   fact_table
GROUP BY CUBE (fact_1_id, fact_2_id)
ORDER BY fact_1_id, fact_2_id;


1	1	3850.64	0	0
1	2	5613.9	0	0
1	3	5772.59	0	0
1	4	5686.19	0	0
1	5	5377.03	0	0
1		26300.35	0	1
2	1	4688.92	0	0
2	2	5177.06	0	0
2	3	3766.84	0	0
2	4	5426.78	0	0
2	5	4820.52	0	0
2		23880.12	0	1
	1	8539.56	1	0
	2	10790.96	1	0
	3	9539.43	1	0
	4	11112.97	1	0
	5	10197.55	1	0
		50180.47	1	1

- LEAD & LAG with eg :

select distinct(fact_2_id),
sum(SALES_VALUE) as sum_sales,
LAG(sum(SALES_VALUE),1) over (order by fact_2_id desc) as lag_1,
LEAD(sum(SALES_VALUE),1) over (order by fact_2_id desc) as lead_1
from fact_table group by fact_2_id

id  sum_sales       lag_1       lead_1
5	10197.55    NULL        11112.97
4	11112.97   10197.55	9539.43
3	9539.43	   11112.97	10790.96
2	10790.96   9539.43	8539.56
1	8539.56	   10790.96	NULL

SQL Group Functions

Comments