Skip to main content

APPROX_COUNT_DISTINCT

Syntax

APPROX_COUNT_DISTINCT(expr) OVER over_clause

Description

This function calculates the number of distinct rows in the expression expr and returns an approximate value. It is significantly faster than COUNT() when handling large amounts of data, and the deviation from the exact result is negligible.

Examples

CREATE TABLE employees (manager_id INT,last_name varchar(50),hiredate varchar(50),SALARY INT);
INSERT INTO employees VALUES(100, 'Raphaely', '2017-07-01', 1700);
INSERT INTO employees VALUES(100, 'De Haan', '2018-05-01',11000);
INSERT INTO employees VALUES(100, 'Errazuriz', '2017-07-21', 1400);
INSERT INTO employees VALUES(100, 'Hartstein', '2019-05-01',14000);
INSERT INTO employees VALUES(100, 'Raphaely', '2017-07-22', 1700);
INSERT INTO employees VALUES(100, 'Weiss', '2019-07-11',13500);
INSERT INTO employees VALUES(100, 'Russell', '2019-10-05', 13000);
INSERT INTO employees VALUES(100, 'Partners', '2018-12-01',14000);
INSERT INTO employees VALUES(200, 'Ross', '2019-06-11',13500);
INSERT INTO employees VALUES(200, 'Bell', '2019-05-25', 13000);
INSERT INTO employees VALUES(200, 'Part', '2018-08-11',14000);
COMMIT;

SELECT last_name, salary, APPROX_COUNT_DISTINCT(salary) OVER (ORDER BY hiredate) "Variance"
FROM employees WHERE manager_id = 100 ORDER BY last_name, salary, "Variance";
+-----------+--------+----------+
| LAST_NAME | SALARY | Variance |
+-----------+--------+----------+
| De Haan | 11000 | 3 |
| Errazuriz | 1400 | 2 |
| Hartstein | 14000 | 4 |
| Partners | 14000 | 4 |
| Raphaely | 1700 | 1 |
| Raphaely | 1700 | 2 |
| Russell | 13000 | 6 |
| Weiss | 13500 | 5 |
+-----------+--------+----------+
8 rows in set (0.003 sec)