APPROX_COUNT_DISTINCT
Syntax
APPROX_COUNT_DISTINCT(expr) OVER over_clause
Description
This function calculates the number of distinct rows in the expression expr and returns an approximate value. It is significantly faster than COUNT() when handling large amounts of data, and the deviation from the exact result is negligible.
Examples
CREATE TABLE employees (manager_id INT,last_name varchar(50),hiredate varchar(50),SALARY INT);
INSERT INTO employees VALUES(100, 'Raphaely', '2017-07-01', 1700);
INSERT INTO employees VALUES(100, 'De Haan', '2018-05-01',11000);
INSERT INTO employees VALUES(100, 'Errazuriz', '2017-07-21', 1400);
INSERT INTO employees VALUES(100, 'Hartstein', '2019-05-01',14000);
INSERT INTO employees VALUES(100, 'Raphaely', '2017-07-22', 1700);
INSERT INTO employees VALUES(100, 'Weiss', '2019-07-11',13500);
INSERT INTO employees VALUES(100, 'Russell', '2019-10-05', 13000);
INSERT INTO employees VALUES(100, 'Partners', '2018-12-01',14000);
INSERT INTO employees VALUES(200, 'Ross', '2019-06-11',13500);
INSERT INTO employees VALUES(200, 'Bell', '2019-05-25', 13000);
INSERT INTO employees VALUES(200, 'Part', '2018-08-11',14000);
COMMIT;
SELECT last_name, salary, APPROX_COUNT_DISTINCT(salary) OVER (ORDER BY hiredate) "Variance"
FROM employees WHERE manager_id = 100 ORDER BY last_name, salary, "Variance";
+-----------+--------+----------+
| LAST_NAME | SALARY | Variance |
+-----------+--------+----------+
| De Haan | 11000 | 3 |
| Errazuriz | 1400 | 2 |
| Hartstein | 14000 | 4 |
| Partners | 14000 | 4 |
| Raphaely | 1700 | 1 |
| Raphaely | 1700 | 2 |
| Russell | 13000 | 6 |
| Weiss | 13500 | 5 |
+-----------+--------+----------+
8 rows in set (0.003 sec)