duplicate rows in postgresql

参见 Search and destroy duplicate rows in PostgreSQL

Find duplicates

using group

1
2
3
4
5
6
7
8
9
SELECT
firstname,
lastname,
count(*)
FROM people
GROUP BY
firstname,
lastname
HAVING count(*) > 1;

using partition

1
2
3
4
5
6
7
8
9
SELECT * FROM
(SELECT *, count(*)
OVER
(PARTITION BY
firstname,
lastname
) AS count
FROM people) tableWithCount
WHERE tableWithCount.count > 1;

Using not strict distinct

利用 not strict distinct DISTINCT ON 找到唯一的那些条,剩余的就是重复的,可以修改或删除

1
2
3
4
5
6
7
8
9
DELETE FROM people WHERE people.id NOT IN 
(SELECT id FROM (
SELECT DISTINCT ON (firstname, lastname) *
FROM people));

// more readable code
WITH unique AS
(SELECT DISTINCT ON (firstname, lastname) * FROM people)
DELETE FROM people WHERE people.id NOT IN (SELECT id FROM unique);