Identify and delete duplicate records in SQL DB
CREATE TABLE DuplicateRecords
(
id int IDENTITY(1,1) PRIMARY KEY,
first_name VARCHAR(30),
last_name VARCHAR(25),
email VARCHAR(210),
age VARCHAR(22)
);
INSERT INTO DuplicateRecords (first_name,last_name,email,age)
VALUES ('Kavin','Peterson','kavin.peterson@verizon.net','21'),
('Nick','Jonas','nick.jonas@me.com','18'),
('Peter','Heaven','peter.heaven@google.com','23'),
('Michal','Jackson','michal.jackson@aol.com','22'),
('Sean','Bean','sean.bean@yahoo.com','23'),
('Tom ','Baker','tom.baker@aol.com','20'),
('Ben','Barnes','ben.barnes@comcast.net','17'),
('Mischa ','Barton','mischa.barton@att.net','18'),
('Sean','Bean','sean.bean@yahoo.com','16'),
('Eliza','Bennett','eliza.bennett@yahoo.com','25'),
('Michal','Krane','michal.Krane@me.com','25'),
('Peter','Heaven','peter.heaven@google.com','23'),
('Brian','Blessed','brian.blessed@yahoo.com','20'),
('Kavin','Peterson','kavin.peterson@verizon.net','21');
select * from DuplicateRecords order by 2
/***************** Find Duplicate Records ******************/
-- 1st Method
;with cteD AS
(
select first_name,last_name,email,age
,ROW_NUMBER() over (partition by first_name,last_name,email,age
order by first_name,last_name,email,age) RNumber
from DuplicateRecords
)
select * from cteD
where RNumber>1
GO
-- 2nd Method
select A.* from DuplicateRecords A
JOIN DuplicateRecords B
ON A.first_name=B.first_name
AND A.last_name=B.last_name
AND A.email=B.email
AND A.age=B.age
AND A.id>B.id
GO
-- 3rd Method
select A.* FROM DuplicateRecords A
WHERE
id > (
SELECT min(id) FROM DuplicateRecords B
WHERE A.first_name=B.first_name
AND A.last_name=B.last_name
AND A.email=B.email
AND A.age=B.age
);
GO
-- 4th Method
SELECT first_name,last_name,email,age
FROM DuplicateRecords
GROUP BY first_name,last_name,email,age
HAVING COUNT(*) > 1
ORDER BY COUNT(*) DESC
GO
-- 5th Method
select LU.*
FROM (SELECT first_name,last_name,email,age,
Row_number()
OVER (
partition BY first_name,last_name,email,age
ORDER BY id DESC) [Row]
FROM DuplicateRecords) LU
WHERE [row] > 1
GO
-- 6th Method
select * FROM DuplicateRecords WHERE id IN
(
SELECT MAX(id) FROM DuplicateRecords
GROUP BY first_name,last_name,email,age HAVING COUNT(id)>1
)
/***************** Delete Duplicate Records ******************/
-- 1st Method
;with cteD AS
(
select first_name,last_name,email,age
,ROW_NUMBER() over (partition by first_name,last_name,email,age
order by first_name,last_name,email,age) RNumber
from DuplicateRecords
)
delete from cteD
where RNumber>1
GO
-- 2nd Method
delete A from DuplicateRecords A
JOIN DuplicateRecords B
ON A.first_name=B.first_name
AND A.last_name=B.last_name
AND A.email=B.email
AND A.age=B.age
AND A.id>B.id
GO
-- 3rd Method
delete A FROM DuplicateRecords A
WHERE
id > (
SELECT min(id) FROM DuplicateRecords B
WHERE A.first_name=B.first_name
AND A.last_name=B.last_name
AND A.email=B.email
AND A.age=B.age
);
GO
-- 4th Method
delete LU
FROM (SELECT first_name,last_name,email,age,
Row_number()
OVER (
partition BY first_name,last_name,email,age
ORDER BY id DESC) [Row]
FROM DuplicateRecords) LU
WHERE [row] > 1
GO
-- 5th Method
delete FROM DuplicateRecords WHERE id IN
(
SELECT MAX(id) FROM DuplicateRecords
GROUP BY first_name,last_name,email,age HAVING COUNT(id)>1
)
GO
-- 6th Method
DELETE FROM DuplicateRecords
WHERE ID NOT IN (SELECT MAX(ID)
FROM DuplicateRecords
GROUP BY first_name,last_name,email,age
HAVING MAX(ID) IS NOT NULL)
GO
Thanks for sharing :-)
ReplyDeleteThank you for your valuable feedback
DeleteReally good information to show through this blog. I really appreciate you for all the valuable information that you are providing us through your blog.
ReplyDeleteThanks for your feedback
DeleteThanks for your feedback
ReplyDeleteThanks that really help
ReplyDeleteThanks for your feedback
Delete