Identify and delete duplicate records in SQL DB

CREATE TABLE DuplicateRecords

(

id int IDENTITY(1,1) PRIMARY KEY,

first_name VARCHAR(30),

last_name VARCHAR(25),

email VARCHAR(210),

age VARCHAR(22)

);

INSERT INTO DuplicateRecords (first_name,last_name,email,age)

VALUES ('Kavin','Peterson','kavin.peterson@verizon.net','21'),

('Nick','Jonas','nick.jonas@me.com','18'),

('Peter','Heaven','peter.heaven@google.com','23'),

('Michal','Jackson','michal.jackson@aol.com','22'),

('Sean','Bean','sean.bean@yahoo.com','23'),

('Tom ','Baker','tom.baker@aol.com','20'),

('Ben','Barnes','ben.barnes@comcast.net','17'),

('Mischa ','Barton','mischa.barton@att.net','18'),

('Sean','Bean','sean.bean@yahoo.com','16'),

('Eliza','Bennett','eliza.bennett@yahoo.com','25'),

('Michal','Krane','michal.Krane@me.com','25'),

('Peter','Heaven','peter.heaven@google.com','23'),

('Brian','Blessed','brian.blessed@yahoo.com','20'),

('Kavin','Peterson','kavin.peterson@verizon.net','21');

select * from DuplicateRecords order by 2

/***************** Find Duplicate Records ******************/

-- 1st Method

;with cteD AS

(

select first_name,last_name,email,age

,ROW_NUMBER() over (partition by first_name,last_name,email,age

order by first_name,last_name,email,age) RNumber

from DuplicateRecords

)

select * from cteD

where RNumber>1

-- 2nd Method

select A.* from DuplicateRecords A

JOIN DuplicateRecords B

ON A.first_name=B.first_name

AND A.last_name=B.last_name

AND A.email=B.email

AND A.age=B.age

AND A.id>B.id

-- 3rd Method

select A.* FROM DuplicateRecords A

WHERE

id > (

SELECT min(id) FROM DuplicateRecords B

WHERE A.first_name=B.first_name

AND A.last_name=B.last_name

AND A.email=B.email

AND A.age=B.age

);

-- 4th Method

SELECT first_name,last_name,email,age

FROM DuplicateRecords

GROUP BY first_name,last_name,email,age

HAVING COUNT(*) > 1

ORDER BY COUNT(*) DESC

-- 5th Method

select LU.*

FROM (SELECT first_name,last_name,email,age,

Row_number()

OVER (

partition BY first_name,last_name,email,age

ORDER BY id DESC) [Row]

FROM DuplicateRecords) LU

WHERE [row] > 1

-- 6th Method

select * FROM DuplicateRecords WHERE id IN

(

SELECT MAX(id) FROM DuplicateRecords

GROUP BY first_name,last_name,email,age HAVING COUNT(id)>1

)

/***************** Delete Duplicate Records ******************/

-- 1st Method

;with cteD AS

(

select first_name,last_name,email,age

,ROW_NUMBER() over (partition by first_name,last_name,email,age

order by first_name,last_name,email,age) RNumber

from DuplicateRecords

)

delete from cteD

where RNumber>1

-- 2nd Method

delete A from DuplicateRecords A

JOIN DuplicateRecords B

ON A.first_name=B.first_name

AND A.last_name=B.last_name

AND A.email=B.email

AND A.age=B.age

AND A.id>B.id

-- 3rd Method

delete A FROM DuplicateRecords A

WHERE

id > (

SELECT min(id) FROM DuplicateRecords B

WHERE A.first_name=B.first_name

AND A.last_name=B.last_name

AND A.email=B.email

AND A.age=B.age

);

-- 4th Method

delete LU

FROM (SELECT first_name,last_name,email,age,

Row_number()

OVER (

partition BY first_name,last_name,email,age

ORDER BY id DESC) [Row]

FROM DuplicateRecords) LU

WHERE [row] > 1

-- 5th Method

delete FROM DuplicateRecords WHERE id IN

(

SELECT MAX(id) FROM DuplicateRecords

GROUP BY first_name,last_name,email,age HAVING COUNT(id)>1

)

-- 6th Method

DELETE FROM DuplicateRecords

WHERE ID NOT IN (SELECT MAX(ID)

FROM DuplicateRecords

GROUP BY first_name,last_name,email,age

HAVING MAX(ID) IS NOT NULL)

Comments

ram kSat Sept 04, 05:40:00 pm
Thanks for sharing :-)
ReplyDelete
Replies
NarasimhaWed Sept 22, 04:24:00 pm
Really good information to show through this blog. I really appreciate you for all the valuable information that you are providing us through your blog.
ReplyDelete
Replies
Balu ChallaThu Sept 30, 09:28:00 pm
Thanks for your feedback
ReplyDelete
Replies
Self LearnerSat Oct 02, 12:42:00 pm
Thanks that really help
ReplyDelete
Replies

Add comment

MSBI Tutor

Search This Blog

Identify and delete duplicate records in SQL DB

Labels

Comments

Post a Comment

Popular posts from this blog

SSRS INTERVIEW QUESTIONS

Exception deserializing the package "The process cannot access the file because it is being used by another process."

Failed to execute the package or element. Build errors were encountered