Identify and delete duplicate records in SQL DB

CREATE TABLE DuplicateRecords

( 

             id int IDENTITY(1,1) PRIMARY KEY, 

             first_name VARCHAR(30), 

             last_name VARCHAR(25),  

             email VARCHAR(210), 

             age VARCHAR(22)

);

 

    INSERT INTO DuplicateRecords (first_name,last_name,email,age)  

    VALUES ('Kavin','Peterson','kavin.peterson@verizon.net','21'), 

           ('Nick','Jonas','nick.jonas@me.com','18'), 

           ('Peter','Heaven','peter.heaven@google.com','23'), 

           ('Michal','Jackson','michal.jackson@aol.com','22'), 

           ('Sean','Bean','sean.bean@yahoo.com','23'), 

           ('Tom ','Baker','tom.baker@aol.com','20'), 

           ('Ben','Barnes','ben.barnes@comcast.net','17'), 

           ('Mischa ','Barton','mischa.barton@att.net','18'), 

           ('Sean','Bean','sean.bean@yahoo.com','16'), 

           ('Eliza','Bennett','eliza.bennett@yahoo.com','25'), 

           ('Michal','Krane','michal.Krane@me.com','25'), 

           ('Peter','Heaven','peter.heaven@google.com','23'), 

           ('Brian','Blessed','brian.blessed@yahoo.com','20'), 

           ('Kavin','Peterson','kavin.peterson@verizon.net','21');

 

select * from DuplicateRecords order by 2

 

/***************** Find Duplicate Records ******************/

-- 1st Method

;with cteD AS

       (

             select first_name,last_name,email,age

                           ,ROW_NUMBER() over (partition by first_name,last_name,email,age

                                        order by first_name,last_name,email,age) RNumber

             from DuplicateRecords

       )

select * from cteD

where RNumber>1

GO

-- 2nd Method

select A.* from DuplicateRecords A

JOIN DuplicateRecords B

       ON A.first_name=B.first_name

       AND A.last_name=B.last_name

       AND A.email=B.email

       AND A.age=B.age

       AND A.id>B.id

GO

-- 3rd Method

select A.* FROM DuplicateRecords A

WHERE

    id > (

        SELECT  min(id) FROM DuplicateRecords B

        WHERE A.first_name=B.first_name

                    AND A.last_name=B.last_name

                    AND A.email=B.email

                    AND A.age=B.age

    );

GO

-- 4th Method

SELECT first_name,last_name,email,age

FROM   DuplicateRecords

GROUP  BY first_name,last_name,email,age

HAVING COUNT(*) > 1

ORDER  BY COUNT(*) DESC

GO

-- 5th Method

select LU.*

FROM   (SELECT first_name,last_name,email,age,

               Row_number()

                 OVER (

                   partition BY first_name,last_name,email,age

                   ORDER BY id DESC) [Row]

        FROM   DuplicateRecords) LU

WHERE  [row] > 1

GO

-- 6th Method

select * FROM DuplicateRecords WHERE id IN

 (

  SELECT MAX(id) FROM DuplicateRecords

   GROUP BY  first_name,last_name,email,age HAVING COUNT(id)>1

 )

 

/***************** Delete Duplicate Records ******************/

-- 1st Method

;with cteD AS

       (

             select first_name,last_name,email,age

                           ,ROW_NUMBER() over (partition by first_name,last_name,email,age

                                        order by first_name,last_name,email,age) RNumber

             from DuplicateRecords

       )

delete from cteD

where RNumber>1

GO

-- 2nd Method

delete A from DuplicateRecords A

JOIN DuplicateRecords B

       ON A.first_name=B.first_name

       AND A.last_name=B.last_name

       AND A.email=B.email

       AND A.age=B.age

       AND A.id>B.id

GO

-- 3rd Method

delete A FROM DuplicateRecords A

WHERE

    id > (

        SELECT  min(id) FROM DuplicateRecords B

        WHERE A.first_name=B.first_name

                    AND A.last_name=B.last_name

                    AND A.email=B.email

                    AND A.age=B.age

    );

GO

-- 4th Method

delete LU

FROM   (SELECT first_name,last_name,email,age,

               Row_number()

                 OVER (

                   partition BY first_name,last_name,email,age

                   ORDER BY id DESC) [Row]

        FROM   DuplicateRecords) LU

WHERE  [row] > 1

GO

-- 5th Method

delete FROM DuplicateRecords WHERE id IN

 (

  SELECT MAX(id) FROM DuplicateRecords

   GROUP BY  first_name,last_name,email,age HAVING COUNT(id)>1

 )

GO

-- 6th Method

DELETE FROM DuplicateRecords

WHERE  ID NOT IN (SELECT MAX(ID)

                  FROM   DuplicateRecords

                  GROUP  BY first_name,last_name,email,age

                  HAVING MAX(ID) IS NOT NULL)

GO

Comments

  1. Really good information to show through this blog. I really appreciate you for all the valuable information that you are providing us through your blog.

    ReplyDelete

Post a Comment

Hi User,
Thanks for visiting My Blog and please provide your valuable feedback and subscribe for more updates. Please don't post any spam content or comments.
Thank You

Popular Posts

Failed to execute the package or element. Build errors were encountered

Restore of database 'DataBase_Name' failed. (Microsoft.SqlServer.Management.RelationalEngineTasks)

Cannot convert "Column" between a unicode and a non-unicode string data types in SSIS