Test database tables:
create table item ( item_id serial not null, item text not null, constraint pk_item primary key(item_id), constraint uk_item unique (item) ); create table purchased ( purchased_id serial not null, item_id int not null, qty int not null, constraint pk_purchased primary key(purchased_id), constraint fk_purchased__item foreign key(item_id) references item(item_id) );
Test data:
insert into item (item) values ('cpu'), ('keyboard'), ('keyboard '), ('mouse'); insert into purchased(item_id,qty) values (1,2), (2,26), (3,19), (4,51), (3,5);
test=# select * from item; item_id | item ---------+----------- 1 | cpu 2 | keyboard 3 | keyboard 4 | mouse (4 rows) test=# select * from purchased; purchased_id | item_id | qty --------------+---------+----- 1 | 1 | 2 2 | 2 | 26 3 | 3 | 19 4 | 4 | 51 5 | 3 | 5 (5 rows)
Reporting summary on items:
select i.item_id, i.item, sum(p.qty) from purchased p join item i using(item_id) group by i.item_id , i.item order by i.item
Output:
item_id | item | sum ---------+-----------+----- 1 | cpu | 2 2 | keyboard | 26 3 | keyboard | 24 4 | mouse | 51 (4 rows)Something is wrong, not with your query, but with the data
CLEAN AND DELETE DUPLICATE DATA. 8 STEPS
Step 1 of 8 - Identify the duplicates:
select trim(lower(i.item)) as unified_name, min(item_id) as unified_id from item i group by unified_name having count(*) > 1 order by unified_name
unified_name | unified_id --------------+------------ keyboard | 2 (1 row)
Step 2 of 8 - Identify purchases that belongs to duplicated item:
select p.purchased_id, unified_item.unified_id from purchased p join item i using(item_id) join ( select trim(lower(i.item)) as unified_name, min(item_id) as unified_id from item i group by unified_name having count(*) > 1 order by unified_name ) as unified_item on unified_item.unified_name = trim(lower(i.item))
purchased_id | unified_id --------------+------------ 2 | 2 3 | 2 5 | 2 (3 rows)
Step 3 of 8 - Update those purchases that belongs to duplicated item_id with the unified id:
update purchased set item_id = unified_item.unified_id -- move the sql from step 2 here, then remove the select clause -- select p.purchased_id, unified_item.unified_id from purchased p join item i on i.item_id = p.item_id join ( select trim(lower(i.item)) as unified_name, min(item_id) as unified_id from item i group by unified_name having count(*) > 1 order by unified_name ) as unified_item on unified_item.unified_name = trim(lower(i.item)) where p.purchased_id = purchased.purchased_id
UPDATE 3
Step 4 of 8 - Verify Correctness:
select i.item_id, i.item, sum(p.qty) from purchased p join item i using(item_id) group by i.item_id , i.item order by i.item
item_id | item | sum ---------+----------+----- 1 | cpu | 2 2 | keyboard | 50 4 | mouse | 51 (3 rows)
Step 5 of 8 - Delete duplicates
select * from item;
item_id | item ---------+----------- 1 | cpu 2 | keyboard 3 | keyboard 4 | mouse (4 rows)
delete from item where item_id not in (select min(item_id) from item group by trim(lower(item)))
DELETE 1
Step 6 of 8 - Verify correctness:
select * from item;
item_id | item ---------+---------- 1 | cpu 2 | keyboard 4 | mouse (3 rows)
Step 7 of 8 - Future-proof uniqueness:
create unique index ux_item on item using btree(trim(lower(item)))
Step 8 of 8 - Verify future-proofing (note that extra spaces there):
insert into item(item) values('keyboard ');
ERROR: duplicate key value violates unique constraint "ux_item"
select * from item;
item_id | item ---------+---------- 1 | cpu 2 | keyboard 4 | mouse (3 rows)
impressive!
ReplyDelete