Test database tables:
create table item ( item_id serial not null, item text not null, constraint pk_item primary key(item_id), constraint uk_item unique (item) ); create table purchased ( purchased_id serial not null, item_id int not null, qty int not null, constraint pk_purchased primary key(purchased_id), constraint fk_purchased__item foreign key(item_id) references item(item_id) );
Test data:
insert into item (item) values
('cpu'),
('keyboard'),
('keyboard '),
('mouse');
insert into purchased(item_id,qty) values
(1,2),
(2,26),
(3,19),
(4,51),
(3,5);
test=# select * from item;
 item_id |   item    
---------+-----------
       1 | cpu
       2 | keyboard
       3 | keyboard 
       4 | mouse
(4 rows)
test=# select * from purchased;
 purchased_id | item_id | qty 
--------------+---------+-----
            1 |       1 |   2
            2 |       2 |  26
            3 |       3 |  19
            4 |       4 |  51
            5 |       3 |   5
(5 rows)
Reporting summary on items:
select i.item_id, i.item, sum(p.qty) 
from purchased p 
join item i using(item_id) 
group by i.item_id
    , i.item
order by i.item
Output:
item_id |   item    | sum 
---------+-----------+-----
       1 | cpu       |   2
       2 | keyboard  |  26
       3 | keyboard  |  24
       4 | mouse     |  51
(4 rows)
Something is wrong, not with your query, but with the dataCLEAN AND DELETE DUPLICATE DATA. 8 STEPS
Step 1 of 8 - Identify the duplicates:
select trim(lower(i.item)) as unified_name, min(item_id) as unified_id from item i group by unified_name having count(*) > 1 order by unified_name
unified_name | unified_id --------------+------------ keyboard | 2 (1 row)
Step 2 of 8 - Identify purchases that belongs to duplicated item:
select p.purchased_id, unified_item.unified_id from purchased p join item i using(item_id) join ( select trim(lower(i.item)) as unified_name, min(item_id) as unified_id from item i group by unified_name having count(*) > 1 order by unified_name ) as unified_item on unified_item.unified_name = trim(lower(i.item))
purchased_id | unified_id 
--------------+------------
            2 |          2
            3 |          2
            5 |          2
(3 rows)
Step 3 of 8 - Update those purchases that belongs to duplicated item_id with the unified id:
update purchased set item_id = unified_item.unified_id -- move the sql from step 2 here, then remove the select clause -- select p.purchased_id, unified_item.unified_id from purchased p join item i on i.item_id = p.item_id join ( select trim(lower(i.item)) as unified_name, min(item_id) as unified_id from item i group by unified_name having count(*) > 1 order by unified_name ) as unified_item on unified_item.unified_name = trim(lower(i.item)) where p.purchased_id = purchased.purchased_id
UPDATE 3
Step 4 of 8 - Verify Correctness:
select i.item_id, i.item, sum(p.qty) 
from purchased p 
join item i using(item_id) 
group by i.item_id
    , i.item
order by i.item
 item_id |   item   | sum 
---------+----------+-----
       1 | cpu      |   2
       2 | keyboard |  50
       4 | mouse    |  51
(3 rows)
Step 5 of 8 - Delete duplicates
select * from item;
 item_id |   item    
---------+-----------
       1 | cpu
       2 | keyboard
       3 | keyboard 
       4 | mouse
(4 rows)
delete from item where item_id not in (select min(item_id) from item group by trim(lower(item)))
DELETE 1
Step 6 of 8 - Verify correctness:
select * from item;
 item_id |   item   
---------+----------
       1 | cpu
       2 | keyboard
       4 | mouse
(3 rows)
Step 7 of 8 - Future-proof uniqueness:
create unique index ux_item on item using btree(trim(lower(item)))
Step 8 of 8 - Verify future-proofing (note that extra spaces there):
insert into item(item) values('keyboard  ');
ERROR: duplicate key value violates unique constraint "ux_item"
select * from item;
 item_id |   item   
---------+----------
       1 | cpu
       2 | keyboard
       4 | mouse
(3 rows)

 
impressive!
ReplyDelete