Details
-
Type:
Bug
-
Status: Open
-
Priority:
Major
-
Resolution: Unresolved
-
Affects Version/s: CDH 5.12.0
-
Fix Version/s: None
-
Component/s: Kudu
-
Labels: None
-
Environment: kudu 1.6.0-cdh5.14.2, impalad version 2.11.0-cdh5.14.2 RELEASE (build ed85dce709da9557aeb28be89e8044947708876c)
Description
Note: The actual CDH version is 5.14.2, which was unavailable in the "Component/s" dropdown when the issue was created.
"upsert select * from impala_table" is not working as expected and can result in duplicate primary key values in the Kudu table. Please review the sequence below to reproduce the problem.
-- Source table for the reproduction: two STRING data columns, partitioned by
-- click_date_key (declared as a partition column, not in the column list).
CREATE TABLE test.clicks_impala (
    click_key         STRING,
    some_non_pk_value STRING
)
PARTITIONED BY (click_date_key STRING);
-- Seed the source table with one row: click_key '1', value 'test_a',
-- click_date_key '2018-11-25'. The PARTITION clause gives no literal value, so
-- the partition is taken from the trailing SELECT column.
INSERT OVERWRITE test.clicks_impala
PARTITION (click_date_key)
SELECT
    '1'          AS click_key,
    'test_a'     AS some_non_pk_value,
    '2018-11-25' AS click_date_key;
-- Kudu target table. The primary key is (click_key, click_date_key); rows are
-- hash-partitioned on click_date_key into 100 partitions.
CREATE TABLE test.clicks_kudu (
    click_key         STRING,
    click_date_key    STRING,
    some_non_pk_value STRING,
    PRIMARY KEY (click_key, click_date_key)
)
PARTITION BY HASH (click_date_key) PARTITIONS 100
STORED AS KUDU;
-- First load into Kudu.
-- Columns are named explicitly on both sides because UPSERT ... SELECT maps the
-- SELECT output to the target columns by position, and the two tables order
-- their columns differently:
--   test.clicks_impala via SELECT * : click_key, some_non_pk_value, click_date_key
--                                     (the partition column comes last)
--   test.clicks_kudu                : click_key, click_date_key, some_non_pk_value
-- With the original `SELECT *`, some_non_pk_value was written into the
-- primary-key column click_date_key, so each new value created a new key
-- instead of updating the existing row.
UPSERT INTO test.clicks_kudu (click_key, click_date_key, some_non_pk_value)
SELECT
    click_key,
    click_date_key,
    some_non_pk_value
FROM test.clicks_impala;

-- Replace the source row: same key ('1', '2018-11-25'), new value 'test_b'.
INSERT OVERWRITE test.clicks_impala
PARTITION (click_date_key)
SELECT
    '1'          AS click_key,
    'test_b'     AS some_non_pk_value,
    '2018-11-25' AS click_date_key;

-- Second load: the primary key is unchanged, so this should update the
-- existing Kudu row rather than insert a new one.
UPSERT INTO test.clicks_kudu (click_key, click_date_key, some_non_pk_value)
SELECT
    click_key,
    click_date_key,
    some_non_pk_value
FROM test.clicks_impala;

-- Verify the result; ORDER BY keeps the output deterministic.
SELECT
    click_key,
    click_date_key,
    some_non_pk_value
FROM test.clicks_kudu
ORDER BY click_key, click_date_key;

-- Originally reported output when both upserts used `SELECT *`:
-- two records for click_key "1"
--   '1','test_a','2018-11-25'
--   '1','test_b','2018-11-25'
-- (the middle value sits in click_date_key, i.e. the columns were swapped)
--
-- Would expect click_key '1' some_non_pk_value to be updated from 'test_a' to
-- 'test_b'; with the explicit column lists above that is a single row:
--   '1','2018-11-25','test_b'