Partitions
First set these properties
-- Enable dynamic partitioning for this session and switch its mode to
-- nonstrict, so the insert may take every partition value from the query.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
How to create partitioned table
-- c1_part: id/name/email are regular columns; countrycode is the partition
-- column and is therefore declared in "partitioned by", not in the column list.
create table c1_part (
    id int,
    name string,
    email string
)
partitioned by (countrycode string);
How to insert data into partitioned table
-- Load c1_part from c1 using a dynamic partition on countrycode.
-- countrycode is selected last so that it lines up with partition (countrycode).
insert into table c1_part partition (countrycode)
select
    c1.id,
    c1.name,
    c1.email,
    c1.countrycode
from c1;
How to analyze partitioned tables
-- Template: tableName and partitionName are placeholders; substitute the real
-- table name and partition spec before running.
-- noscan requests statistics without scanning the data files.
analyze table tableName
    partition (partitionName)
    compute statistics noscan;
Bucketing
-- Session settings applied before creating and loading the bucketed table.
set hive.mapred.mode=nonstrict;
set hive.enforce.bucketing=true;

-- c1_buck: same columns as c1, with rows clustered by id into 10 buckets.
create table c1_buck (
    id int,
    name string,
    email string,
    countrycode string
)
clustered by (id) into 10 buckets;
How to insert data into bucketed table
-- Populate the bucketed table from c1.
-- Hive accepts either "insert overwrite table" (replace existing data) or
-- "insert into table" (append); combining both keywords is a syntax error.
-- Overwrite is used here; switch to "insert into table" to append instead.
insert overwrite table c1_buck
select
    c1.id,
    c1.name,
    c1.email,
    c1.countrycode
from c1;
Both Partitioning and Bucketing
-- Session settings: dynamic partitioning (nonstrict) plus the bucketing settings.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.mapred.mode=nonstrict;
set hive.enforce.bucketing=true;

-- c1_part_buck: partitioned by countrycode, and clustered by id into 10 buckets.
create table c1_part_buck (
    id int,
    name string,
    email string
)
partitioned by (countrycode string)
clustered by (id) into 10 buckets;

-- Load from c1; countrycode is selected last to line up with partition (countrycode).
insert into table c1_part_buck partition (countrycode)
select
    c1.id,
    c1.name,
    c1.email,
    c1.countrycode
from c1;
For transactional tables
-- Settings for transactional tables: turn on the compactor initiator,
-- enable concurrency support, and select DbTxnManager as the transaction manager.
set hive.compactor.initiator.on=true;
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
Hive TPC-DS query
-- Total extended sales price per year and brand for manufacturer 436,
-- restricted to month 12 (d_moy = 12); returns the first 100 rows ordered by
-- year, then descending sales total, then brand id.
-- Joins are written explicitly so the join keys stay separate from the filters.
select
    dt.d_year,
    item.i_brand_id as brand_id,
    item.i_brand as brand,
    sum(store_sales.ss_ext_sales_price) as sum_agg
from date_dim dt
inner join store_sales
    on dt.d_date_sk = store_sales.ss_sold_date_sk
inner join item
    on store_sales.ss_item_sk = item.i_item_sk
where item.i_manufact_id = 436
    and dt.d_moy = 12
group by
    dt.d_year,
    item.i_brand,
    item.i_brand_id
order by
    dt.d_year,
    sum_agg desc,
    brand_id
limit 100;