
important

create database veer;
use veer;
create table item_details(item_id int,item_name varchar(30),item_price int,item_color varchar(10)) row format delimited fields terminated by ',' lines terminated by '\n' stored as textfile;
hadoop fs -put /home/cloudera/sk/data/item.txt /user/hive/warehouse/veer.db/item_details
select count(*) from item_details;
select * from item_details order by item_id desc;
select item_color,count(*) from item_details group by item_color;
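-- An equivalent way to load the same local file without touching HDFS paths directly; a sketch reusing the item.txt path from above:
load data local inpath '/home/cloudera/sk/data/item.txt' into table item_details;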




---------------------------31-01-2018----------------------------------
create table flight_nt(s_id int,s_name varchar(30),code varchar(20),state varchar(40))
row format delimited fields terminated by ',' lines terminated by '\n' stored as textfile;

create table flight_partition
(s_id int,
s_name varchar(30),
code varchar(20)
)
PARTITIONED BY (state VARCHAR(64))
row format delimited
fields terminated by ','
lines terminated by '\n'
stored as textfile;

LOAD DATA LOCAL INPATH '/local/home/srv-ae-si01/RTCTM/arun_dev/interns_training/data/FlightBook.txt' INTO TABLE interns_training.flight_nt;


--- PARTITIONED ---

CREATE TABLE IF NOT EXISTS flightdata_partition(
id int,
name varchar(20),
code varchar(10)
)
PARTITIONED BY (
year int,
month int,
day int)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/home/cloudera/sk/data/FlightBook.txt';
-- note: LOCATION belongs before the closing semicolon, and it must point to an HDFS directory, not a single local file

set hive.exec.dynamic.partition=true;               -- allow dynamic partition inserts
set hive.exec.dynamic.partition.mode=nonstrict;     -- no static partition column required
set hive.exec.max.dynamic.partitions=1000;
set hive.exec.max.dynamic.partitions.pernode=1000;
set hive.exec.compress.output=true;
set hive.enforce.bucketing=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
set mapred.output.compression.type=BLOCK;
-- Dynamic partition --

insert into flight_partition partition(state) select * from interns_training.flight_nt;
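-- To verify that the dynamic insert created one partition per distinct state:
show partitions flight_partition;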

--- Static partition ---
insert into flight_partition partition(state='Australia') select s_id,s_name,code from interns_training.flight_nt where state='Australia';

alter table flight_partition add partition(state='United_States');
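-- Rows can then be put into the newly added partition with the same static-partition pattern as above:
insert into flight_partition partition(state='United_States') select s_id,s_name,code from interns_training.flight_nt where state='United_States';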

---- Bucketing

create table flight_bucket
(
s_id int,
s_name varchar(30),
code varchar(20),
state varchar(30)
)
CLUSTERED BY (s_id) INTO 10 BUCKETS
row format delimited
fields terminated by ','
lines terminated by '\n'
stored as textfile;

set hive.enforce.bucketing = true;

insert into flight_bucket select * from interns_training.flight_nt;
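-- Each bucket is written as a separate file, so the table directory should now hold 10 files.
-- A sketch assuming the default warehouse location and the veer database from above:
dfs -ls /user/hive/warehouse/veer.db/flight_bucket;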

---- ORC -------------------------

CREATE TABLE flight_orc
(s_id int,
s_name varchar(30),
code varchar(20),
state string
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS ORC;

insert into flight_orc select * from interns_training.flight_nt;
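-- To confirm the storage format actually changed, describe the table; the InputFormat/SerDe rows should show ORC:
describe formatted flight_orc;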

------------------------------PARTITIONED_BUCKET_ORC---------------------------
create table flight_parti_bucket_orc
(
s_id int,
s_name varchar(30),
code varchar(20)
)
partitioned by (state varchar(30)) 
CLUSTERED BY (s_id) INTO 10 BUCKETS
row format delimited
fields terminated by ','
lines terminated by '\n' 
stored as orc;
insert into flight_parti_bucket_orc partition(state) select * from flight_nt;  -- the dynamic-partition settings above must still be active
-----------------------------------snappy--------------------
create table flight_parti_bucket_orc_snappy  -- separate name, so it does not collide with the table above
(
s_id int,
s_name varchar(30),
code varchar(20)
)
partitioned by (state varchar(30))
CLUSTERED BY (s_id) INTO 10 BUCKETS
row format delimited
fields terminated by ','
lines terminated by '\n'
stored as orc
tblproperties ("orc.compress"="SNAPPY");
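-- To check that the compression property was recorded on the table:
show tblproperties flight_parti_bucket_orc_snappy;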
------------------------------------------------------------------------------



create table flight_partition_bucket(s_id int,s_name varchar(30),code varchar(20))
PARTITIONED BY (state VARCHAR(64)) CLUSTERED BY (s_id) INTO 10 BUCKETS
row format delimited fields terminated by ',' lines terminated by '\n' stored as textfile;
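-- A sketch of populating it, assuming the dynamic-partition and bucketing settings above are still in effect:
insert into flight_partition_bucket partition(state) select * from flight_nt;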




insert into flight_partition partition(state) select * from veer.flight_nt limit 1000;


select A.state,B.state from flight_nt A join flight_orc B on A.state=B.state limit 100;




-----------------------------------------------------------------------------------------
set hive.enforce.sortmergebucketmapjoin=false;

set hive.auto.convert.sortmerge.join=true;

set hive.optimize.bucketmapjoin = true;

set hive.optimize.bucketmapjoin.sortedmerge = true;

set hive.auto.convert.join=false;  -- otherwise Hive automatically converts this to a plain map-side join
-------------------------------------------------1/2/2018--------------------------------------
-------------------------------left semi join--------------
select f.s_id from flight_nt f left semi join flight_join j on f.s_id=j.id;
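-- LEFT SEMI JOIN keeps only left-side rows that have a match on the right; it returns the same rows as an IN subquery:
select s_id from flight_nt where s_id in (select id from flight_join);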


--------------------------map side join------------------


select f.s_id,j.id from flight_nt f join flight_join j on f.s_id=j.id;
-- Time taken: 74.647 seconds (before enabling map-join conversion)

set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
-------error-------
select f.s_id,j.id from flight_nt f join flight_join j on (f.s_id=j.id);
Query ID = cloudera_20180201031919_307c2d2e-eb9f-4789-b777-91f1506b11d5
Total jobs = 1
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask
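-- A MapredLocalTask failure like this is often the local hash-table task running out of memory while loading the small table.
-- A possible workaround sketch: raise the local task's memory threshold, or fall back to a reduce-side join:
set hive.mapjoin.localtask.max.memory.usage=0.99;  -- let the local task use up to 99% of its heap
set hive.auto.convert.join=false;                  -- or give up on the map join entirely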

-----------Sort-Merge-Bucket (SMB) Map Join------------

set hive.enforce.bucketing=true;
set hive.enforce.sorting=true;
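-- An SMB map join needs both sides bucketed AND sorted on the join key into the same number of buckets.
-- A minimal sketch; flight_bucket_sorted is an illustrative table, not one created earlier in these notes:
create table flight_bucket_sorted
(s_id int, s_name varchar(30), code varchar(20), state varchar(30))
clustered by (s_id) sorted by (s_id) into 10 buckets
stored as textfile;
insert into flight_bucket_sorted select * from flight_nt;
-- with hive.auto.convert.sortmerge.join=true (set earlier), a self-join on the bucket/sort key can run as an SMB map join:
select a.s_id, b.s_name from flight_bucket_sorted a join flight_bucket_sorted b on a.s_id = b.s_id;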
---------------skewed join------------

set hive.optimize.skewjoin = true;              -- handle skewed join keys in a follow-up map-join job
set hive.skewjoin.key=500000;                   -- a key seen in more rows than this is treated as skewed
set hive.skewjoin.mapjoin.map.tasks=10000;      -- cap on map tasks for that follow-up job
set hive.skewjoin.mapjoin.min.split=33554432;   -- minimum split size (32 MB) for those map tasks
