Hive DDL操作示例

打卤 2013-04-11

记录Hive CLI中常见DDL操作示例, 备用!

1. Databases in Hive
   SHOW DATABASES;
   SHOW DATABASES LIKE 'h.*';
   CREATE DATABASE financials;
   CREATE DATABASE IF NOT EXISTS financials;
   CREATE DATABASE financials LOCATION '/my/preferred/directory';
   CREATE DATABASE financials COMMENT 'Holds all financial tables';
   CREATE DATABASE financials WITH DBPROPERTIES ('creator' = 'Mark Moneybags', 'date' = '2012-01-02');
   DESCRIBE DATABASE financials;
   DESCRIBE DATABASE EXTENDED financials;
   切换Database:
   use financials;
   DROP DATABASE IF EXISTS financials;
   DROP DATABASE IF EXISTS financials CASCADE;
2. Alter Databases
   ALTER DATABASE financials SET DBPROPERTIES ('edited-by' = 'Joe Dba');  
3. Creating Tables
   CREATE TABLE IF NOT EXISTS mydb.employees (
   name STRING COMMENT 'Employee name',
   salary FLOAT COMMENT 'Employee salary',
   subordinates ARRAY<STRING> COMMENT 'Names of subordinates',
   deductions MAP<STRING, FLOAT>
   COMMENT 'Keys are deductions names, values are percentages',
   address STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>
   COMMENT 'Home address')
   COMMENT 'Description of the table'
   TBLPROPERTIES ('creator'='me', 'created_at'='2012-01-02 10:00:00', ...)
   LOCATION '/user/hive/warehouse/mydb.db/employees';
   
   CREATE TABLE IF NOT EXISTS mydb.employees2 LIKE mydb.employees;
   
   show tblproperties employees;
   SHOW TABLES IN mydb;
   SHOW TABLES 'empl.*';
   
   describe extended employees;
   describe formatted employees;
   
   DESCRIBE mydb.employees.salary;
4. Managed Tables
   CREATE EXTERNAL TABLE IF NOT EXISTS stocks (
   exchange STRING,
   symbol STRING,
   ymd STRING,
   price_open FLOAT,
   price_high FLOAT,
   price_low FLOAT,
   price_close FLOAT,
   volume INT,
   price_adj_close FLOAT)
   ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
   LOCATION '/data/stocks';
   
   CREATE EXTERNAL TABLE IF NOT EXISTS mydb.employees3
   LIKE mydb.employees
   LOCATION '/path/to/data';
5. Partitioned, Managed Tables
   CREATE TABLE employees (
   name STRING,
   salary FLOAT,
   subordinates ARRAY<STRING>,
   deductions MAP<STRING, FLOAT>,
   address STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>
   )
   PARTITIONED BY (country STRING, state STRING);
   
   SHOW PARTITIONS employees PARTITION(country='US');
   SHOW PARTITIONS employees PARTITION(country='US', state='AK');   
6. Customizing Table Storage Formats
   CREATE TABLE kst
   PARTITIONED BY (ds string)
   ROW FORMAT SERDE 'com.linkedin.haivvreo.AvroSerDe'
   WITH SERDEPROPERTIES ('schema.url'='http://schema_provider/kst.avsc')
   STORED AS
   INPUTFORMAT 'com.linkedin.haivvreo.AvroContainerInputFormat'
   OUTPUTFORMAT 'com.linkedin.haivvreo.AvroContainerOutputFormat';
   
   CREATE EXTERNAL TABLE IF NOT EXISTS stocks (
   exchange STRING,
   symbol STRING,
   ymd STRING,
   price_open FLOAT,
   price_high FLOAT,
   price_low FLOAT,
   price_close FLOAT,
   volume INT,
   price_adj_close FLOAT)
   CLUSTERED BY (exchange, symbol)
   SORTED BY (ymd ASC)
   INTO 96 BUCKETS
   ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
   LOCATION '/data/stocks';
7. Dropping Tables
   DROP TABLE IF EXISTS employees;
8. Alter Table
   ALTER TABLE log_messages RENAME TO logmsgs;
   
   ALTER TABLE log_messages ADD IF NOT EXISTS
   PARTITION (year = 2011, month = 1, day = 1) LOCATION '/logs/2011/01/01'
   PARTITION (year = 2011, month = 1, day = 2) LOCATION '/logs/2011/01/02'
   PARTITION (year = 2011, month = 1, day = 3) LOCATION '/logs/2011/01/03'
   ...;
   
   ALTER TABLE log_messages PARTITION(year = 2011, month = 12, day = 2)
   SET LOCATION 's3n://ourbucket/logs/2011/01/02';
   
   ALTER TABLE log_messages DROP IF EXISTS PARTITION(year = 2011, month = 12, day = 2);
   
   ALTER TABLE log_messages
   CHANGE COLUMN hms hours_minutes_seconds INT
   COMMENT 'The hours, minutes, and seconds part of the timestamp'
   AFTER severity;
   
   ALTER TABLE log_messages ADD COLUMNS (
   app_name STRING COMMENT 'Application name',
   session_id LONG COMMENT 'The current session id');
   
   ALTER TABLE log_messages REPLACE COLUMNS (
   hours_mins_secs INT COMMENT 'hour, minute, seconds from timestamp',
   severity STRING COMMENT 'The message severity'
   message STRING COMMENT 'The rest of the message');
   
   ALTER TABLE log_messages SET TBLPROPERTIES ('notes' = 'The process id is 
   no longer captured; this column is always NULL');
   
   ALTER TABLE log_messages
   PARTITION(year = 2012, month = 1, day = 1)
   SET FILEFORMAT SEQUENCEFILE;
   
   ALTER TABLE table_using_JSON_storage
   SET SERDE 'com.example.JSONSerDe'
   WITH SERDEPROPERTIES (
   'prop1' = 'value1',
   'prop2' = 'value2');
   
   ALTER TABLE stocks
   CLUSTERED BY (exchange, symbol)
   SORTED BY (symbol)
   INTO 48 BUCKETS;
   
   ALTER TABLE log_messages TOUCH PARTITION(year = 2012, month = 1, day = 1);
   ALTER TABLE log_messages ARCHIVE PARTITION(year = 2012, month = 1, day = 1);
   
   ALTER TABLE log_messages PARTITION(year = 2012, month = 1, day = 1) ENABLE NO_DROP;
   ALTER TABLE log_messages PARTITION(year = 2012, month = 1, day = 1) ENABLE OFFLINE;

相关推荐