一、数据库操作
1. 创建数据库
sql
CREATE DATABASE [IF NOT EXISTS] database_name
[COMMENT database_comment]
[LOCATION hdfs_path]
[WITH DBPROPERTIES (property_name=property_value, ...)];
示例:创建名为test_db
的数据库
sql
CREATE DATABASE IF NOT EXISTS test_db
COMMENT 'This is a test database'
LOCATION '/user/hive/databases/test_db'
WITH DBPROPERTIES ('creator'='admin', 'date'='2023-01-01');
2. 查看数据库
sql
SHOW DATABASES;
SHOW DATABASES LIKE 'hive_*'; -- 模糊查询
3. 切换数据库
sql
USE database_name;
4. 查看数据库详情
sql
DESCRIBE DATABASE [EXTENDED] database_name;
5. 删除数据库
sql
DROP DATABASE [IF EXISTS] database_name [CASCADE]; -- CASCADE表示级联删除表
二、表操作
1. 创建内部表
sql
CREATE TABLE [IF NOT EXISTS] table_name (
column_name data_type [COMMENT column_comment],
...
)
[COMMENT table_comment]
[ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n']
[STORED AS TEXTFILE]
[LOCATION hdfs_path];
示例:创建员工表
sql
CREATE TABLE IF NOT EXISTS employees (
id INT COMMENT 'Employee ID',
name STRING COMMENT 'Employee Name',
salary FLOAT COMMENT 'Employee Salary'
)
COMMENT 'Employee Information Table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
2. 创建外部表
sql
CREATE EXTERNAL TABLE [IF NOT EXISTS] table_name (
column_name data_type [COMMENT column_comment],
...
)
[COMMENT table_comment]
[ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n']
[STORED AS TEXTFILE]
LOCATION hdfs_path;
3. 创建分区表
sql
CREATE TABLE [IF NOT EXISTS] table_name (
column_name data_type [COMMENT column_comment],
...
)
PARTITIONED BY (partition_column data_type)
[ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'];
示例:按日期分区的日志表
sql
CREATE TABLE IF NOT EXISTS logs (
id INT,
content STRING
)
PARTITIONED BY (log_date STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
4. 查看表列表
sql
SHOW TABLES;
SHOW TABLES LIKE 'emp_*'; -- 模糊查询
5. 查看表结构
sql
DESCRIBE [EXTENDED] table_name; -- EXTENDED显示详细信息
6. 修改表名
sql
ALTER TABLE old_table_name RENAME TO new_table_name;
7. 删除表
sql
DROP TABLE [IF EXISTS] table_name; -- 内部表数据会被删除,外部表仅删除元数据
三、数据加载与导出
1. 从本地文件系统加载数据
sql
LOAD DATA LOCAL INPATH 'local_file_path'
[OVERWRITE] INTO TABLE table_name
[PARTITION (partition_column=value)];
示例:加载本地数据到员工表
sql
LOAD DATA LOCAL INPATH '/home/user/employees.csv'
OVERWRITE INTO TABLE employees;
2. 从 HDFS 加载数据
sql
LOAD DATA INPATH 'hdfs_file_path'
[OVERWRITE] INTO TABLE table_name
[PARTITION (partition_column=value)];
3. 插入数据到表
sql
INSERT INTO TABLE table_name [PARTITION (partition_column=value)]
SELECT column1, column2, ... FROM source_table;
4. 导出数据到本地
sql
INSERT OVERWRITE LOCAL DIRECTORY 'local_output_path'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
SELECT column1, column2, ... FROM table_name;
四、查询操作
1. 基本查询
sql
SELECT column1, column2, ...
FROM table_name
[WHERE condition]
[GROUP BY column]
[HAVING condition]
[ORDER BY column [ASC|DESC]]
[LIMIT number];
2. 关联查询
sql
SELECT a.column, b.column
FROM table_a a
JOIN table_b b ON a.key = b.key; -- INNER JOIN
-- LEFT JOIN, RIGHT JOIN, FULL OUTER JOIN
3. 子查询
sql
SELECT * FROM table_name
WHERE column IN (SELECT column FROM another_table WHERE condition);
五、分区与分桶
1. 添加分区
sql
ALTER TABLE table_name ADD [IF NOT EXISTS]
PARTITION (partition_column=value) [LOCATION hdfs_path];
2. 删除分区
sql
ALTER TABLE table_name DROP [IF EXISTS] PARTITION (partition_column=value);
3. 创建分桶表
sql
CREATE TABLE [IF NOT EXISTS] table_name (
column_name data_type,
...
)
CLUSTERED BY (column) INTO num_buckets BUCKETS;
六、常用函数
1. 字符串函数
sql
SELECT CONCAT('Hello', ' ', 'World'); -- 拼接字符串
SELECT SUBSTR('Hive', 1, 2); -- 截取子串,返回'HI'
SELECT LENGTH('Hive'); -- 返回长度4
2. 日期函数
sql
SELECT CURRENT_DATE(); -- 获取当前日期
SELECT DATE_FORMAT('2023-01-01', 'yyyy-MM-dd'); -- 格式化日期
3. 聚合函数
sql
SELECT COUNT(*), AVG(salary), MAX(salary) FROM employees;
七、其他常用命令
1. 查看 Hive 版本
sql
SELECT VERSION();
2. 执行 HQL 脚本
bash
hive -f /path/to/script.hql
3. 查看作业历史
sql
SHOW HISTORY;
4. 设置 Hive 参数
sql
SET hive.exec.dynamic.partition.mode=nonstrict; -- 动态分区模式