Using Hive, Part 2
Hive installation was covered in an earlier post: http://www.highersoft.net/html/notice/notice_703.html
The version used is 3.1.2.
Today the focus is creating tables and importing data.
The environment variable was already configured:
export HIVE_CONF_DIR=/home/chengzhong/hadoop/apache-hive-3.1.2-bin/conf
Run the hive command to enter the Hive shell, then create and switch to the database:
create database gd_monitor_platform_dev;
use gd_monitor_platform_dev;
show tables;
Use a SQL tool to generate the CREATE statement, then execute it (the tool is described below).
Create the target directory in Hadoop: /home/chengzhong/hadoop/csv
Note that this is an HDFS path, not a local one: plain LOAD DATA INPATH reads from HDFS, while a file on the local filesystem needs the LOCAL variant (see the sketch after the load commands below).
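One way to create the directory and upload the CSV without leaving the Hive shell is the built-in dfs command; a minimal sketch, where the local source path under /tmp is hypothetical:

dfs -mkdir -p /home/chengzhong/hadoop/csv;
dfs -put /tmp/t_monitor_info_202308141504.csv /home/chengzhong/hadoop/csv/;

Keep in mind that LOAD DATA INPATH later moves (not copies) this HDFS file into the table's warehouse directory.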
The SQL tool:
In MySQL's test database:
use test;

CREATE TABLE dim_ddl_convert (
    source      VARCHAR(100) NOT NULL,
    data_type1  VARCHAR(100) NOT NULL,
    target      VARCHAR(100) NOT NULL,
    data_type2  VARCHAR(100),
    update_time datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
    PRIMARY KEY (source, data_type1, target)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='database table structure conversion';

INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'bigint', 'hive', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'bigint', 'odps', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'binary', 'hive', 'BINARY');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'binary', 'odps', 'BINARY');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'char', 'hive', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'char', 'odps', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'date', 'hive', 'DATE');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'datetime', 'hive', 'TIMESTAMP');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'datetime', 'odps', 'DATETIME');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'decimal', 'hive', 'DECIMAL');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'decimal', 'odps', 'DOUBLE');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'double', 'hive', 'DOUBLE');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'double', 'odps', 'DOUBLE');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'float', 'hive', 'DOUBLE');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'float', 'odps', 'DOUBLE');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'int', 'hive', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'int', 'odps', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'json', 'hive', 'MAP<STRING,STRING>');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'json', 'odps', 'MAP<STRING,STRING>');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'mediumtext', 'hive', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'mediumtext', 'odps', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'smallint', 'hive', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'smallint', 'odps', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'text', 'hive', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'text', 'odps', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'time', 'hive', 'TIMESTAMP');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'time', 'odps', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'timestamp', 'hive', 'TIMESTAMP');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'timestamp', 'odps', 'DATETIME');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'tinyint', 'hive', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'tinyint', 'odps', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varbinary', 'hive', 'BINARY');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varbinary', 'odps', 'BINARY');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varchar', 'db2', 'varchar');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varchar', 'hive', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varchar', 'odps', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varchar', 'oracle', 'varchar');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varchar', 'sqlserver', 'varchar');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varchar', 'sybase', 'varchar');

# GROUP_CONCAT output is truncated at MySQL's default limit of 1024 bytes; raise it, e.g. by a factor of 100
SET SESSION group_concat_max_len = 102400;

SELECT
    a.table_name,
    b.table_comment,
    concat(
        'CREATE EXTERNAL TABLE IF NOT EXISTS ', a.table_name, ' (',
        group_concat(
            concat(
                '`', a.column_name, '` ',
                CASE WHEN a.column_type LIKE 'decimal%' THEN a.column_type ELSE c.data_type2 END,
                " COMMENT '", column_comment, "'"
            )
            ORDER BY a.table_name, a.ordinal_position
        ),
        ") COMMENT '", b.table_comment,
        "' ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE;"
    ) AS col_name
FROM (
    SELECT
        table_schema, table_name, column_name, ordinal_position,
        CASE WHEN column_type LIKE '%decimal%' THEN column_type ELSE data_type END AS data_type,
        column_type, column_comment
    FROM information_schema.COLUMNS
    WHERE table_schema = 'gd_monitor_platform_dev'  # database name
      AND table_name = 't_monitor_info'             # table name
) AS a
LEFT JOIN information_schema.TABLES AS b
    ON a.table_name = b.table_name AND a.table_schema = b.table_schema
LEFT JOIN (
    SELECT * FROM dim_ddl_convert WHERE source = 'mysql' AND target = 'hive'
) AS c
    ON (a.data_type = c.data_type1 OR a.data_type LIKE '%decimal%')
WHERE b.table_type = 'BASE TABLE'
  AND a.TABLE_NAME NOT LIKE 'dim_ddl_convert%'
GROUP BY a.TABLE_NAME, b.TABLE_COMMENT;

Take the final SELECT above, swap in your own database and table names, and run it to generate the Hive CREATE statement.
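For illustration, here is the kind of statement the generator produces for a hypothetical three-column table (the columns below are an assumption, not the real t_monitor_info schema; the tool emits it as a single line, reformatted here for readability):

CREATE EXTERNAL TABLE IF NOT EXISTS t_monitor_info (
    `id` BIGINT COMMENT 'primary key',
    `name` STRING COMMENT 'monitor name',
    `create_time` TIMESTAMP COMMENT 'creation time'
) COMMENT 'monitor info'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

Note how the mapping table drove the types: bigint became BIGINT, varchar became STRING, and datetime became TIMESTAMP.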
The five storage formats for table creation: https://blog.csdn.net/ZZQHELLO2018/article/details/106175887
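For quick reference, the storage format is chosen with the STORED AS clause; the five formats usually meant are TEXTFILE (the default), SEQUENCEFILE, RCFILE, ORC, and PARQUET. A minimal sketch with a hypothetical table:

-- hypothetical table stored as ORC instead of the default TEXTFILE
CREATE TABLE demo_orc (id BIGINT, name STRING) STORED AS ORC;

LOAD DATA does not convert file formats, so the CSV import below relies on the target table being STORED AS TEXTFILE.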
Export the data as CSV files, making sure fields are separated by commas, then run:
LOAD DATA INPATH '/home/chengzhong/hadoop/csv/t_monitor_info_202308141504.csv' OVERWRITE INTO TABLE t_monitor_info;
LOAD DATA INPATH '/Users/chengzhong/t_traffic_reported_data_20230721_202308141755.csv' OVERWRITE INTO TABLE t_traffic_reported_data_20230721;
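If a CSV still sits on the local filesystem rather than HDFS, the LOCAL variant is the standard alternative; a sketch with a hypothetical local path:

LOAD DATA LOCAL INPATH '/tmp/t_monitor_info_202308141504.csv' OVERWRITE INTO TABLE t_monitor_info;

Unlike the plain INPATH form, which moves the HDFS file into the table directory, the LOCAL form copies the file.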
At this point select * from t_monitor_info returns no results; the query can be forced to run locally with the following setting:
set hive.exec.mode.local.auto=true;

To query Hive from our project we pulled in hive-jdbc, after which Spring Boot would no longer start (it started fine before the dependency was added), failing with the error below.
org.springframework.context.ApplicationContextException: Unable to start web server; nested exception is org.springframework.boot.web.server.WebServerException: Unable to start embedded Tomcat
The fix is to exclude jetty-runner from the hive-jdbc dependency:

<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <!-- the server is also 3.1.2 -->
    <version>3.1.2</version>
    <!-- switching to 2.3.3 or 2.1.1 both hit JDK 1.7 problems -->
    <exclusions>
        <!-- https://blog.csdn.net/web15085181368/article/details/123714335 -->
        <exclusion>
            <groupId>org.eclipse.jetty</groupId>
            <artifactId>jetty-runner</artifactId>
        </exclusion>
    </exclusions>
</dependency>