Hive Usage (2)

Hive installation was covered earlier: http://www.highersoft.net/html/notice/notice_703.html

The version used is 3.1.2.

Today the focus is creating tables and importing data.

The environment variable was configured previously:

export HIVE_CONF_DIR=/home/chengzhong/hadoop/apache-hive-3.1.2-bin/conf


Enter the Hive shell with the hive command.

create database gd_monitor_platform_dev;
use gd_monitor_platform_dev;

show tables;

Generate the CREATE statement with the SQL tool below, then execute it.

Create the target directory in HDFS: /home/chengzhong/hadoop/csv

Note that this is an HDFS path; for a file on the local filesystem, LOAD DATA LOCAL INPATH is used instead (see the load step further down).
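
A minimal sketch, assuming the CSV was first exported to a local file (the /tmp source path is hypothetical); both commands can be run from inside the Hive CLI, which passes dfs commands through to HDFS:

dfs -mkdir -p /home/chengzhong/hadoop/csv;
dfs -put /tmp/t_monitor_info_202308141504.csv /home/chengzhong/hadoop/csv/;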



The SQL tool:

In MySQL's test database:

use test;
CREATE TABLE
    dim_ddl_convert
    (
        source VARCHAR(100) NOT NULL,
        data_type1 VARCHAR(100) NOT NULL,
        target VARCHAR(100) NOT NULL,
        data_type2 VARCHAR(100),
        update_time datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
        PRIMARY KEY (source, data_type1, target)
    )
    ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='table structure conversion between databases';


INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'bigint', 'hive', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'bigint', 'odps', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'binary', 'hive', 'BINARY');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'binary', 'odps', 'BINARY');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'char', 'hive', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'char', 'odps', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'date', 'hive', 'DATE');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'datetime', 'hive', 'TIMESTAMP');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'datetime', 'odps', 'DATETIME');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'decimal', 'hive', 'DECIMAL');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'decimal', 'odps', 'DOUBLE');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'double', 'hive', 'DOUBLE');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'double', 'odps', 'DOUBLE');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'float', 'hive', 'DOUBLE');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'float', 'odps', 'DOUBLE');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'int', 'hive', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'int', 'odps', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'json', 'hive', 'MAP<STRING,STRING>');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'json', 'odps', 'MAP<STRING,STRING>');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'mediumtext', 'hive', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'mediumtext', 'odps', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'smallint', 'hive', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'smallint', 'odps', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'text', 'hive', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'text', 'odps', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'time', 'hive', 'TIMESTAMP');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'time', 'odps', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'timestamp', 'hive', 'TIMESTAMP');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'timestamp', 'odps', 'DATETIME');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'tinyint', 'hive', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'tinyint', 'odps', 'BIGINT');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varbinary', 'hive', 'BINARY');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varbinary', 'odps', 'BINARY');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varchar', 'db2', 'varchar');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varchar', 'hive', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varchar', 'odps', 'STRING');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varchar', 'oracle', 'varchar');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varchar', 'sqlserver', 'varchar');
INSERT INTO dim_ddl_convert (source, data_type1, target, data_type2) VALUES ('mysql', 'varchar', 'sybase', 'varchar');
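
As a quick sanity check, the mysql-to-hive mappings that the generator query below joins against can be listed with a plain SELECT:

SELECT data_type1, data_type2 FROM dim_ddl_convert WHERE source = 'mysql' AND target = 'hive' ORDER BY data_type1;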


# MySQL limits the result of GROUP_CONCAT to 1024 bytes by default; raise the limit, e.g. a hundredfold:

SET SESSION group_concat_max_len = 102400;
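
To confirm the new limit took effect in the current session:

SHOW VARIABLES LIKE 'group_concat_max_len';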





SELECT
    a.table_name,
    b.table_comment,
    concat(
        'CREATE EXTERNAL TABLE IF NOT EXISTS  ',
        a.table_name,
        ' (',
        group_concat(
            concat(
                '`',
                a.column_name,
                '` ',
            CASE
                    WHEN a.column_type LIKE 'decimal%' THEN
                    a.column_type ELSE c.data_type2 
                END,
                " COMMENT '",
                column_comment,
                "'" 
            ) 
        ORDER BY
            a.table_name,
            a.ordinal_position 
        ),
        ") COMMENT '",
        b.table_comment,
        "' ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE;" 
) AS col_name 
FROM
    (
    SELECT
        table_schema,
        table_name,
        column_name,
        ordinal_position,
    CASE
            WHEN column_type LIKE '%decimal%' THEN
            column_type ELSE data_type 
        END AS data_type,
        column_type,
        column_comment 
    FROM
        information_schema.COLUMNS
    WHERE
        table_schema = 'gd_monitor_platform_dev' # database name
        and table_name='t_monitor_info'                # table name
    ) AS a
    LEFT JOIN information_schema.TABLES AS b ON a.table_name = b.table_name 
    AND a.table_schema = b.table_schema
    LEFT JOIN ( SELECT * FROM dim_ddl_convert WHERE source = 'mysql' AND target = 'hive' ) AS c ON a.data_type = c.data_type1 # decimal columns keep their full column_type via the CASE above and need no mapping row here
WHERE
    b.table_type = 'BASE TABLE' 
  and a.TABLE_NAME not like 'dim_ddl_convert%'
GROUP BY
    a.TABLE_NAME,
    b.TABLE_COMMENT
;
Running the SELECT above with your own database and table names generates the Hive CREATE statement.
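
For illustration only: assuming a hypothetical t_monitor_info with columns id bigint, name varchar(50), and create_time datetime, the generator emits a one-line statement equivalent to the following (reflowed here for readability):

CREATE EXTERNAL TABLE IF NOT EXISTS t_monitor_info (
    `id` BIGINT COMMENT 'primary key',
    `name` STRING COMMENT 'monitor name',
    `create_time` TIMESTAMP COMMENT 'created at'
) COMMENT 'monitor info'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE;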

For the five storage formats available when creating tables, see https://blog.csdn.net/ZZQHELLO2018/article/details/106175887
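
As a sketch of one alternative to TEXTFILE (table and columns hypothetical; ORC is one common columnar choice):

CREATE TABLE t_monitor_info_orc (
    `id` BIGINT,
    `name` STRING
) STORED AS ORC;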


Export a CSV file, making sure fields are separated by commas. Then run:

LOAD DATA INPATH '/home/chengzhong/hadoop/csv/t_monitor_info_202308141504.csv' OVERWRITE INTO TABLE t_monitor_info;

LOAD DATA INPATH '/Users/chengzhong/t_traffic_reported_data_20230721_202308141755.csv' OVERWRITE INTO TABLE t_traffic_reported_data_20230721;
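
On path semantics: LOAD DATA INPATH moves a file that is already in HDFS into the table's directory, while the LOCAL keyword copies a file from the local filesystem instead. A local-file variant of the first load (path hypothetical):

LOAD DATA LOCAL INPATH '/home/chengzhong/t_monitor_info_202308141504.csv' OVERWRITE INTO TABLE t_monitor_info;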


At this point select * from t_monitor_info returns nothing; the following setting lets Hive run the query in local mode:

set hive.exec.mode.local.auto=true;
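
Automatic local mode only kicks in for small inputs; these two companion properties (values shown are, to my knowledge, Hive's defaults: about 128 MB and 4 files) set the cutoff:

set hive.exec.mode.local.auto.inputbytes.max=134217728;
set hive.exec.mode.local.auto.input.files.max=4;
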
The project needs to query Hive, but after adding hive-jdbc, the Spring Boot application would no longer start (it started fine before this dependency was added). The error:

org.springframework.context.ApplicationContextException: Unable to start web server; nested exception is org.springframework.boot.web.server.WebServerException: Unable to start embedded Tomcat

Excluding jetty-runner from hive-jdbc resolved the conflict:
<dependency>
	<groupId>org.apache.hive</groupId>
	<artifactId>hive-jdbc</artifactId>
	<version>3.1.2</version>
	<!-- the server version is also 3.1.2 -->
	<!-- switching to 2.3.3 or 2.1.1 both hit JDK 1.7 problems -->

	<exclusions>
		<!--https://blog.csdn.net/web15085181368/article/details/123714335-->
		<exclusion>
			<groupId>org.eclipse.jetty</groupId>
			<artifactId>jetty-runner</artifactId>
		</exclusion>
	</exclusions>
</dependency>

By Cheng Zhong    2023-08-14 18:07:04
