批量插入,用mybatis还是jdbc

用mybatis默认的配置插入几十万数据,花了几十分钟没插进去。想到了用mybatis来控制事务,于是用《mybatis批量更新》里提到的方法,插了几行数据就卡住了。于是换成了最原始的jdbc方法:


package com.xx.bookdata.service;

import java.io.InputStream;
import java.sql.Connection;
import java.sql.Date;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.xx.bookdata.entity.ChapterInfo;

public class ConnectionUtil {
	private static Log log = LogFactory.getLog(ConnectionUtil.class);
	private static ConnectionUtil instace;
	private static Connection conn;
	private static PreparedStatement pstm;
	private static int batchNum = 0;

	public static ConnectionUtil getInstance() {
		if (instace == null) {
			
			Properties properties = new Properties();
			// 使用ClassLoader加载properties配置文件生成对应的输入流
			InputStream in = ConnectionUtil.class.getClassLoader().getResourceAsStream("conf/jdbc.properties");
			// 使用properties对象加载输入流
			try {
				properties.load(in);
				Class.forName(properties.getProperty("jdbc.driver"));
				conn = DriverManager.getConnection(properties.getProperty("jdbc.url"),
						properties.getProperty("jdbc.username"), properties.getProperty("jdbc.password"));
				conn.setAutoCommit(false);
				pstm = conn.prepareStatement("insert into biz_book_base_chapter(book_id,`name`,`no`,parse_type,created_time,update_time) values(?,?,?,?,?,?)");
				instace=new ConnectionUtil();
			} catch (Exception e) {
				log.error(e.getMessage(), e);
			}
			
		}
		return instace;
	}

	public void addBatch(ChapterInfo info, boolean executeBatch) {
		/*
		String sql="insert into biz_book_base_chapter(book_id,`name`,`no`,parse_type,created_time,update_time) values("
				+ info.getBookId() + ",'" + info.getName() + "'," + info.getNo() + "," + info.getParseType()
				+ ",now(),now())";
				*/
		try {
			pstm.setInt(1, info.getBookId());
			pstm.setString(2, info.getName());
			pstm.setInt(3, info.getNo());
			pstm.setInt(4, info.getParseType());
			pstm.setDate(5, new Date(new java.util.Date().getTime()));
			pstm.setDate(6, new Date(new java.util.Date().getTime()));
			pstm.addBatch();
			batchNum++;

			if (batchNum % 100 == 0) {

				pstm.executeBatch();
				conn.commit();

			} else {
				if (executeBatch) {
					pstm.executeBatch();
					conn.commit();
				}
			}

		} catch (SQLException e) {
			log.error(e.getMessage(), e);
		}
	}
	public static void close() {
		if(pstm!=null) {
			try {
				pstm.close();
			}catch(Exception e) {
				log.error(e.getMessage(),e);
			}
		}
		if(conn!=null) {
			try {
				conn.close();
			}catch(Exception e) {
				log.error(e.getMessage(),e);
			}
		}
	}

}


配置文件(conf/jdbc.properties):

jdbc.driver=com.mysql.jdbc.Driver
jdbc.url=jdbc:mysql://localhost/xx?useUnicode=true&characterEncoding=UTF8&rewriteBatchedStatements=true
jdbc.username=root
jdbc.password=xx

ok,使用后效率提高了10倍,120万数据只需要4分钟。

哈哈,写完之后发现,这种方法有点"笨",更简单的方法还是用mybatis。不过sql改下,insert into tab values()(),就是批量插入就行了。如下:

insert into biz_book_base_chapter(book_id,name,no,parse_type,created_time,updated_time) values
		<foreach collection="chapterInfos" item="chapterInfo" index="index"  separator=",">
			(#{chapterInfo.bookId},#{chapterInfo.name},#{chapterInfo.no},#{chapterInfo.parseType},now(),now())
		</foreach>

另外记录一点,String.trim()方法并不能去掉中文空格\t等字符。所以可以用以下方法:


/** 
     * 去除字符串中头部和尾部所包含的空格(包括:空格(全角,半角)、制表符、换页符等) 
     * @param s 
     * @return 
     */  
    public static String trim(String s){  
        String result = "";  
        if(null!=s && !"".equals(s)){  
            result = s.replaceAll("^[ *| *| *|\\s*]*", "").replaceAll("[ *| *| *|\\s*]*$", "");  
        }  
        return result;  
    } 


上面的insert values多行数据的sql不靠谱,2020-11我用这个方法插入一批查询字符串,已确认没有重复字符串,但是还是报唯一键冲突。没办法,那就用mybatis的普通插入吧,为了实现类批量插入,手工控制事务,可查看《手动控制spring事务》 。模板如下:

@Autowired
private WordMapper wordMapper;

@Value("${search.importPath}")
private String importFilePath;
private boolean uploading=false;
private int uploadNum=0;
//最大重复数量
private static final int maxDupNum=100;
//批量提交大小
private static final int batchNum=100;
// 1.获取事务控制管理器
@Autowired
private DataSourceTransactionManager transactionManager;

public String importFile(String fileName,Integer rewriteType)  {
	//验参
    DefaultTransactionDefinition def = new DefaultTransactionDefinition();
    def.setPropagationBehavior(TransactionDefinition.PROPAGATION_REQUIRES_NEW); // 事物隔离级别,开启新事务,这样会比较安全些。
    TransactionStatus transaction = transactionManager.getTransaction(def); // 获得事务状态
    try{
        if(uploading){
            return "正在上传中,已上传:"+uploadNum;
        }

        BufferedReader br=null;
        uploading=true;
        try{
            uploadNum=0;
            File file=new File(importFilePath+File.separator+fileName);
            
            br=new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"));


            String line=null;
            List<SearchRewriteWord> words=new ArrayList<>();
            while((line=br.readLine())!=null){
                uploadNum++;
                SearchRewriteWord srw;//build实体类
                words.add(srw);
                if(words.size()%batchNum==0){
                    addBatch(words,rewriteType,0);
                    words.clear();
                }

            }
            if(words.size()>0){
                addBatch(words,rewriteType,0);
            }

        }finally {
            uploading=false;
            if(br!=null){
                br.close();
            }
        }

        transactionManager.commit(transaction);
    }catch(Exception e) {

        transactionManager.rollback(transaction);
        log.error(e.getMessage(),e);
    }
    return "成功导入:"+uploadNum+"条";
}

private void  addBatch(List<SearchRewriteWord> words,int rewriteType,int dupNum) throws Exception{
    try {
        if(words==null||words.size()==0){
            return ;
        }
        //去掉超长的
        for(int i=0;i<words.size();i++){
            SearchRewriteWord sw=words.get(i);
            if(sw.getQueryWord()!=null&&sw.getQueryWord().length()>256){
                words.remove(i);
                i--;
                log.info(sw.getQueryWord()+" is too long,unimport!");
            }
        }
        //wordMapper.addBathWord(words);

        for(SearchRewriteWord w:words){
            wordMapper.addWord(w);
        }
    }catch(DuplicateKeyException   e){
    	log.error(e.getMessage());
        if(dupNum++>maxDupNum){
            log.error("重复query过多(每"+batchNum+"个多于"+maxDupNum+"个重复词),请去掉重复词");
        }
        //一条一条插入,忽略重复键异常,这种的不导入
        for(SearchRewriteWord sw:words){
            try{
                wordMapper.addWord(sw);
            }catch(DuplicateKeyException   e1){
                log.error(e.getMessage());
            }
        }
    }catch(Exception e){
        log.info(e.getClass());
        log.error(e.getMessage(),e);
        throw e;
    }

}


文/程忠 浏览次数:0次   2018-07-18 17:20:27

相关阅读

微信扫描-捐赠支持
加入QQ群-技术交流

评论: