批量插入,用mybatis还是jdbc
用mybatis默认的配置插入几十万数据,花了几十分钟没插进去。想到了用mybatis来控制事务,于是用《mybatis批量更新》里提到的方法,插了几行数据就卡住了。于是换成了最原始的jdbc方法:
package com.xx.bookdata.service; import java.io.InputStream; import java.sql.Connection; import java.sql.Date; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.SQLException; import java.util.Properties; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.xx.bookdata.entity.ChapterInfo; public class ConnectionUtil { private static Log log = LogFactory.getLog(ConnectionUtil.class); private static ConnectionUtil instace; private static Connection conn; private static PreparedStatement pstm; private static int batchNum = 0; public static ConnectionUtil getInstance() { if (instace == null) { Properties properties = new Properties(); // 使用ClassLoader加载properties配置文件生成对应的输入流 InputStream in = ConnectionUtil.class.getClassLoader().getResourceAsStream("conf/jdbc.properties"); // 使用properties对象加载输入流 try { properties.load(in); Class.forName(properties.getProperty("jdbc.driver")); conn = DriverManager.getConnection(properties.getProperty("jdbc.url"), properties.getProperty("jdbc.username"), properties.getProperty("jdbc.password")); conn.setAutoCommit(false); pstm = conn.prepareStatement("insert into biz_book_base_chapter(book_id,`name`,`no`,parse_type,created_time,update_time) values(?,?,?,?,?,?)"); instace=new ConnectionUtil(); } catch (Exception e) { log.error(e.getMessage(), e); } } return instace; } public void addBatch(ChapterInfo info, boolean executeBatch) { /* String sql="insert into biz_book_base_chapter(book_id,`name`,`no`,parse_type,created_time,update_time) values(" + info.getBookId() + ",'" + info.getName() + "'," + info.getNo() + "," + info.getParseType() + ",now(),now())"; */ try { pstm.setInt(1, info.getBookId()); pstm.setString(2, info.getName()); pstm.setInt(3, info.getNo()); pstm.setInt(4, info.getParseType()); pstm.setDate(5, new Date(new java.util.Date().getTime())); pstm.setDate(6, new Date(new java.util.Date().getTime())); pstm.addBatch(); batchNum++; if (batchNum % 100 == 0) { 
pstm.executeBatch(); conn.commit(); } else { if (executeBatch) { pstm.executeBatch(); conn.commit(); } } } catch (SQLException e) { log.error(e.getMessage(), e); } } public static void close() { if(pstm!=null) { try { pstm.close(); }catch(Exception e) { log.error(e.getMessage(),e); } } if(conn!=null) { try { conn.close(); }catch(Exception e) { log.error(e.getMessage(),e); } } } }
配置文件(conf/jdbc.properties):
jdbc.driver=com.mysql.jdbc.Driver jdbc.url=jdbc:mysql://localhost/xx?useUnicode=true&characterEncoding=UTF8&rewriteBatchedStatements=true jdbc.username=root jdbc.password=xx
ok,使用后效率提高了10倍,120万数据只需要4分钟。
哈哈,写完之后发现,这种方法有点"笨",更简单的方法还是用mybatis。不过sql改下,写成 insert into tab values (...),(...),... 的多行 values 形式(每组值之间用逗号分隔),就是批量插入就行了。如下:
<!-- Multi-row VALUES batch insert: the <foreach> expands one (...) group per
     element of "chapterInfos", joined by commas.
     NOTE(review): column is spelled 'updated_time' here but 'update_time' in
     the JDBC insert earlier in this post — confirm which matches the schema.
     NOTE(review): `name` and `no` are backquoted in the JDBC version but not
     here; backquote them if they clash with MySQL reserved words. -->
insert into biz_book_base_chapter(book_id,name,no,parse_type,created_time,updated_time) values <foreach collection="chapterInfos" item="chapterInfo" index="index" separator=","> (#{chapterInfo.bookId},#{chapterInfo.name},#{chapterInfo.no},#{chapterInfo.parseType},now(),now()) </foreach>
另外记录一点,String.trim()方法只去掉码值不大于U+0020的字符(所以半角空格、\t其实能去掉),但去不掉中文全角空格(\u3000)、不间断空格(\u00A0)等字符。所以可以用以下方法:
/**
 * Strips leading and trailing whitespace from a string, including characters
 * that {@link String#trim()} misses: the full-width (CJK) space U+3000 and the
 * no-break space U+00A0, in addition to ordinary {@code \s} whitespace
 * (space, tab, newline, form feed, ...).
 *
 * <p>BUG FIX: the original pattern {@code "^[ *| *| *|\\s*]*"} was malformed —
 * inside a character class {@code *} and {@code |} are literal characters, so
 * the method also stripped literal '*' and '|' from the string's edges.
 *
 * @param s the input string, may be null
 * @return the trimmed string; the empty string when {@code s} is null or empty
 */
public static String trim(String s) {
    String result = "";
    if (null != s && !"".equals(s)) {
        result = s.replaceAll("^[\\s\u3000\u00A0]+", "")
                  .replaceAll("[\\s\u3000\u00A0]+$", "");
    }
    return result;
}
上面的insert values多行数据的sql不靠谱,2020-11我用这个方法插入一批查询字符串,已确认没有重复字符串,但是还是报唯一键冲突。没办法,那就用mybatis的普通插入吧,为了实现类批量插入,手工控制事务,可查看《手动控制spring事务》 。模板如下:
@Autowired
private WordMapper wordMapper;

// Directory the import files are read from.
@Value("${search.importPath}")
private String importFilePath;

// NOTE(review): read/written from web requests but neither volatile nor
// atomic — two concurrent importFile calls can both pass the guard. Confirm
// whether single-threaded access is guaranteed by the caller.
private boolean uploading = false;
private int uploadNum = 0;

// Maximum tolerated number of duplicates (per batch).
private static final int maxDupNum = 100;
// Batch commit size.
private static final int batchNum = 100;

// 1. Transaction manager used to control the transaction manually.
@Autowired
private DataSourceTransactionManager transactionManager;

/**
 * Imports rewrite words line-by-line from a UTF-8 file under importFilePath,
 * inserting them in batches of {@code batchNum} inside one manually managed
 * transaction (committed on success, rolled back on any exception).
 *
 * @param fileName    file name relative to importFilePath
 * @param rewriteType rewrite type forwarded to the entity build step
 * @return a progress/result message (Chinese user-facing strings)
 */
public String importFile(String fileName, Integer rewriteType) {
    // Validate parameters here (template placeholder).
    DefaultTransactionDefinition def = new DefaultTransactionDefinition();
    // Open a brand-new transaction regardless of any surrounding one — safer
    // for a long-running import.
    def.setPropagationBehavior(TransactionDefinition.PROPAGATION_REQUIRES_NEW);
    // Obtain the transaction status handle for the new transaction.
    TransactionStatus transaction = transactionManager.getTransaction(def);
    try {
        if (uploading) {
            return "正在上传中,已上传:" + uploadNum;
        }
        BufferedReader br = null;
        uploading = true;
        try {
            uploadNum = 0;
            File file = new File(importFilePath + File.separator + fileName);
            br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
            String line = null;
            List<SearchRewriteWord> words = new ArrayList<>();
            while ((line = br.readLine()) != null) {
                uploadNum++;
                // Build the entity from `line` here — template placeholder.
                // NOTE(review): srw is never assigned; this will not compile
                // as-is and must be filled in before use.
                SearchRewriteWord srw;
                words.add(srw);
                // Flush every batchNum rows (size is reset by clear(), so the
                // modulo is equivalent to size == batchNum).
                if (words.size() % batchNum == 0) {
                    addBatch(words, rewriteType, 0);
                    words.clear();
                }
            }
            // Flush the final partial batch.
            if (words.size() > 0) {
                addBatch(words, rewriteType, 0);
            }
        } finally {
            uploading = false;
            if (br != null) {
                br.close();
            }
        }
        transactionManager.commit(transaction);
    } catch (Exception e) {
        transactionManager.rollback(transaction);
        log.error(e.getMessage(), e);
    }
    return "成功导入:" + uploadNum + "条";
}

/**
 * Inserts a batch of words one-by-one via the mapper. On a duplicate-key
 * failure, retries the whole batch row-by-row, skipping (and logging) the
 * duplicate rows so the rest still import.
 *
 * @param words       batch to insert; rows with queryWord longer than 256
 *                    chars are dropped with a log line
 * @param rewriteType rewrite type (currently unused in this template body)
 * @param dupNum      running duplicate counter; NOTE(review): it is a local
 *                    parameter, so the increment below never accumulates
 *                    across calls, and exceeding maxDupNum only logs — it
 *                    does not stop the import. Confirm intended behavior.
 * @throws Exception rethrown for any failure other than duplicate keys
 */
private void addBatch(List<SearchRewriteWord> words, int rewriteType, int dupNum) throws Exception {
    try {
        if (words == null || words.size() == 0) {
            return;
        }
        // Drop over-long entries (index-based removal with i-- to stay valid).
        for (int i = 0; i < words.size(); i++) {
            SearchRewriteWord sw = words.get(i);
            if (sw.getQueryWord() != null && sw.getQueryWord().length() > 256) {
                words.remove(i);
                i--;
                log.info(sw.getQueryWord() + " is too long,unimport!");
            }
        }
        // Multi-row VALUES insert disabled (see post); insert one by one.
        // wordMapper.addBathWord(words);
        for (SearchRewriteWord w : words) {
            wordMapper.addWord(w);
        }
    } catch (DuplicateKeyException e) {
        log.error(e.getMessage());
        if (dupNum++ > maxDupNum) {
            log.error("重复query过多(每" + batchNum + "个多于" + maxDupNum + "个重复词),请去掉重复词");
        }
        // Retry row-by-row, ignoring duplicate-key errors: duplicates are
        // simply not imported.
        for (SearchRewriteWord sw : words) {
            try {
                wordMapper.addWord(sw);
            } catch (DuplicateKeyException e1) {
                // NOTE(review): logs the OUTER exception's message (e), not
                // e1 — likely meant e1.getMessage().
                log.error(e.getMessage());
            }
        }
    } catch (Exception e) {
        // NOTE(review): log.info(e.getClass()) logs only the class object;
        // the next line already logs message + stack trace.
        log.info(e.getClass());
        log.error(e.getMessage(), e);
        throw e;
    }
}
相关阅读
微信扫描-捐赠支持
加入QQ群-技术交流
评论:
↓ 广告开始-头部带绿为生活 ↓
↑ 广告结束-尾部支持多点击 ↑