导入大数据量多sheet的excel文件

excel导出

阿胜yss

337人浏览 · 2023-03-05 13:25:09

阿胜yss · 2023-03-05 13:25:09 发布

今天做大数据量导入数据库的操作
测试代码:

	@Autowired
    @Qualifier("threadPoolExecutor")
    private ThreadPoolExecutor executorService;
    @Test
    public void readExcel(){
        String fileName = "d:\\test.xlsx";
        List<Callable<Object>> tasks = new ArrayList<>();
        for (int i = 0;i<500;i++){
            int num = i;
            tasks.add(()->{
                EasyExcel.read(fileName,Example.class,new ImportListener<Example>(iExampleService)).sheet(num).doRead();
                return null;
            });
        }
        try {
            executorService.invokeAll(tasks);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }

ImportListener.class

package com.example.admin.execl;

import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.read.listener.ReadListener;
import com.alibaba.excel.util.ListUtils;
import com.alibaba.fastjson2.JSON;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.baomidou.mybatisplus.extension.service.IService;
import com.example.admin.mapper.ExampleMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.annotation.Async;
import org.springframework.transaction.annotation.Transactional;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * @author yss
 * @date 2023/3/3
 */
@Slf4j
@RequiredArgsConstructor
public class ImportListener<T> implements ReadListener<T> {

    private final IService<T> iService;

    /**
     * 每隔10000条存储数据库，然后清理list ，方便内存回收
     */
    private static final int BATCH_COUNT = 10000;
    /**
     * 缓存的数据
     */
//    private List<T> cachedDataList = ListUtils.newArrayListWithExpectedSize(BATCH_COUNT);

    private ThreadLocal<List<T>> cachedDataList = ThreadLocal.withInitial(ArrayList::new);

    private static AtomicInteger count = new AtomicInteger(0);

    private final ThreadPoolExecutor threadPoolExecutor = new ThreadPoolExecutor(100,
            100,
            60,
            TimeUnit.SECONDS,
            new LinkedBlockingQueue<>());

    // 1. 多线程批量读   2.转化为list 校验 3.多线程往数据库存
    @Override
    @Transactional(rollbackFor = Exception.class)
    public void invoke(T entity, AnalysisContext analysisContext) {
        // 在这做校验
        cachedDataList.get().add(entity);
        // 达到BATCH_COUNT了，需要去存储一次数据库，防止数据几万条数据在内存，容易OOM
        if (cachedDataList.get().size() >= BATCH_COUNT) {
            batchInsert();
//            asyncBatchInsert();
        }
    }

    @Override
    public void doAfterAllAnalysed(AnalysisContext analysisContext) {
        batchInsert();
//        asyncBatchInsert();
        log.info("所有数据解析完成！");
    }

    public void batchInsert() {
        if (!cachedDataList.get().isEmpty()){
            log.info("第"+count.addAndGet(1)+"次插入"+cachedDataList.get().size()+"条数据");
            iService.saveBatch(cachedDataList.get());
            // 释放内存
            cachedDataList.get().clear();
            log.info("存储数据库成功!");
        }
    }

    public void batchInsert(List<T> lists) {
        log.info("lists大小:"+lists.size());
        iService.saveBatch(lists);
        log.info("第"+count.addAndGet(1)+"次插入"+lists.size()+"条数据");
        lists.clear();
        log.info("存储数据库成功!");
    }

    @Async
    public void asyncBatchInsert() {
        if (!cachedDataList.get().isEmpty()){
            List<T> lists = new ArrayList<>(cachedDataList.get());
            // 开启另外一个线程来批量插入数据库
            threadPoolExecutor.submit(()->{
                log.info("lists大小:"+lists.size());
                iService.saveBatch(lists);
//                log.info("第"+count.addAndGet(1)+"次插入"+lists.size()+"条数据");
//                lists.clear();
//                log.info("存储数据库成功!");
            });
            // 释放内存
            cachedDataList.get().clear();
        }
    }

}

这里有一个问题: 多线程并发写入数据库时，发现无法存入数据库，也没有报任何错误，在这个调试上耗费了蛮多时间，故退而求其次，单线程写入。

多线程读取excel文档，然后每个线程把list数据插入数据库
结果:
在这里插入图片描述

耗时大概100s左右，不错。

魔乐社区

魔乐社区（Modelers.cn) 是一个中立、公益的人工智能社区，提供人工智能工具、模型、数据的托管、展示与应用协同服务，为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作，由全产业链共同建设、共同运营、共同享有，推动国产AI生态繁荣发展。

更多推荐

替你试过了，消费级显卡可以跑的开源文生图SOTA模型，顶级渲染、高密度文本绘图

魔乐社区

量化挑战赛冠军专访：4小时啃下W4A8量化，我靠的是这些经验

魔乐社区

小参数・大码力・易部署 | Qwen3.6-27B上线魔乐社区，基于昇腾的部署教程来了

继一周前模型开源发布后，千问再度开源Qwen3.6-27B —— 一个拥有270亿参数的稠密多模态模型，也是社区呼声最高的模型规格。Qwen3.6-27B 依然支持多模态思考与非思考模式，在智能体编程方面达到了旗舰级表现，全面超越前代开源旗舰 Qwen3.5-397B-A17B（总参数397B / 激活参数17B的MoE模型）。作为稠密架构，它无需MoE路由即可部署，是开发者在实用、可广泛部署规模