Java读取HDFS上的Excel文件
一、背景
目前公司在做数字化转型,很多东西都是在刚刚起步状态,比如数据采集,因为有涉及到安全的问题,公司搞了内系统和外系统(也就是内网和外网),这两个系统不相通。很多数据都是通过Excel表格来做传输。本文是在这样的背景下想用java去操作excel的数据。
二、POI的介绍
1.由apache公司提供
2.Java编写的免费开源的跨平台的Java API
3.提供API给Java程序对Microsoft Office格式档案读和写的功能
2.1 为何要使用poi?
因为相比较于其他的插件,比如Jxl:消耗小,但是图片和图形支持的有限。而poi的功能更加完善,操作更简单。
2.2 使用提前,需要导入两个依赖:
<dependencies>
<!-- 添加依赖poi:作用是解析excel表格 -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.16</version>
</dependency>
<!-- 添加依赖poi-ooxml:作用是解析excel表格 -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.16</version>
</dependency>
<!-- 添加依赖poi-ooxml-schemas -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.16</version>
</dependency>
<!-- 添加mysql驱动 -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.16</version>
<scope>compile</scope>
</dependency>
<!-- 添加druid连接池 -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
<version>1.0.9</version>
</dependency>
<!--jdbcTemplate-->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-core</artifactId>
<version>4.1.2.RELEASE</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-jdbc</artifactId>
<version>4.1.2.RELEASE</version>
<scope>compile</scope>
</dependency>
</dependencies>
2.3 免费POI包的结构:
HSSF ----- 读写Microsoft Excel XLS
XSSF ----- 读写Microsoft Excel OOXML XLSX
HWPF ---- 读写Microsoft Word DOC
HSLF ---- 提供读写Microsoft PowerPoint
2.4 Poi封装的对象
XSSFWorkbook:工作薄
XSSFsheet:工作表
Row:行
Cell:单元格
三、实例操作
3.1 第一种场景
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
* @ author: lzl
* @ create: 2022-10-01 10:01
* @ name: Demo1
*/
public class Study1 {
public static void main(String[] args) throws Exception{
//1.获取工作薄
XSSFWorkbook workbook=new XSSFWorkbook("F:\\python\\test\\NationalDay.xlsx");
//2.获取工作表
XSSFSheet sheet = workbook.getSheetAt(0);
//3.获取行
for (Row row:sheet) {
//4.获取单元格,遍历获取
for (Cell cell:row) {
//5.获取单元格中的内容(注意字段的格式)
String value = cell.getStringCellValue();
System.out.println(value);
}
}
//6.释放资源
workbook.close();
}
}
但是这种场景是有局限性的,比如我将一个日期的放进去,它就会报错了。
它是按行来读取的,两列的情况会按照一列来输出
普通for循环也可以。
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
* @ author: lzl
* @ create: 2022-10-01 10:01
* @ name: Demo1
*/
public class Study1 {
public static void main(String[] args) throws Exception{
//1.获取工作薄
XSSFWorkbook workbook=new XSSFWorkbook("F:\\python\\test\\NationalDay.xlsx");
//2.获取工作表
XSSFSheet sheet = workbook.getSheetAt(0);
// //3.获取行
// for (Row row:sheet) {
// //4.获取单元格,遍历获取
// for (Cell cell:row) {
// //5.获取单元格中的内容(注意字段的格式)
// String value = cell.getStringCellValue();
// System.out.println(value);
// }
// }
// 通过普通for循环获取
//开始索引0 结束索引
int lastRowNum = sheet.getLastRowNum();
for (int i = 0; i <= lastRowNum; i++) {
XSSFRow row = sheet.getRow(i);
if (row!=null) {
//
short cellNum = row.getLastCellNum();
for(int j =0; j <= cellNum; j++) {
XSSFCell cell = row.getCell(j);
if(cell!=null) {
String stringCellValue = cell.getStringCellValue();
System.out.println(stringCellValue);
}
}
}
}
//6.释放资源
workbook.close();
}
}
3.2 第二种场景
—向文件中读取数据
1.创建工作薄
2.获取工作表
3.遍历工作表获得行对象
4.遍历行对象获取单元对象
5.获得单元格中的值
—向Excel文件写入数据
1.创建一个Excel文件
2.创建工作表
3.创建行
4.创建单元格赋值
5.通过输出流将对象下载到磁盘中
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.FileOutputStream;
/**
* @ author: lzl
* @ create: 2022-10-01 11:09
* @ name: study2
*/
public class study2 {
public static void main(String[] args) throws Exception{
// 1.创建工作簿
XSSFWorkbook workbook = new XSSFWorkbook();
// 2.创建工作表
XSSFSheet sheet = workbook.createSheet("工作表一");
// 3.创建行
XSSFRow row = sheet.createRow(0);
// 4.创建单元格
row.createCell(0).setCellValue("国庆节");
row.createCell(1).setCellValue("放假了");
row.createCell(2).setCellValue("在家学习");
XSSFRow row1 = sheet.createRow(1);
// 4.创建单元格
row1.createCell(0).setCellValue("国庆节");
row1.createCell(1).setCellValue("太阳好");
row1.createCell(2).setCellValue("学完出去玩");
//输出流对象
FileOutputStream out = new FileOutputStream("F:\\python\\test\\NationalDay_1.xlsx");
workbook.write(out);
out.flush();//内容进行刷新
//关闭资源
out.close();
workbook.close();
System.out.println("写入成功!");
}
}
3.3 第三种场景
1.读取Excel数据到数据库
2.将数据库的数据写入excel
3.增加样式
第一部分:读取excel文件暂时先放入集合中。
package com.lzl.excel.gmall.web;
import com.lzl.excel.gmall.domain.Product;//这部分在后面
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
/**
* @ author: lzl
* @ create: 2022-10-06 19:31
* @ name: Show
*/
public class Show {
public static void main(String[] args) throws IOException {
//通过键盘录入Scanner
Scanner sc= new Scanner(System.in);
System.out.println("请输入您要选择的功能:1.导入 2.导出");
int num = sc.nextInt();
if (num == 1) {
//1.导入
//1.1读取excel表中的数据
System.out.println("请输入您要读取的文件位置(不包含空格)");
String path = sc.next();
List<Product> productList = read(path); //调用读的方法
System.out.println(productList);
//1.2将数据写入到数据库中
} else if(num == 2) {
//2.导出
//2.1 读取数据库中的数据
//2.2 将数据写入到excel表格中
} else {
System.out.println("输入有误!请重新启动");
}
}
public static List<Product> read(String path) throws IOException { //定义一个读取的集合方法
List<Product> productList = new ArrayList<>();
//1.获取工作簿
XSSFWorkbook xssfWorkbook = new XSSFWorkbook(path);
//2.获取工作表
XSSFSheet sheet = xssfWorkbook.getSheetAt(0);
int lastRowNum = sheet.getLastRowNum();
for (int i = 1; i <= lastRowNum ; i++) { //循环获取每一行
XSSFRow row = sheet.getRow(i);
if(row != null) {
List<String> list = new ArrayList<>();
for (Cell cell : row) { //循环获取每一列
if (cell != null ) {
cell.setCellType(Cell.CELL_TYPE_STRING);//转换每一列类型为string
String value = cell.getStringCellValue();//读取数据
if(value != null && ! "".equals(value)){ //判断行如果不为空,则取它的值
list.add(value);//将读取的数据放入集合中
}
}
}
if(list.size() > 0) { //如果集合不为0,则定义每一列的类型
Product product = new Product(Integer.parseInt(list.get(0)), list.get(1), Double.parseDouble(list.get(2)), Integer.parseInt(list.get(3))); //默认是string,所以第2格个不用强制转换
productList.add(product);
}
}
}
return productList;
}
}
运行结果:
存储MySQL中:
druid.properties:
driverClassName = com.mysql.cj.jdbc.Driver #mysql6.0之后推荐使用这个驱动包
url=jdbc:mysql://192.168.10.102:3306/excel?useUnicode=true&characterEncoding=utf8&serverTimezone=UTC&useSSL=true
username = root
password = ggnggn
initialSize = 5
maxActive = 10
maxWait = 3000
JDBCUtils:
package com.lzl.excel.gmall.utils;
import com.alibaba.druid.pool.DruidDataSourceFactory;
import javax.sql.DataSource;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.Properties;
/**
* @ author: lzl
* @ create: 2022-10-06 19:05
* @ name: JDBCutils
* @ 描述 :JDBC工具类,使用druid连接池
*/
public class JDBCUtils {
private static DataSource ds;
static {
try {
//1.加载配置文件
Properties pro = new Properties();
//2.使用ClassLoader加载配置文件,获取字节输入流
InputStream is = JDBCUtils.class.getClassLoader().getResourceAsStream("druid.properties");
pro.load(is);
//3.初始化连接对象
ds= DruidDataSourceFactory.createDataSource(pro);
} catch (IOException e) {
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* 获取连接池对象
*/
public static DataSource getDataSource(){
return ds;
}
/**
* 获取连接Connection对象
*/
public static Connection getConnection() throws SQLException {
return ds.getConnection();
}
}
ProductDao
package com.lzl.excel.gmall.dao;
import com.lzl.excel.gmall.domain.Product;
import java.util.List;
/**
* @ author: lzl
* @ create: 2022-10-06 19:02
* @ name: ProductDao
*/
public interface ProductDao {
void save(Product product);
List<Product> findAll();
}
ProductDaoImpl
package com.lzl.excel.gmall.dao.Impl;
import com.lzl.excel.gmall.dao.ProductDao;
import com.lzl.excel.gmall.domain.Product;
import com.lzl.excel.gmall.utils.JDBCUtils;
import org.springframework.jdbc.core.BeanPropertyRowMapper;
import org.springframework.jdbc.core.JdbcTemplate;
import java.util.List;
/**
* @ author: lzl
* @ create: 2022-10-06 19:01
* @ name: ProductDaoImpl
*/
public class ProductDaoImpl implements ProductDao {
JdbcTemplate jdbcTemplate=new JdbcTemplate(JDBCUtils.getDataSource());
@Override
public void save(Product product) {
String sql="insert into product values(?,?,?,?)";
jdbcTemplate.update(sql,product.getPid(),product.getPname(),product.getPrice(),product.getPstock());
}
@Override
public List<Product> findAll() {
String sql="select * from product";
return jdbcTemplate.query(sql,new BeanPropertyRowMapper<Product>(Product.class));
}
}
ProductServiceImpl
package com.lzl.excel.gmall.serice.Impl;
import com.lzl.excel.gmall.dao.ProductDao;
import com.lzl.excel.gmall.dao.Impl.ProductDaoImpl;
import com.lzl.excel.gmall.domain.Product;
import com.lzl.excel.gmall.serice.ProductService;
import java.util.List;
/**
* @ author: lzl
* @ create: 2022-10-06 19:03
* @ name: ProductServiceImpl
*/
public class ProductServiceImpl implements ProductService{
private ProductDao productDao=new ProductDaoImpl();
@Override
public void save(List<Product> productList) {
for (Product product : productList) {
productDao.save(product);
}
}
@Override
public List<Product> findAll() {
return productDao.findAll();
}
}
ProductService
package com.lzl.excel.gmall.serice;
import com.lzl.excel.gmall.domain.Product;
import java.util.List;
/**
* @ author: lzl
* @ create: 2022-10-06 19:02
* @ name: ProductService
*/
public interface ProductService {
void save(List<Product> productList);
List<Product> findAll();
}
show
package com.lzl.excel.gmall.web;
import com.lzl.excel.gmall.domain.Product;
import com.lzl.excel.gmall.serice.ProductService;
import com.lzl.excel.gmall.serice.Impl.ProductServiceImpl;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.IndexedColors;
import org.apache.poi.xssf.usermodel.*;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
/**
* @ author: lzl
* @ create: 2022-10-06 19:31
* @ name: Show
*/
public class Show {
public static void main(String[] args) throws IOException {
//通过键盘录入Scanner
Scanner sc = new Scanner(System.in);
System.out.println("请输入您要选择的功能:1.导入 2.导出");
int num = sc.nextInt();
ProductService productService = new ProductServiceImpl();
if (num == 1) {
//1.导入
//1.1读取excel表中的数据
System.out.println("请输入您要读取的文件位置(不包含空格)");
String path = sc.next();
List<Product> productList = read(path); //调用读的方法
System.out.println(productList);
//1.2将数据写入到数据库中
productService.save(productList);
System.out.println("数据存储MySQL成功!");
} else if (num == 2) {
//2.导出
//2.1 读取数据库中的数据
List<Product> productList = productService.findAll();
System.out.println(productList);
//2.2 将数据写入到excel表格中
System.out.println("请输入要写入的文件位置:");
String path = sc.next();
write(productList, path);
System.out.println("写入成功!");
} else {
System.out.println("输入有误!请重新启动");
}
}
public static void write( List<Product> productList,String path) throws IOException {
//1.创建一个工作薄
XSSFWorkbook xssfWorkbook=new XSSFWorkbook();
//2.创建工作表
XSSFSheet sheet = xssfWorkbook.createSheet("商品");
//单元格样式
XSSFCellStyle cellStyle = xssfWorkbook.createCellStyle();
cellStyle.setFillForegroundColor(IndexedColors.PINK.getIndex());
cellStyle.setFillPattern(CellStyle.SOLID_FOREGROUND);
//字体样式
XSSFFont font = xssfWorkbook.createFont();
font.setFontName("黑体");
font.setColor(IndexedColors.BLUE.getIndex());
cellStyle.setFont(font);
//3.创建行
XSSFRow row = sheet.createRow(0);
/* row.createCell(0).setCellValue("商品编号");
row.createCell(1).setCellValue("商品名称");
row.createCell(2).setCellValue("商品价格(单位:元/斤)");
row.createCell(3).setCellValue("商品库存(单位:吨)");*/
XSSFCell cell = row.createCell(0);
cell.setCellValue("商品编号");
cell.setCellStyle(cellStyle);
XSSFCell cell1 = row.createCell(1);
cell1.setCellValue("商品名称");
cell1.setCellStyle(cellStyle);
XSSFCell cell2 = row.createCell(2);
cell2.setCellValue("商品价格(单位:元/斤)");
cell2.setCellStyle(cellStyle);
XSSFCell cell3 = row.createCell(3);
cell3.setCellValue("商品库存(单位:吨)");
cell3.setCellStyle(cellStyle);
for (int i = 0; i < productList.size(); i++) {
XSSFRow row1 = sheet.createRow(i + 1);
row1.createCell(0).setCellValue(productList.get(i).getPid());
row1.createCell(1).setCellValue(productList.get(i).getPname());
row1.createCell(2).setCellValue(productList.get(i).getPrice());
row1.createCell(3).setCellValue(productList.get(i).getPstock());
}
FileOutputStream fileOutputStream=new FileOutputStream(path);
xssfWorkbook.write(fileOutputStream);
fileOutputStream.flush();
fileOutputStream.close();
xssfWorkbook.close();
}
public static List<Product> read(String path) throws IOException {
List<Product> productList=new ArrayList<>();
//1.获取工作薄
XSSFWorkbook xssfWorkbook = new XSSFWorkbook(path);
//2.获取工作表
XSSFSheet sheet = xssfWorkbook.getSheetAt(0);
int lastRowNum = sheet.getLastRowNum();
for (int i = 1; i <= lastRowNum ; i++) { //循环获取每一行
XSSFRow row = sheet.getRow(i);
if(row != null) {
List<String> list = new ArrayList<>();
for (Cell cell : row) { //循环获取每一列
if (cell != null ) {
cell.setCellType(Cell.CELL_TYPE_STRING);//转换每一列类型为string
String value = cell.getStringCellValue();//读取数据
if(value != null && ! "".equals(value)){ //判断行如果不为空,则取它的值
list.add(value);//将读取的数据放入集合中
}
}
}
if(list.size() > 0) { //如果集合不为0,则定义每一列的类型
Product product = new Product(Integer.parseInt(list.get(0)), list.get(1), Double.parseDouble(list.get(2)), Integer.parseInt(list.get(3))); //默认是string,所以第2格个不用强制转换
productList.add(product);
}
}
}
return productList;
}
}
运行结果:
Product部分:
package com.lzl.excel.gmall.domain;
/**
* @ author: lzl
* @ create: 2022-10-06 17:54
* @ name: product
*/
public class Product {
private Integer pid;
private String pname;
private double price;
private int pstock;
@Override
public String toString() {
return "Product {" +
"pid=" + pid +
",pname='"+ pname + '\'' +
",price=" + price +
",pstock=" + pstock +
'}';
}
public Product(Integer pid,String pname,double price, int pstock) {
this.pid = pid;
this.pname= pname;
this.price = price;
this.pstock = pstock;
}
public Integer getPid() {
return pid ;
}
public void setPid() {
this.pid = pid;
}
public String getPname () {
return pname;
}
public void setPname () {
this.pname = pname;
}
public double getPrice () {
return price ;
}
public void setPrice () {
this.price = price;
}
public int getPstock() {
return pstock;
}
public void setPstock() {
this.pstock = pstock;
}
}