java实现pdf转word
java实现pdf转word
- 前言
- pom文件
- 启动入口
- 过滤器对象
- ConvertPdfToWordWithFlowableStructure转换实现类
前言
1.java实现pdf转word。
2.纯免费开源。
3.pdf解析完会生成word文件和图片文件夹。
4.无页码限制,文本类型生成到word中,图片生成到图片文件夹中。
5.弊端:需手动将图片与文本整合成一个word文件。
仅提供一个pdf转word的实现方案,代码粗糙,老铁轻喷。
jar包地址:https://download.csdn.net/download/wyazyf/88917191
pom文件
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"><modelVersion>4.0.0</modelVersion><groupId>org.example</groupId><artifactId>wy</artifactId><version>1.0-SNAPSHOT</version><properties><maven.compiler.source>8</maven.compiler.source><maven.compiler.target>8</maven.compiler.target></properties><dependencies><dependency><groupId>org.apache.poi</groupId><artifactId>poi</artifactId><version>4.1.1</version></dependency><!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml --><dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml</artifactId><version>4.1.1</version></dependency><!-- https://mvnrepository.com/artifact/com.itextpdf/itextpdf --><dependency><groupId>com.itextpdf</groupId><artifactId>itextpdf</artifactId><version>5.5.13.1</version></dependency><!-- https://mvnrepository.com/artifact/com.itextpdf/itext-asian --><dependency><groupId>com.itextpdf</groupId><artifactId>itext-asian</artifactId><version>5.2.0</version></dependency><dependency><groupId>org.apache.pdfbox</groupId><artifactId>pdfbox</artifactId><version>2.0.0</version></dependency><dependency><groupId>org.apache.pdfbox</groupId><artifactId>pdfbox-tools</artifactId><version>2.0.0</version></dependency><dependency><groupId>org.apache.pdfbox</groupId><artifactId>jbig2-imageio</artifactId><version>3.0.2</version></dependency><dependency><groupId>com.intellij</groupId><artifactId>forms_rt</artifactId><version>7.0.3</version></dependency></dependencies><repositories><repository><id>alimaven</id><name>aliyun maven</name><url>http://maven.aliyun.com/nexus/content/groups/public/</url></repository></repositories>
</project>
启动入口
import com.sun.deploy.util.StringUtils;
import org.apache.poi.util.StringUtil;import javax.swing.*;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.io.IOException;/*** @author wy* @create 2024-02-20 15:57*/
public class test {private JPanel Panel;private JLabel JLabel;private JButton button;private JButton selectButton;private JFileChooser jf ;public static void main(String[] args) {JFrame frame = new JFrame("test");JPanel panel = new test().Panel;panel.setSize(500,300);frame.setContentPane(panel);frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);frame.pack();frame.setSize(400, 200);// frame.setLocation(3);frame.setVisible(true);}public test() {selectButton.addActionListener(new ActionListener() {@Overridepublic void actionPerformed(ActionEvent e) {FileFilterTest fileFilter=new FileFilterTest (); //创建过滤器对象jf=new JFileChooser();jf.setFileFilter(fileFilter); //对JFileChooser设置过滤器jf.showOpenDialog(null);}});button.addActionListener(new ActionListener() {@Overridepublic void actionPerformed(ActionEvent e) {if(jf==null){JOptionPane.showMessageDialog(Panel, "请选择一个pdf文件", "标题",JOptionPane.WARNING_MESSAGE);}File selectedFile = jf.getSelectedFile(); // 获取选择的文件String fielPath = selectedFile.getPath();if(fielPath==null||(fielPath!=null && fielPath=="")){JOptionPane.showMessageDialog(Panel, "请选择一个pdf文件", "标题",JOptionPane.WARNING_MESSAGE);}try {ConvertPdfToWordWithFlowableStructure pdfToWord=new ConvertPdfToWordWithFlowableStructure();pdfToWord.pdfToWordOrPhoto(fielPath);} catch (IOException ioException) {ioException.printStackTrace();}}});}}
过滤器对象
/*** @author wy* @create 2024-02-20 16:24*/
public class FileFilterTest extends javax.swing.filechooser.FileFilter{public boolean accept(java.io.File f) {if (f.isDirectory())return true;return f.getName().endsWith(".pdf"); //设置为选择以.pdf为后缀的文件}public String getDescription(){return ".pdf";}
}
ConvertPdfToWordWithFlowableStructure转换实现类
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.tools.imageio.ImageIOUtil;import java.awt.image.BufferedImage;
import java.io.*;public class ConvertPdfToWordWithFlowableStructure {public void pdfToWordOrPhoto(String pdfFile) throws IOException {try{// String pdfFile = "C:\\Users\\Administrator\\Desktop\\1.pdf";PDDocument doc = PDDocument.load(new File(pdfFile));int pagenumber = doc.getNumberOfPages();pdfFile = pdfFile.substring(0, pdfFile.lastIndexOf("."));String fileName = pdfFile + ".doc";File file = new File(fileName);if (!file.exists()){file.createNewFile();}FileOutputStream fos = new FileOutputStream(fileName);Writer writer = new OutputStreamWriter(fos, "UTF-8");PDFTextStripper stripper = new PDFTextStripper();stripper.setSortByPosition(true);// 排序stripper.setStartPage(1);// 设置转换的开始页stripper.setEndPage(pagenumber);// 设置转换的结束页stripper.writeText(doc, writer);writer.close();System.out.println("pdf文字转换word成功!");//开始下载图片PDFRenderer pdfRenderer = new PDFRenderer(doc);for(int i = 0;i<pagenumber;i++){// 设置页数(首页从0开始)、每英寸点数、图片类型BufferedImage bufferedImage = pdfRenderer.renderImageWithDPI(i, 300, ImageType.RGB);String currentCoverPath = pdfFile + "/" + i + "." + "png";// 创建图片文件对象File file1 = new File(currentCoverPath);if (!file1.getParentFile().exists()) {file1.getParentFile().mkdirs();}if (!file1.exists()) {file1.createNewFile();}else{file1.delete();file1.createNewFile();}// 将图片写入到图片对象中ImageIOUtil.writeImage(bufferedImage, currentCoverPath, 300);}doc.close();System.out.println("pdf图片下载成功!");}catch (IOException e){e.printStackTrace();}}
}