当前位置: 首页 > news >正文

Unable to parse the default media type registry

2019独角兽企业重金招聘Python工程师标准>>> hot3.png

今天写了一段获取MIME类型的代码,对比用org.apache.tika和net.sf.jmimemagic。

jdk版本是1.8.

1.pom.xml:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>hui</groupId>
	<artifactId>TestWithMaven</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>TestWithMaven</name>
	<url>http://maven.apache.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	</properties>

	<dependencies>
		<dependency>
			<groupId>joda-time</groupId>
			<artifactId>joda-time</artifactId>
			<version>2.9.2</version>
		</dependency>
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
			<version>1.7.13</version>
		</dependency>
		<dependency>
			<groupId>org.apache.ibatis</groupId>
			<artifactId>ibatis-core</artifactId>
			<version>3.0</version>
		</dependency>
		<dependency>
			<groupId>org.mybatis</groupId>
			<artifactId>mybatis</artifactId>
			<version>3.4.0</version>
		</dependency>
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.38</version>
		</dependency>

		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.12</version>
		</dependency>
		<dependency>
			<groupId>org.hamcrest</groupId>
			<artifactId>hamcrest-core</artifactId>
			<version>1.3</version>
		</dependency>

		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-context-support</artifactId>
			<version>4.2.2.RELEASE</version>
		</dependency>
		<dependency>
			<groupId>org.apache.tika</groupId>
			<artifactId>tika-core</artifactId>
			<version>1.13</version>
		</dependency>

		<dependency>
			<groupId>net.sf.jmimemagic</groupId>
			<artifactId>jmimemagic</artifactId>
			<version>0.1.4</version>
		</dependency>
		<dependency>
			<groupId>xml-apis</groupId>
			<artifactId>xmlParserAPIs</artifactId>
			<version>2.0.2</version>
		</dependency>




	</dependencies>
</project>

2.FileUtils.java:

package mime;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.FileNameMap;
import java.net.URLConnection;
import java.net.URLEncoder;

import javax.activation.MimetypesFileTypeMap;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.springframework.mail.javamail.ConfigurableMimeFileTypeMap;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import net.sf.jmimemagic.Magic;
import net.sf.jmimemagic.MagicException;
import net.sf.jmimemagic.MagicMatch;
import net.sf.jmimemagic.MagicMatchNotFoundException;
import net.sf.jmimemagic.MagicParseException;

/**
 * Side-by-side collection of different ways to determine the MIME type of a
 * file: the JavaBeans Activation {@link MimetypesFileTypeMap}, a hand-written
 * extension table, Spring's {@link ConfigurableMimeFileTypeMap}, the JDK's
 * {@link FileNameMap}, Apache Tika and jMimeMagic.
 *
 * <p>All methods are static and take the file path (or name) as a string.
 */
public class FileUtils {

	/**
	 * Resolves the MIME type through a {@link MimetypesFileTypeMap} that has
	 * been extended with a mapping for PDF files.
	 *
	 * @param path path of the file to inspect
	 * @return the content type reported by the map for the file
	 */
	public static String getMimeTypeByFileTypeMap(String path) {
		MimetypesFileTypeMap mimetypesFileTypeMap = new MimetypesFileTypeMap();
		// Per the original author's note: the default map has no entry for pdf
		// (a pdf would come back as application/octet-stream) and none for
		// application/xml either, so pdf is registered explicitly here.
		mimetypesFileTypeMap.addMimeTypes("application/pdf pdf");
		File f = new File(path);
		return mimetypesFileTypeMap.getContentType(f);
	}

	/**
	 * Resolves the MIME type from a small built-in extension table and falls
	 * back to a default {@link MimetypesFileTypeMap} for any other extension.
	 *
	 * @param path path or file name; the text from the last {@code '.'} onwards
	 *             is treated as the extension
	 * @return the mapped MIME type, or {@code application/octet-stream} when
	 *         the path contains no {@code '.'}
	 */
	public static String getMimeTypeByFileTypeMap2(String path) {
		int idx = path.lastIndexOf('.');
		if (idx == -1) {
			return "application/octet-stream";
		}
		// Locale.ROOT keeps the comparison stable regardless of the JVM's
		// default locale (e.g. "GIF" would not lower-case to "gif" in Turkish).
		String fileExtension = path.substring(idx).toLowerCase(java.util.Locale.ROOT);
		switch (fileExtension) {
		case ".html":
			return "text/html";
		case ".css":
			return "text/css";
		case ".js":
			return "application/javascript";
		case ".gif":
			return "image/gif";
		case ".png":
			return "image/png";
		case ".txt":
			return "text/plain";
		case ".xml":
			return "application/xml";
		case ".json":
			return "application/json";
		default:
			// Anything not in the table is delegated to the activation map.
			return new MimetypesFileTypeMap().getContentType(path);
		}
	}

	/**
	 * Resolves the MIME type through Spring's
	 * {@link ConfigurableMimeFileTypeMap} with its default configuration.
	 *
	 * @param path path or file name to look up
	 * @return the content type reported by the Spring map
	 */
	public static String getMimeTypeBySpring(String path) {
		ConfigurableMimeFileTypeMap mimeMap = new ConfigurableMimeFileTypeMap();
		// Per the original author's note: no mapping for application/xml.
		return mimeMap.getContentType(path);
	}

	/**
	 * Resolves the MIME type through the JDK's {@link FileNameMap} obtained
	 * from {@link URLConnection#getFileNameMap()}. The name is URL-encoded as
	 * UTF-8 before the lookup.
	 *
	 * @param fileUrl file name or path to look up
	 * @return the mapped MIME type, {@code application/octet-stream} when the
	 *         map has no entry, or an empty string if UTF-8 encoding is
	 *         reported as unsupported
	 */
	public static String getMimeByFileNameMap(String fileUrl) {
		FileNameMap fileNameMap = URLConnection.getFileNameMap();
		try {
			String mimeType = fileNameMap
					.getContentTypeFor(URLEncoder.encode(fileUrl, "UTF-8"));
			if (mimeType == null) {
				mimeType = "application/octet-stream";
			}
			return mimeType;
		} catch (UnsupportedEncodingException e) {
			e.printStackTrace();
			return "";
		}
	}

	/**
	 * Detects the MIME type by parsing the file with Apache Tika's
	 * {@link AutoDetectParser} and reading the content type from the resulting
	 * metadata. The names of all metadata entries produced by the parse are
	 * printed to standard output.
	 *
	 * @param fileUrl path of the file to parse
	 * @return the value of the content-type metadata entry, or {@code null}
	 *         when parsing failed (the exception is printed, not rethrown)
	 */
	public static String getMimeByTika(String fileUrl) {
		String mimeType = null;
		try {
			ContentHandler contenthandler = new BodyContentHandler();

			Metadata metadata = new Metadata();
			metadata.add(Metadata.CONTENT_ENCODING, "utf-8");
			metadata.set(Metadata.RESOURCE_NAME_KEY, fileUrl);

			// Per the original author's note: using DefaultParser here did not
			// yield a MIME type, hence AutoDetectParser.
			Parser parser = new AutoDetectParser();

			ParseContext context = new ParseContext();
			context.set(Parser.class, parser);

			// The stream is opened only after the parser has been set up and is
			// always closed once parsing finishes or fails; the caller of
			// parse() owns the stream.
			try (FileInputStream in = new FileInputStream(fileUrl)) {
				parser.parse(in, contenthandler, metadata, context);
			}

			for (String name : metadata.names()) {
				System.out.println(name);
			}
			mimeType = metadata.get(Metadata.CONTENT_TYPE);
		} catch (IOException | TikaException | SAXException e) {
			e.printStackTrace();
		}
		return mimeType;
	}

	/**
	 * Detects the MIME type with jMimeMagic.
	 *
	 * @param fileUrl path of the file to inspect
	 * @return the MIME type of the match, or an empty string when jMimeMagic
	 *         throws (the exception is printed, not rethrown)
	 */
	public static String getMimeByJMimeMagic(String fileUrl) {
		try {
			// NOTE(review): the boolean flag presumably enables extension
			// hints; confirm against the jMimeMagic API.
			MagicMatch match = Magic.getMagicMatch(new File(fileUrl), true);
			return match.getMimeType();
		} catch (MagicParseException | MagicMatchNotFoundException | MagicException e) {
			e.printStackTrace();
		}
		return "";
	}

}

3.MIMETest.java:

package mime;

/**
 * Command-line driver that runs every MIME-type lookup offered by
 * {@code FileUtils} against one sample path and prints the results.
 */
public class MIMETest {
	public static void main(String[] args) {

		// File located under the src directory
		// String fileName = "funds.properties";
		String fileName = "createPerson.sql";
		// String path = getPath(fileName);

		// Absolute paths
		// String path = "E:/test/process.txt";
		// String path = "E:/test/02.jpg";
		// String path = "E:/Anheng/receiver-design.pdf";
		// String path = "E:/api/dom4j.chm";
		// String path = "E:/eclipse/ajax/pom.xml";
		// String path = "E:/test/person.json";
		// String path = "E:/test/file.java";
		// String path = "E:/test/static.ftl";
		// String path = "E:/test/rest.jerseySpring.war";
		// String path = "E:/test/upload/myeclipse.exe";
		String path = "E:/test/upload/myeclipse.ini";

		report("getMimeTypeByFileTypeMap", path, FileUtils.getMimeTypeByFileTypeMap(path));
		report("getMimeTypeByFileTypeMap2", path, FileUtils.getMimeTypeByFileTypeMap2(path));
		report("getMimeTypeBySpring", path, FileUtils.getMimeTypeBySpring(path));
		report("getMimeByFileNameMap", path, FileUtils.getMimeByFileNameMap(path));

		/*
		 * Observations recorded by the original author ("the above" refers to
		 * the four lookups printed before this comment):
		 * - Tika checks that the path is valid.
		 * - .properties: Tika returns text/x-java-properties; the above only
		 *   return application/octet-stream.
		 * - .sql: Tika returns text/x-sql; the above only return
		 *   application/octet-stream.
		 * - .json: Tika returns application/json; the above return
		 *   application/octet-stream unless that type was added explicitly.
		 * - .java: Tika returns text/x-java-source; the first two return
		 *   application/octet-stream, the last two return text/plain.
		 * - .ftl: Tika returns text/html; the above return
		 *   application/octet-stream.
		 * - .war: Tika returns application/x-tika-java-web-archive; the above
		 *   return application/octet-stream.
		 * - .exe: Tika returns application/x-dosexec; the above return
		 *   application/octet-stream.
		 * - .ini: Tika returns text/x-ini; the above return
		 *   application/octet-stream.
		 */

		report("getMimeByTika", path, FileUtils.getMimeByTika(path));
		report("getMimeByJMimeMagic", path, FileUtils.getMimeByJMimeMagic(path));

	}

	/**
	 * Prints one result line in the form
	 * {@code <method>: Mime Type of <path> is <mimeType>}.
	 */
	private static void report(String method, String path, String mimeType) {
		System.out.println(method + ": Mime Type of " + path + " is " + mimeType);
	}

	/**
	 * Builds the path of a file under {@code src/main/resources} of the
	 * current working directory, using the platform file separator.
	 */
	private static String getPath(String fileName) {
		String workingDir = System.getProperty("user.dir");
		String sep = System.getProperty("file.separator");
		StringBuilder result = new StringBuilder();
		result.append(workingDir);
		result.append(sep).append("src");
		result.append(sep).append("main");
		result.append(sep).append("resources");
		result.append(sep).append(fileName);
		return result.toString();
	}
}

原本只测试Tika,即不加入jmimemagic的依赖时,测试正常,后来加入jmimemagic依赖,报错如下:

Exception in thread "main" java.lang.RuntimeException: Unable to parse the default media type registry
	at org.apache.tika.mime.MimeTypes.getDefaultMimeTypes(MimeTypes.java:580)
	at org.apache.tika.config.TikaConfig.getDefaultMimeTypes(TikaConfig.java:69)
	at org.apache.tika.config.TikaConfig.<init>(TikaConfig.java:218)
	at org.apache.tika.config.TikaConfig.getDefaultConfig(TikaConfig.java:341)
	at org.apache.tika.parser.AutoDetectParser.<init>(AutoDetectParser.java:51)
	at mime.FileUtils.getMimeByTika(FileUtils.java:103)
	at mime.MIMETest.main(MIMETest.java:48)
Caused by: org.apache.tika.mime.MimeTypeException: Invalid type configuration
	at org.apache.tika.mime.MimeTypesReader.read(MimeTypesReader.java:126)
	at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:64)
	at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:93)
	at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:170)
	at org.apache.tika.mime.MimeTypes.getDefaultMimeTypes(MimeTypes.java:577)
	... 6 more
Caused by: org.xml.sax.SAXNotRecognizedException: http://javax.xml.XMLConstants/feature/secure-processing
	at org.apache.xerces.parsers.AbstractSAXParser.setFeature(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserImpl.setFeatures(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserImpl.<init>(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserFactoryImpl.newSAXParserImpl(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserFactoryImpl.setFeature(Unknown Source)
	at org.apache.tika.mime.MimeTypesReader.read(MimeTypesReader.java:119)
	... 10 more

按照错误提示,在FileUtils.java:103即getMimeByTika方法下的Parser parser = new AutoDetectParser();处打断点,对比加入net.sf.jmimemagic依赖前后的情况来查找异常原因,发现了下面一个现象:

加net.sf.jmimemagic前,javax.xml.parsers.SAXParserFactory的子类是com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl,该类在jdk自带jar包

rt.jar-->com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl.class下,如图:

而加net.sf.jmimemagic后,javax.xml.parsers.SAXParserFactory的子类变成了

xercesImpl-2.2.4.0.jar-->org.apache.xerces.jaxp.SAXParserFactoryImpl.class,

如图:

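该类在setFeature()时抛出了异常。也就是说,类路径上同时存在两个SAXParserFactory的实现(JDK自带的实现和xercesImpl-2.2.4.0.jar中的实现),而被选用的旧版Xerces实现不识别http://javax.xml.XMLConstants/feature/secure-processing这一特性,于是抛出了SAXNotRecognizedException。故我们将xercesImpl-2.2.4.0.jar排除掉即可,修改后的pom.xml如下所示: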

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>hui</groupId>
	<artifactId>TestWithMaven</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>TestWithMaven</name>
	<url>http://maven.apache.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	</properties>

	<dependencies>
		<dependency>
			<groupId>joda-time</groupId>
			<artifactId>joda-time</artifactId>
			<version>2.9.2</version>
		</dependency>
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
			<version>1.7.13</version>
		</dependency>
		<dependency>
			<groupId>org.apache.ibatis</groupId>
			<artifactId>ibatis-core</artifactId>
			<version>3.0</version>
		</dependency>
		<dependency>
			<groupId>org.mybatis</groupId>
			<artifactId>mybatis</artifactId>
			<version>3.4.0</version>
		</dependency>
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.38</version>
		</dependency>

		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.12</version>
		</dependency>
		<dependency>
			<groupId>org.hamcrest</groupId>
			<artifactId>hamcrest-core</artifactId>
			<version>1.3</version>
		</dependency>

		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-context-support</artifactId>
			<version>4.2.2.RELEASE</version>
		</dependency>
		<dependency>
			<groupId>org.apache.tika</groupId>
			<artifactId>tika-core</artifactId>
			<version>1.13</version>
		</dependency>

		<dependency>
			<groupId>net.sf.jmimemagic</groupId>
			<artifactId>jmimemagic</artifactId>
			<version>0.1.4</version>
			<!-- Exclude the xercesImpl jar (2.2.4.0) that arrives with jmimemagic:
			     with it on the classpath its org.apache.xerces.jaxp.SAXParserFactoryImpl
			     is picked instead of the JDK's built-in one and throws
			     SAXNotRecognizedException for the secure-processing feature that
			     Tika sets while reading its media type registry. -->
			<exclusions>
				<exclusion>
						<groupId>xerces</groupId>
						<artifactId>xercesImpl</artifactId>
				</exclusion>
			</exclusions>
		</dependency>
		<dependency>
			<groupId>xml-apis</groupId>
			<artifactId>xmlParserAPIs</artifactId>
			<version>2.0.2</version>
		</dependency>




	</dependencies>
</project>

至此,再运行,则各方法都不再抛异常。

转载于:https://my.oschina.net/u/2430057/blog/686174

相关文章:

  • (原)Matlab的svmtrain和svmclassify
  • Fastclick 导致click事件触发两次的问题,fastclickclick
  • 用spring-data-redis实现类似twitter的网站(转)
  • [javaSE] GUI(事件监听机制)
  • android WebViewClient和WebChromeClient
  • vim列模式编辑
  • kafka集群发送消息报错
  • apache2 启用php7.0
  • Android 仿网易新闻v3.5:上下滑动的引导页
  • C语言基础
  • angularjs与require的集成摘抄
  • rsync远程数据备份配置之再次总结
  • HP DL580 G7设置IPMI
  • 面试题与答案
  • Gluon公布完整的Java 9 Mobile创新举措
  • 【译】JS基础算法脚本:字符串结尾
  • .pyc 想到的一些问题
  • eclipse(luna)创建web工程
  • EOS是什么
  • express如何解决request entity too large问题
  • gcc介绍及安装
  • Idea+maven+scala构建包并在spark on yarn 运行
  • Java面向对象及其三大特征
  • Python学习之路16-使用API
  • Redis学习笔记 - pipline(流水线、管道)
  • Spring核心 Bean的高级装配
  • 开源地图数据可视化库——mapnik
  • 如何优雅的使用vue+Dcloud(Hbuild)开发混合app
  • 要让cordova项目适配iphoneX + ios11.4,总共要几步?三步
  • 正则表达式小结
  • 说说我为什么看好Spring Cloud Alibaba
  • ### Error querying database. Cause: com.mysql.jdbc.exceptions.jdbc4.CommunicationsException
  • #100天计划# 2013年9月29日
  • #微信小程序:微信小程序常见的配置传旨
  • (1)bark-ml
  • (1)虚拟机的安装与使用,linux系统安装
  • (4)事件处理——(6)给.ready()回调函数传递一个参数(Passing an argument to the .ready() callback)...
  • (Redis使用系列) SpringBoot 中对应2.0.x版本的Redis配置 一
  • (ZT)出版业改革:该死的死,该生的生
  • (第9篇)大数据的的超级应用——数据挖掘-推荐系统
  • (九)c52学习之旅-定时器
  • (蓝桥杯每日一题)love
  • (利用IDEA+Maven)定制属于自己的jar包
  • (详细版)Vary: Scaling up the Vision Vocabulary for Large Vision-Language Models
  • (一)UDP基本编程步骤
  • (轉貼) UML中文FAQ (OO) (UML)
  • *p++,*(p++),*++p,(*p)++区别?
  • .bat批处理出现中文乱码的情况
  • .NET NPOI导出Excel详解
  • .net refrector
  • .NET(C#、VB)APP开发——Smobiler平台控件介绍:Bluetooth组件
  • .NET大文件上传知识整理
  • .net反编译工具
  • @Autowired和@Resource的区别
  • [AIGC] 开源流程引擎哪个好,如何选型?