使用 POI 读取 Word 文档时,公式变形怎么办?

使用的是poi 3.12版的。

如下图所示:

优秀到卓越
分享到:
0
package com.uet.common.utils;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;

/**
 * Helpers for converting a binary Word document (opened through POI's
 * {@link HWPFDocument}) into an HTML string.
 */
public class WordUtils {
	/**
	 * Name of the charset used to serialize the generated HTML and to decode
	 * it back into a String. Starts as GB2312 and is switched to UTF-8 by the
	 * static initializer when the JVM reports a Windows operating system.
	 */
	public static String CODING = "GB2312";
	static {
		// NOTE(review): the original comment claimed this block is required so
		// that the jmagick.jar path can be found. Nothing here touches jmagick;
		// the block only selects the output charset from the OS name.
		String osName = System.getProperty("os.name").toLowerCase();
		if (osName.indexOf("windows") >= 0) {
			CODING = "UTF-8";
		}
	}

	/**
	 * Converts a Word file to HTML and returns the HTML text.
	 * <p>
	 * Pictures found in the document are written to disk as
	 * {@code fileTemp + lower-cased suggested name}; that same string is what
	 * the picture manager hands back to the converter. Picture writing is best
	 * effort: a failure is reported on stderr and the conversion continues.
	 *
	 * @param wordFile
	 *            the Word document to convert
	 * @param fileTemp
	 *            prefix prepended to every picture file name, eg:/tmp/words/
	 * @return the generated HTML with the "QUOTE" and "\* MERGEFORMAT" field
	 *         residue removed
	 * @throws TransformerException
	 *             if the HTML DOM cannot be serialized
	 * @throws IOException
	 *             if the Word file cannot be read, or {@link #CODING} names an
	 *             unsupported charset
	 * @throws ParserConfigurationException
	 *             if no DOM document builder can be created
	 */
	public static String convert2Html(File wordFile, final String fileTemp) throws TransformerException, IOException, ParserConfigurationException {
		// Read the charset once so encoding and decoding below always agree,
		// even if another thread reassigns the public field meanwhile.
		final String encoding = CODING;

		HWPFDocument wordDocument;
		FileInputStream in = new FileInputStream(wordFile);
		try {
			wordDocument = new HWPFDocument(in);
		} finally {
			// The document is loaded from the stream inside the constructor;
			// make sure the file handle is released even when loading fails.
			in.close();
		}

		WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
		wordToHtmlConverter.setPicturesManager(new PicturesManager() {
			public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
				String imagePath = fileTemp + suggestedName.toLowerCase();
				writePicture(imagePath, content);
				return imagePath;
			}
		});
		wordToHtmlConverter.processDocument(wordDocument);

		Document htmlDocument = wordToHtmlConverter.getDocument();
		ByteArrayOutputStream out = new ByteArrayOutputStream();

		Transformer serializer = TransformerFactory.newInstance().newTransformer();
		serializer.setOutputProperty(OutputKeys.ENCODING, encoding);
		serializer.setOutputProperty(OutputKeys.INDENT, "yes");
		serializer.setOutputProperty(OutputKeys.METHOD, "html");
		serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

		// Decode with the charset the serializer wrote in. Using the platform
		// default here garbles non-ASCII text whenever the two differ.
		return clean(new String(out.toByteArray(), encoding));
	}

	/**
	 * Writes picture bytes to the given path, always closing the stream.
	 * Failures are printed and swallowed so that one unwritable picture does
	 * not abort the whole conversion.
	 */
	private static void writePicture(String imagePath, byte[] content) {
		FileOutputStream fos = null;
		try {
			fos = new FileOutputStream(new File(imagePath));
			fos.write(content);
		} catch (Exception e) {
			// Deliberately broad: the picture callback has no way to report a
			// checked failure, and the conversion should carry on without it.
			e.printStackTrace();
		} finally {
			if (fos != null) {
				try {
					fos.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

	/**
	 * Strips the literal texts "QUOTE" and "\* MERGEFORMAT" from the HTML.
	 * Both replacements are plain substring replacements, not regular
	 * expressions.
	 */
	private static String clean(String content){
		content=StringUtils.replace(content, "QUOTE", "");
		content=StringUtils.replace(content, "\\* MERGEFORMAT", "");
		return content;
	}
}
 


这段代码来自网上,不过 POI 源码里 WordToHtmlConverter 的 main 方法与它基本一样。

有的时候,图片也无法读取,有时候分数的横线无法显示。

0
共 1 条   当前1/1页

© 2014 究问社区 copyRight 豫ICP备13003319号-1