博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
用Java将Excel的xls和xlsx文件转换成csv文件的方法
阅读量:6163 次
发布时间:2019-06-21

本文共 24137 字,大约阅读时间需要 80 分钟。

hot3.png

package com.xueyi.core.util;import java.io.IOException;import java.io.InputStream;import java.io.PrintStream;import javax.xml.parsers.ParserConfigurationException;import javax.xml.parsers.SAXParser;import javax.xml.parsers.SAXParserFactory;import org.apache.poi.openxml4j.exceptions.OpenXML4JException;import org.apache.poi.openxml4j.opc.OPCPackage;import org.apache.poi.openxml4j.opc.PackageAccess;import org.apache.poi.ss.usermodel.BuiltinFormats;import org.apache.poi.ss.usermodel.DataFormatter;import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;import org.apache.poi.xssf.eventusermodel.XSSFReader;import org.apache.poi.xssf.model.StylesTable;import org.apache.poi.xssf.usermodel.XSSFCellStyle;//import org.apache.poi.xssf.usermodel.XSSFRichTextString;import org.apache.poi.xssf.usermodel.XSSFRichTextString;import org.xml.sax.Attributes;import org.xml.sax.ContentHandler;import org.xml.sax.InputSource;import org.xml.sax.SAXException;import org.xml.sax.XMLReader;import org.xml.sax.helpers.DefaultHandler;public class XLSXTOCSV {    /**     * The type of the data value is indicated by an attribute on the cell. The     * value is usually in a "v" element within the cell.     */    enum xssfDataType {        BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,    }    /**     * Derived from http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api     * 

     * Also see Standard ECMA-376, 1st edition, part 4, pages 1928ff, at     * http://www.ecma-international.org/publications/standards/Ecma-376.htm     * 

     * A web-friendly version is http://openiso.org/Ecma/376/Part4     */    class MyXSSFSheetHandler extends DefaultHandler {        /**         * Table with styles         */        private StylesTable stylesTable;        /**         * Table with unique strings         */        private ReadOnlySharedStringsTable sharedStringsTable;        /**         * Destination for data         */        private final PrintStream output;        /**         * Number of columns to read starting with leftmost         */        private final int minColumnCount;        // Set when V start element is seen        private boolean vIsOpen;        // Set when cell start element is seen;        // used when cell close element is seen.        private xssfDataType nextDataType;        // Used to format numeric cell values.        private short formatIndex;        private String formatString;        private final DataFormatter formatter;        private int thisColumn = -1;        // The last column printed to the output stream        private int lastColumnNumber = -1;        // Gathers characters as they are seen.        private StringBuffer value;        /**         * Accepts objects needed while parsing.         *          * @param styles         *            Table of styles         * @param strings         *            Table of shared strings         * @param cols         *            Minimum number of columns to show         * @param target         *            Sink for output         */        public MyXSSFSheetHandler(StylesTable styles,                ReadOnlySharedStringsTable strings, int cols, PrintStream target) {            this.stylesTable = styles;            this.sharedStringsTable = strings;            this.minColumnCount = cols;            this.output = target;            this.value = new StringBuffer();            this.nextDataType = xssfDataType.NUMBER;            this.formatter = new DataFormatter();        }        /*         * (non-Javadoc)         *          * @see         * org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String,         * java.lang.String, java.lang.String, org.xml.sax.Attributes)         */        public void startElement(String uri, String localName, String name,                Attributes attributes) throws SAXException {            if ("inlineStr".equals(name) || "v".equals(name)) {                vIsOpen = true;                // Clear contents cache                value.setLength(0);            }            // c => cell            else if ("c".equals(name)) {                // Get the cell reference                String r = attributes.getValue("r");                int firstDigit = -1;                for (int c = 0; c < r.length(); ++c) {                    if (Character.isDigit(r.charAt(c))) {                        firstDigit = c;                        break;                    }                }                thisColumn = nameToColumn(r.substring(0, firstDigit));                // Set up defaults.                this.nextDataType = xssfDataType.NUMBER;                this.formatIndex = -1;                this.formatString = null;                String cellType = attributes.getValue("t");                String cellStyleStr = attributes.getValue("s");                if ("b".equals(cellType))                    nextDataType = xssfDataType.BOOL;                else if ("e".equals(cellType))                    nextDataType = xssfDataType.ERROR;                else if ("inlineStr".equals(cellType))                    nextDataType = xssfDataType.INLINESTR;                else if ("s".equals(cellType))                    nextDataType = xssfDataType.SSTINDEX;                else if ("str".equals(cellType))                    nextDataType = xssfDataType.FORMULA;                else if (cellStyleStr != null) {                    // It's a number, but almost certainly one                    // with a special style or format                    int styleIndex = Integer.parseInt(cellStyleStr);                    XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);                    this.formatIndex = style.getDataFormat();                    this.formatString = style.getDataFormatString();                    if (this.formatString == null)                        this.formatString = BuiltinFormats                                .getBuiltinFormat(this.formatIndex);                }            }        }        /*         * (non-Javadoc)         *          * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String,         * java.lang.String, java.lang.String)         */        public void endElement(String uri, String localName, String name)                throws SAXException {            String thisStr = null;            // v => contents of a cell            if ("v".equals(name)) {                // Process the value contents as required.                // Do now, as characters() may be called more than once                switch (nextDataType) {                case BOOL:                    char first = value.charAt(0);                    thisStr = first == '0' ? "FALSE" : "TRUE";                    break;                case ERROR:                    thisStr = "\"ERROR:" + value.toString() + '"';                    break;                case FORMULA:                    // A formula could result in a string value,                    // so always add double-quote characters.                    thisStr = '"' + value.toString() + '"';                    break;                case INLINESTR:                    // TODO: have seen an example of this, so it's untested.                    XSSFRichTextString rtsi = new XSSFRichTextString(value                            .toString());                    thisStr = '"' + rtsi.toString() + '"';                    break;                case SSTINDEX:                    String sstIndex = value.toString();                    try {                        int idx = Integer.parseInt(sstIndex);                        XSSFRichTextString rtss = new XSSFRichTextString(                                sharedStringsTable.getEntryAt(idx));                        thisStr = '"' + rtss.toString() + '"';                    } catch (NumberFormatException ex) {                        output.println("Failed to parse SST index '" + sstIndex                                + "': " + ex.toString());                    }                    break;                case NUMBER:                    String n = value.toString();                    if (this.formatString != null)                        thisStr = formatter.formatRawCellContents(Double                                .parseDouble(n), this.formatIndex,                                this.formatString);                    else                        thisStr = n;                    break;                default:                    thisStr = "(TODO: Unexpected type: " + nextDataType + ")";                    break;                }                // Output after we've seen the string contents                // Emit commas for any fields that were missing on this row                if (lastColumnNumber == -1) {                    lastColumnNumber = 0;                }                for (int i = lastColumnNumber; i < thisColumn; ++i)                    output.print(',');                // Might be the empty string.                output.print(thisStr);                // Update column                if (thisColumn > -1)                    lastColumnNumber = thisColumn;            } else if ("row".equals(name)) {                // Print out any missing commas if needed                if (minColumns > 0) {                    // Columns are 0 based                    if (lastColumnNumber == -1) {                        lastColumnNumber = 0;                    }                    for (int i = lastColumnNumber; i < (this.minColumnCount); i++) {                        output.print(',');                    }                }                // We're onto a new row                output.println();                lastColumnNumber = -1;            }        }        /**         * Captures characters only if a suitable element is open. Originally         * was just "v"; extended for inlineStr also.         */        public void characters(char[] ch, int start, int length)                throws SAXException {            if (vIsOpen)                value.append(ch, start, length);        }        /**         * Converts an Excel column name like "C" to a zero-based index.         *          * @param name         * @return Index corresponding to the specified name         */        private int nameToColumn(String name) {            int column = -1;            for (int i = 0; i < name.length(); ++i) {                int c = name.charAt(i);                column = (column + 1) * 26 + c - 'A';            }            return column;        }    }    // /    private OPCPackage xlsxPackage;    private int minColumns;    private PrintStream output;    private final String OUTPUT_CHARSET = "GBK";     /**     * Creates a new XLSX -> CSV converter     *      * @param pkg     *            The XLSX package to process     * @param output     *            The PrintStream to output the CSV to     * @param minColumns     *            The minimum number of columns to output, or -1 for no minimum     */    public XLSXTOCSV(OPCPackage pkg, PrintStream output, int minColumns) {        this.xlsxPackage = pkg;        this.output = output;        this.minColumns = minColumns;    }        //TODO catch exceptions    public XLSXTOCSV(String inputFilePath, String outputFilePath) throws Exception {        xlsxPackage = OPCPackage.open(inputFilePath, PackageAccess.READ);        output = new PrintStream(outputFilePath, OUTPUT_CHARSET);        minColumns = -1;    }    /**     * Parses and shows the content of one sheet using the specified styles and     * shared-strings tables.     *      * @param styles     * @param strings     * @param sheetInputStream     */    public void processSheet(StylesTable styles,            ReadOnlySharedStringsTable strings, InputStream sheetInputStream)            throws IOException, ParserConfigurationException, SAXException {        InputSource sheetSource = new InputSource(sheetInputStream);        SAXParserFactory saxFactory = SAXParserFactory.newInstance();        SAXParser saxParser = saxFactory.newSAXParser();        XMLReader sheetParser = saxParser.getXMLReader();        ContentHandler handler = new MyXSSFSheetHandler(styles, strings,                this.minColumns, this.output);        sheetParser.setContentHandler(handler);        sheetParser.parse(sheetSource);    }    /**     * Initiates the processing of the XLS workbook file to CSV.     *      * @throws IOException     * @throws OpenXML4JException     * @throws ParserConfigurationException     * @throws SAXException     */    public void process() throws IOException, OpenXML4JException,            ParserConfigurationException, SAXException {        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(                this.xlsxPackage);        XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);        StylesTable styles = xssfReader.getStylesTable();        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader                .getSheetsData();        int index = 0;        while (iter.hasNext()) {            InputStream stream = iter.next();            String sheetName = iter.getSheetName();            this.output.println();            this.output.println(sheetName + " [index=" + index + "]:");            processSheet(styles, strings, stream);            stream.close();            ++index;        }    }    public static void main(String[] args) throws Exception {        XLSXTOCSV xlsx2csv = new XLSXTOCSV("E:\\V5User\\Desktop\\test.xlsx","E:\\V5User\\Desktop\\out.csv");        xlsx2csv.process();    } }

上面的是xlsx转化为csv,下面是xls转化为csv

package com.xueyi.core.util;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.IOException;import java.io.PrintStream;import java.util.ArrayList;import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;import org.apache.poi.hssf.eventusermodel.HSSFListener;import org.apache.poi.hssf.eventusermodel.HSSFRequest;import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;import org.apache.poi.hssf.model.HSSFFormulaParser;import org.apache.poi.hssf.record.BOFRecord;import org.apache.poi.hssf.record.BlankRecord;import org.apache.poi.hssf.record.BoolErrRecord;import org.apache.poi.hssf.record.BoundSheetRecord;import org.apache.poi.hssf.record.FormulaRecord;import org.apache.poi.hssf.record.LabelRecord;import org.apache.poi.hssf.record.LabelSSTRecord;import org.apache.poi.hssf.record.NoteRecord;import org.apache.poi.hssf.record.NumberRecord;import org.apache.poi.hssf.record.RKRecord;import org.apache.poi.hssf.record.Record;import org.apache.poi.hssf.record.SSTRecord;import org.apache.poi.hssf.record.StringRecord;import org.apache.poi.hssf.usermodel.HSSFWorkbook;import org.apache.poi.poifs.filesystem.POIFSFileSystem;/** * A XLS -> CSV processor, that uses the MissingRecordAware EventModel code to * ensure it outputs all columns and rows. *  * @author Nick Burch  */public class XLSTOCSV implements HSSFListener {    private int minColumns;    private POIFSFileSystem fs;    private PrintStream output;    private int lastRowNumber;    private int lastColumnNumber;    /** Should we output the formula, or the value it has? */    private boolean outputFormulaValues = true;    /** For parsing Formulas */    private SheetRecordCollectingListener workbookBuildingListener;    private HSSFWorkbook stubWorkbook;    // Records we pick up as we process    private SSTRecord sstRecord;    private FormatTrackingHSSFListener formatListener;    /** So we known which sheet we're on */    private int sheetIndex = -1;    private BoundSheetRecord[] orderedBSRs;    private ArrayList boundSheetRecords = new ArrayList();    // For handling formulas with string results    private int nextRow;    private int nextColumn;    private boolean outputNextStringRecord;        private final String OUTPUT_CHARSET = "GBK";    /**     * Creates a new XLS -> CSV converter     *      * @param fs     *            The POIFSFileSystem to process     * @param output     *            The PrintStream to output the CSV to     * @param minColumns     *            The minimum number of columns to output, or -1 for no minimum     */    public XLSTOCSV(POIFSFileSystem fs, PrintStream output, int minColumns) {        this.fs = fs;        this.output = output;        this.minColumns = minColumns;    }        public XLSTOCSV(String inputFilePath, String outputFilePath) throws Exception {        fs = new POIFSFileSystem(new FileInputStream(inputFilePath));        output = new PrintStream(outputFilePath, OUTPUT_CHARSET);        minColumns = -1;    }    /**     * Creates a new XLS -> CSV converter     *      * @param filename     *            The file to process     * @param minColumns     *            The minimum number of columns to output, or -1 for no minimum     * @throws IOException     * @throws FileNotFoundException     */    public XLSTOCSV(String filename, int minColumns) throws IOException,            FileNotFoundException {        this(new POIFSFileSystem(new FileInputStream(filename)), System.out,                minColumns);    }    /**     * Initiates the processing of the XLS file to CSV     */    public void process() throws IOException {        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(                this);        formatListener = new FormatTrackingHSSFListener(listener);        HSSFEventFactory factory = new HSSFEventFactory();        HSSFRequest request = new HSSFRequest();        if (outputFormulaValues) {            request.addListenerForAllRecords(formatListener);        } else {            workbookBuildingListener = new SheetRecordCollectingListener(                    formatListener);            request.addListenerForAllRecords(workbookBuildingListener);        }        factory.processWorkbookEvents(request, fs);    }    /**     * Main HSSFListener method, processes events, and outputs the CSV as the     * file is processed.     */    public void processRecord(Record record) {        int thisRow = -1;        int thisColumn = -1;        String thisStr = null;        switch (record.getSid()) {        case BoundSheetRecord.sid:            boundSheetRecords.add(record);            break;        case BOFRecord.sid:            BOFRecord br = (BOFRecord) record;            if (br.getType() == BOFRecord.TYPE_WORKSHEET) {                // Create sub workbook if required                if (workbookBuildingListener != null && stubWorkbook == null) {                    stubWorkbook = workbookBuildingListener                            .getStubHSSFWorkbook();                }                // Output the worksheet name                // Works by ordering the BSRs by the location of                // their BOFRecords, and then knowing that we                // process BOFRecords in byte offset order                sheetIndex++;                if (orderedBSRs == null) {                    orderedBSRs = BoundSheetRecord                            .orderByBofPosition(boundSheetRecords);                }                output.println();                output.println(orderedBSRs[sheetIndex].getSheetname() + " ["                        + (sheetIndex + 1) + "]:");            }            break;        case SSTRecord.sid:            sstRecord = (SSTRecord) record;            break;        case BlankRecord.sid:            BlankRecord brec = (BlankRecord) record;            thisRow = brec.getRow();            thisColumn = brec.getColumn();            thisStr = "";            break;        case BoolErrRecord.sid:            BoolErrRecord berec = (BoolErrRecord) record;            thisRow = berec.getRow();            thisColumn = berec.getColumn();            thisStr = "";            break;        case FormulaRecord.sid:            FormulaRecord frec = (FormulaRecord) record;            thisRow = frec.getRow();            thisColumn = frec.getColumn();            if (outputFormulaValues) {                if (Double.isNaN(frec.getValue())) {                    // Formula result is a string                    // This is stored in the next record                    outputNextStringRecord = true;                    nextRow = frec.getRow();                    nextColumn = frec.getColumn();                } else {                    thisStr = formatListener.formatNumberDateCell(frec);                }            } else {                thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook,                        frec.getParsedExpression()) + '"';            }            break;        case StringRecord.sid:            if (outputNextStringRecord) {                // String for formula                StringRecord srec = (StringRecord) record;                thisStr = srec.getString();                thisRow = nextRow;                thisColumn = nextColumn;                outputNextStringRecord = false;            }            break;        case LabelRecord.sid:            LabelRecord lrec = (LabelRecord) record;            thisRow = lrec.getRow();            thisColumn = lrec.getColumn();            thisStr = '"' + lrec.getValue() + '"';            break;        case LabelSSTRecord.sid:            LabelSSTRecord lsrec = (LabelSSTRecord) record;            thisRow = lsrec.getRow();            thisColumn = lsrec.getColumn();            if (sstRecord == null) {                thisStr = '"' + "(No SST Record, can't identify string)" + '"';            } else {                thisStr = '"' + sstRecord.getString(lsrec.getSSTIndex())                        .toString() + '"';            }            break;        case NoteRecord.sid:            NoteRecord nrec = (NoteRecord) record;            thisRow = nrec.getRow();            thisColumn = nrec.getColumn();            // TODO: Find object to match nrec.getShapeId()            thisStr = '"' + "(TODO)" + '"';            break;        case NumberRecord.sid:            NumberRecord numrec = (NumberRecord) record;            thisRow = numrec.getRow();            thisColumn = numrec.getColumn();            // Format            thisStr = formatListener.formatNumberDateCell(numrec);            break;        case RKRecord.sid:            RKRecord rkrec = (RKRecord) record;            thisRow = rkrec.getRow();            thisColumn = rkrec.getColumn();            thisStr = '"' + "(TODO)" + '"';            break;        default:         break;     }     // Handle new row     if (thisRow != -1 && thisRow != lastRowNumber) {         lastColumnNumber = -1;     }     // Handle missing column     if (record instanceof MissingCellDummyRecord) {         MissingCellDummyRecord mc = (MissingCellDummyRecord) record;         thisRow = mc.getRow();         thisColumn = mc.getColumn();         thisStr = "";     }     // If we got something to print out, do so     if (thisStr != null) {         if (thisColumn > 0) {             output.print(',');         }         output.print(thisStr);     }        // Update column and row count        if (thisRow > -1)            lastRowNumber = thisRow;        if (thisColumn > -1)            lastColumnNumber = thisColumn;        // Handle end of row        if (record instanceof LastCellOfRowDummyRecord) {            // Print out any missing commas if needed            if (minColumns > 0) {                // Columns are 0 based                if (lastColumnNumber == -1) {                    lastColumnNumber = 0;                }                for (int i = lastColumnNumber; i < (minColumns); i++) {                    output.print(',');                }            }            // We're onto a new row            lastColumnNumber = -1;            // End the row            output.println();        }    }    public static void main(String[] args) throws Exception {        XLSTOCSV xls2csv = new XLSTOCSV("E:\\Desktop\\资讯管理-添加资讯.xls","E:\\Desktop\\out1.csv");        xls2csv.process();    }}

上面转载自 

但是里面的代码粘贴麻烦,就给整理保留以下,main测试在下面,更改路径可以直接使用

转载于:https://my.oschina.net/u/2297250/blog/361017

你可能感兴趣的文章
showdialog弹出窗口刷新问题
查看>>
java
查看>>
Vue.js连接后台数据jsp页面  ̄▽ ̄
查看>>
关于程序的单元测试
查看>>
mysql内存优化
查看>>
都市求生日记第一篇
查看>>
Java集合---HashMap源码剖析
查看>>
SQL优化技巧
查看>>
thead 固定,tbody 超出滚动(附带改变滚动条样式)
查看>>
Dijkstra算法
查看>>
css 动画 和 响应式布局和兼容性
查看>>
csrf 跨站请求伪造相关以及django的中间件
查看>>
MySQL数据类型--与MySQL零距离接触2-11MySQL自动编号
查看>>
生日小助手源码运行的步骤
查看>>
Configuration python CGI in XAMPP in win-7
查看>>
bzoj 5006(洛谷 4547) [THUWC2017]Bipartite 随机二分图——期望DP
查看>>
CF 888E Maximum Subsequence——折半搜索
查看>>
欧几里德算法(辗转相除法)
查看>>
面试题1-----SVM和LR的异同
查看>>
MFC控件的SubclassDlgItem
查看>>