/*
 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
325N/A
package com.sun.xml.internal.org.jvnet.mimepull;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
325N/A
325N/A/**
325N/A * Pull parser for the MIME messages. Applications can use pull API to continue
325N/A * the parsing MIME messages lazily.
325N/A *
325N/A * <pre>
325N/A * for e.g.:
325N/A * <p>
325N/A *
325N/A * MIMEParser parser = ...
325N/A * Iterator<MIMEEvent> it = parser.iterator();
325N/A * while(it.hasNext()) {
325N/A * MIMEEvent event = it.next();
325N/A * ...
325N/A * }
325N/A * </pre>
325N/A *
325N/A * @author Jitendra Kotamraju
325N/A */
325N/Aclass MIMEParser implements Iterable<MIMEEvent> {
325N/A
325N/A private static final Logger LOGGER = Logger.getLogger(MIMEParser.class.getName());
325N/A
325N/A // Actually, the grammar doesn't support whitespace characters
325N/A // after boundary. But the mail implementation checks for it.
325N/A // We will only check for these many whitespace characters after boundary
325N/A private static final int NO_LWSP = 1000;
325N/A private enum STATE {START_MESSAGE, SKIP_PREAMBLE, START_PART, HEADERS, BODY, END_PART, END_MESSAGE}
325N/A private STATE state = STATE.START_MESSAGE;
325N/A
325N/A private final InputStream in;
325N/A private final byte[] bndbytes;
325N/A private final int bl;
325N/A private final MIMEConfig config;
325N/A private final int[] bcs = new int[128]; // BnM algo: Bad Character Shift table
325N/A private final int[] gss; // BnM algo : Good Suffix Shift table
325N/A
325N/A /**
325N/A * Have we parsed the data from our InputStream yet?
325N/A */
325N/A private boolean parsed;
325N/A
325N/A /*
325N/A * Read and process body partsList until we see the
325N/A * terminating boundary line (or EOF).
325N/A */
325N/A private boolean done = false;
325N/A
325N/A private boolean eof;
325N/A private final int capacity;
325N/A private byte[] buf;
325N/A private int len;
325N/A private boolean bol; // beginning of the line
325N/A
325N/A /*
325N/A * Parses the MIME content. At the EOF, it also closes input stream
325N/A */
325N/A MIMEParser(InputStream in, String boundary, MIMEConfig config) {
325N/A this.in = in;
325N/A this.bndbytes = getBytes("--"+boundary);
325N/A bl = bndbytes.length;
325N/A this.config = config;
325N/A gss = new int[bl];
325N/A compileBoundaryPattern();
325N/A
325N/A // \r\n + boundary + "--\r\n" + lots of LWSP
325N/A capacity = config.chunkSize+2+bl+4+NO_LWSP;
325N/A createBuf(capacity);
325N/A }
325N/A
325N/A /**
325N/A * Returns iterator for the parsing events. Use the iterator to advance
325N/A * the parsing.
325N/A *
325N/A * @return iterator for parsing events
325N/A */
325N/A public Iterator<MIMEEvent> iterator() {
325N/A return new MIMEEventIterator();
325N/A }
325N/A
325N/A class MIMEEventIterator implements Iterator<MIMEEvent> {
325N/A
325N/A public boolean hasNext() {
325N/A return !parsed;
325N/A }
325N/A
325N/A public MIMEEvent next() {
325N/A switch(state) {
325N/A case START_MESSAGE :
325N/A LOGGER.finer("MIMEParser state="+STATE.START_MESSAGE);
325N/A state = STATE.SKIP_PREAMBLE;
325N/A return MIMEEvent.START_MESSAGE;
325N/A
325N/A case SKIP_PREAMBLE :
325N/A LOGGER.finer("MIMEParser state="+STATE.SKIP_PREAMBLE);
325N/A skipPreamble();
325N/A // fall through
325N/A case START_PART :
325N/A LOGGER.finer("MIMEParser state="+STATE.START_PART);
325N/A state = STATE.HEADERS;
325N/A return MIMEEvent.START_PART;
325N/A
325N/A case HEADERS :
325N/A LOGGER.finer("MIMEParser state="+STATE.HEADERS);
325N/A InternetHeaders ih = readHeaders();
325N/A state = STATE.BODY;
325N/A bol = true;
325N/A return new MIMEEvent.Headers(ih);
325N/A
325N/A case BODY :
325N/A LOGGER.finer("MIMEParser state="+STATE.BODY);
325N/A ByteBuffer buf = readBody();
325N/A bol = false;
325N/A return new MIMEEvent.Content(buf);
325N/A
325N/A case END_PART :
325N/A LOGGER.finer("MIMEParser state="+STATE.END_PART);
325N/A if (done) {
325N/A state = STATE.END_MESSAGE;
325N/A } else {
325N/A state = STATE.START_PART;
325N/A }
325N/A return MIMEEvent.END_PART;
325N/A
325N/A case END_MESSAGE :
325N/A LOGGER.finer("MIMEParser state="+STATE.END_MESSAGE);
325N/A parsed = true;
325N/A return MIMEEvent.END_MESSAGE;
325N/A
325N/A default :
325N/A throw new MIMEParsingException("Unknown Parser state = "+state);
325N/A }
325N/A }
325N/A
325N/A public void remove() {
325N/A throw new UnsupportedOperationException();
325N/A }
325N/A }
325N/A
325N/A /**
325N/A * Collects the headers for the current part by parsing mesage stream.
325N/A *
325N/A * @return headers for the current part
325N/A */
325N/A private InternetHeaders readHeaders() {
325N/A if (!eof) {
325N/A fillBuf();
325N/A }
325N/A return new InternetHeaders(new LineInputStream());
325N/A }
325N/A
325N/A /**
325N/A * Reads and saves the part of the current attachment part's content.
325N/A * At the end of this method, buf should have the remaining data
325N/A * at index 0.
325N/A *
325N/A * @return a chunk of the part's content
325N/A *
325N/A */
325N/A private ByteBuffer readBody() {
325N/A if (!eof) {
325N/A fillBuf();
325N/A }
325N/A int start = match(buf, 0, len); // matches boundary
325N/A if (start == -1) {
325N/A // No boundary is found
325N/A assert eof || len >= config.chunkSize;
325N/A int chunkSize = eof ? len : config.chunkSize;
325N/A if (eof) {
325N/A // Should we throw exception as there is no closing boundary ? But some impl
325N/A // like SAAJ do not throw excpetion.
325N/A // throw new MIMEParsingException("Reached EOF, but there is no closing MIME boundary.");
325N/A done = true;
325N/A state = STATE.END_PART;
325N/A }
325N/A return adjustBuf(chunkSize, len-chunkSize);
325N/A }
325N/A // Found boundary.
325N/A // Is it at the start of a line ?
325N/A int chunkLen = start;
325N/A if (bol && start == 0) {
325N/A // nothing to do
325N/A } else if (start > 0 && (buf[start-1] == '\n' || buf[start-1] =='\r')) {
325N/A --chunkLen;
325N/A if (buf[start-1] == '\n' && start >1 && buf[start-2] == '\r') {
325N/A --chunkLen;
325N/A }
325N/A } else {
325N/A return adjustBuf(start+1, len-start-1); // boundary is not at beginning of a line
325N/A }
325N/A
325N/A if (start+bl+1 < len && buf[start+bl] == '-' && buf[start+bl+1] == '-') {
325N/A state = STATE.END_PART;
325N/A done = true;
325N/A return adjustBuf(chunkLen, 0);
325N/A }
325N/A
325N/A // Consider all the whitespace in boundary+whitespace+"\r\n"
325N/A int lwsp = 0;
325N/A for(int i=start+bl; i < len && (buf[i] == ' ' || buf[i] == '\t'); i++) {
325N/A ++lwsp;
325N/A }
325N/A
325N/A // Check for \n or \r\n in boundary+whitespace+"\n" or boundary+whitespace+"\r\n"
325N/A if (start+bl+lwsp < len && buf[start+bl+lwsp] == '\n') {
325N/A state = STATE.END_PART;
325N/A return adjustBuf(chunkLen, len-start-bl-lwsp-1);
325N/A } else if (start+bl+lwsp+1 < len && buf[start+bl+lwsp] == '\r' && buf[start+bl+lwsp+1] == '\n') {
325N/A state = STATE.END_PART;
325N/A return adjustBuf(chunkLen, len-start-bl-lwsp-2);
325N/A } else if (start+bl+lwsp+1 < len) {
325N/A return adjustBuf(chunkLen+1, len-chunkLen-1); // boundary string in a part data
325N/A } else if (eof) {
325N/A done = true;
325N/A state = STATE.END_PART;
325N/A return adjustBuf(chunkLen, 0);
325N/A // Should we throw exception as there is no closing boundary ? But some impl
325N/A // like SAAJ do not throw excpetion.
325N/A //throw new MIMEParsingException("Reached EOF, but there is no closing MIME boundary.");
325N/A }
325N/A
325N/A // Some more data needed to determine if it is indeed a proper boundary
325N/A return adjustBuf(chunkLen, len-chunkLen);
325N/A }
325N/A
325N/A /**
325N/A * Returns a chunk from the original buffer. A new buffer is
325N/A * created with the remaining bytes.
325N/A *
325N/A * @param chunkSize create a chunk with these many bytes
325N/A * @param remaining bytes from the end of the buffer that need to be copied to
325N/A * the beginning of the new buffer
325N/A * @return chunk
325N/A */
325N/A private ByteBuffer adjustBuf(int chunkSize, int remaining) {
325N/A assert buf != null;
325N/A assert chunkSize >= 0;
325N/A assert remaining >= 0;
325N/A
325N/A byte[] temp = buf;
325N/A // create a new buf and adjust it without this chunk
325N/A createBuf(remaining);
325N/A System.arraycopy(temp, len-remaining, buf, 0, remaining);
325N/A len = remaining;
325N/A
325N/A return ByteBuffer.wrap(temp, 0, chunkSize);
325N/A }
325N/A
325N/A private void createBuf(int min) {
325N/A buf = new byte[min < capacity ? capacity : min];
325N/A }
325N/A
325N/A /**
325N/A * Skips the preamble to find the first attachment part
325N/A */
325N/A private void skipPreamble() {
325N/A
325N/A while(true) {
325N/A if (!eof) {
325N/A fillBuf();
325N/A }
325N/A int start = match(buf, 0, len); // matches boundary
325N/A if (start == -1) {
325N/A // No boundary is found
325N/A if (eof) {
325N/A throw new MIMEParsingException("Missing start boundary");
325N/A } else {
325N/A adjustBuf(len-bl+1, bl-1);
325N/A continue;
325N/A }
325N/A }
325N/A
325N/A if (start > config.chunkSize) {
325N/A adjustBuf(start, len-start);
325N/A continue;
325N/A }
325N/A // Consider all the whitespace boundary+whitespace+"\r\n"
325N/A int lwsp = 0;
325N/A for(int i=start+bl; i < len && (buf[i] == ' ' || buf[i] == '\t'); i++) {
325N/A ++lwsp;
325N/A }
325N/A // Check for \n or \r\n
325N/A if (start+bl+lwsp < len && (buf[start+bl+lwsp] == '\n' || buf[start+bl+lwsp] == '\r') ) {
325N/A if (buf[start+bl+lwsp] == '\n') {
325N/A adjustBuf(start+bl+lwsp+1, len-start-bl-lwsp-1);
325N/A break;
325N/A } else if (start+bl+lwsp+1 < len && buf[start+bl+lwsp+1] == '\n') {
325N/A adjustBuf(start+bl+lwsp+2, len-start-bl-lwsp-2);
325N/A break;
325N/A }
325N/A }
325N/A adjustBuf(start+1, len-start-1);
325N/A }
325N/A LOGGER.fine("Skipped the preamble. buffer len="+len);
325N/A }
325N/A
325N/A private static byte[] getBytes(String s) {
325N/A char [] chars= s.toCharArray();
325N/A int size = chars.length;
325N/A byte[] bytes = new byte[size];
325N/A
325N/A for (int i = 0; i < size;)
325N/A bytes[i] = (byte) chars[i++];
325N/A return bytes;
325N/A }
325N/A
325N/A /**
325N/A * Boyer-Moore search method. Copied from java.util.regex.Pattern.java
325N/A *
325N/A * Pre calculates arrays needed to generate the bad character
325N/A * shift and the good suffix shift. Only the last seven bits
325N/A * are used to see if chars match; This keeps the tables small
325N/A * and covers the heavily used ASCII range, but occasionally
325N/A * results in an aliased match for the bad character shift.
325N/A */
325N/A private void compileBoundaryPattern() {
325N/A int i, j;
325N/A
325N/A // Precalculate part of the bad character shift
325N/A // It is a table for where in the pattern each
325N/A // lower 7-bit value occurs
325N/A for (i = 0; i < bndbytes.length; i++) {
325N/A bcs[bndbytes[i]&0x7F] = i + 1;
325N/A }
325N/A
325N/A // Precalculate the good suffix shift
325N/A // i is the shift amount being considered
325N/ANEXT: for (i = bndbytes.length; i > 0; i--) {
325N/A // j is the beginning index of suffix being considered
325N/A for (j = bndbytes.length - 1; j >= i; j--) {
325N/A // Testing for good suffix
325N/A if (bndbytes[j] == bndbytes[j-i]) {
325N/A // src[j..len] is a good suffix
325N/A gss[j-1] = i;
325N/A } else {
325N/A // No match. The array has already been
325N/A // filled up with correct values before.
325N/A continue NEXT;
325N/A }
325N/A }
325N/A // This fills up the remaining of optoSft
325N/A // any suffix can not have larger shift amount
325N/A // then its sub-suffix. Why???
325N/A while (j > 0) {
325N/A gss[--j] = i;
325N/A }
325N/A }
325N/A // Set the guard value because of unicode compression
325N/A gss[bndbytes.length -1] = 1;
325N/A }
325N/A
325N/A /**
325N/A * Finds the boundary in the given buffer using Boyer-Moore algo.
325N/A * Copied from java.util.regex.Pattern.java
325N/A *
325N/A * @param mybuf boundary to be searched in this mybuf
325N/A * @param off start index in mybuf
325N/A * @param len number of bytes in mybuf
325N/A *
325N/A * @return -1 if there is no match or index where the match starts
325N/A */
325N/A private int match(byte[] mybuf, int off, int len) {
325N/A int last = len - bndbytes.length;
325N/A
325N/A // Loop over all possible match positions in text
325N/ANEXT: while (off <= last) {
325N/A // Loop over pattern from right to left
325N/A for (int j = bndbytes.length - 1; j >= 0; j--) {
325N/A byte ch = mybuf[off+j];
325N/A if (ch != bndbytes[j]) {
325N/A // Shift search to the right by the maximum of the
325N/A // bad character shift and the good suffix shift
325N/A off += Math.max(j + 1 - bcs[ch&0x7F], gss[j]);
325N/A continue NEXT;
325N/A }
325N/A }
325N/A // Entire pattern matched starting at off
325N/A return off;
325N/A }
325N/A return -1;
325N/A }
325N/A
325N/A /**
325N/A * Fills the remaining buf to the full capacity
325N/A */
325N/A private void fillBuf() {
325N/A LOGGER.finer("Before fillBuf() buffer len="+len);
325N/A assert !eof;
325N/A while(len < buf.length) {
325N/A int read;
325N/A try {
325N/A read = in.read(buf, len, buf.length-len);
325N/A } catch(IOException ioe) {
325N/A throw new MIMEParsingException(ioe);
325N/A }
325N/A if (read == -1) {
325N/A eof = true;
325N/A try {
325N/A LOGGER.fine("Closing the input stream.");
325N/A in.close();
325N/A } catch(IOException ioe) {
325N/A throw new MIMEParsingException(ioe);
325N/A }
325N/A break;
325N/A } else {
325N/A len += read;
325N/A }
325N/A }
325N/A LOGGER.finer("After fillBuf() buffer len="+len);
325N/A }
325N/A
325N/A private void doubleBuf() {
325N/A byte[] temp = new byte[2*len];
325N/A System.arraycopy(buf, 0, temp, 0, len);
325N/A buf = temp;
325N/A if (!eof) {
325N/A fillBuf();
325N/A }
325N/A }
325N/A
325N/A class LineInputStream {
325N/A private int offset;
325N/A
325N/A /*
325N/A * Read a line containing only ASCII characters from the input
325N/A * stream. A line is terminated by a CR or NL or CR-NL sequence.
325N/A * A common error is a CR-CR-NL sequence, which will also terminate
325N/A * a line.
325N/A * The line terminator is not returned as part of the returned
325N/A * String. Returns null if no data is available. <p>
325N/A *
325N/A * This class is similar to the deprecated
325N/A * <code>DataInputStream.readLine()</code>
325N/A */
325N/A public String readLine() throws IOException {
325N/A
325N/A int hdrLen = 0;
325N/A int lwsp = 0;
325N/A while(offset+hdrLen < len) {
325N/A if (buf[offset+hdrLen] == '\n') {
325N/A lwsp = 1;
325N/A break;
325N/A }
325N/A if (offset+hdrLen+1 == len) {
325N/A doubleBuf();
325N/A }
325N/A if (offset+hdrLen+1 >= len) { // No more data in the stream
325N/A assert eof;
325N/A return null;
325N/A }
325N/A if (buf[offset+hdrLen] == '\r' && buf[offset+hdrLen+1] == '\n') {
325N/A lwsp = 2;
325N/A break;
325N/A }
325N/A ++hdrLen;
325N/A }
325N/A if (hdrLen == 0) {
325N/A adjustBuf(offset+lwsp, len-offset-lwsp);
325N/A return null;
325N/A }
325N/A
325N/A String hdr = new String(buf, offset, hdrLen);
325N/A offset += hdrLen+lwsp;
325N/A return hdr;
325N/A }
325N/A
325N/A }
325N/A
325N/A}