/*
 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
0N/A
0N/Apackage javax.swing.text.html.parser;
0N/A
0N/Aimport javax.swing.text.SimpleAttributeSet;
0N/Aimport javax.swing.text.html.HTMLEditorKit;
0N/Aimport javax.swing.text.html.HTML;
0N/Aimport javax.swing.text.ChangedCharSetException;
0N/A
0N/Aimport java.util.*;
0N/Aimport java.io.*;
0N/Aimport java.net.*;
0N/A
0N/A/**
0N/A * A Parser for HTML Documents (actually, you can specify a DTD, but
0N/A * you should really only use this class with the html dtd in swing).
0N/A * Reads an InputStream of HTML and
0N/A * invokes the appropriate methods in the ParserCallback class. This
0N/A * is the default parser used by HTMLEditorKit to parse HTML url's.
0N/A * <p>This will message the callback for all valid tags, as well as
0N/A * tags that are implied but not explicitly specified. For example, the
0N/A * html string (&lt;p&gt;blah) only has a p tag defined. The callback
0N/A * will see the following methods:
0N/A * <ol><li><i>handleStartTag(html, ...)</i></li>
0N/A * <li><i>handleStartTag(head, ...)</i></li>
0N/A * <li><i>handleEndTag(head)</i></li>
0N/A * <li><i>handleStartTag(body, ...)</i></li>
0N/A * <li>handleStartTag(p, ...)</i></li>
0N/A * <li>handleText(...)</li>
0N/A * <li><i>handleEndTag(p)</i></li>
0N/A * <li><i>handleEndTag(body)</i></li>
0N/A * <li><i>handleEndTag(html)</i></li>
0N/A * </ol>
0N/A * The items in <i>italic</i> are implied, that is, although they were not
0N/A * explicitly specified, to be correct html they should have been present
0N/A * (head isn't necessary, but it is still generated). For tags that
0N/A * are implied, the AttributeSet argument will have a value of
0N/A * <code>Boolean.TRUE</code> for the key
0N/A * <code>HTMLEditorKit.ParserCallback.IMPLIED</code>.
0N/A * <p>HTML.Attributes defines a type safe enumeration of html attributes.
0N/A * If an attribute key of a tag is defined in HTML.Attribute, the
0N/A * HTML.Attribute will be used as the key, otherwise a String will be used.
0N/A * For example &lt;p foo=bar class=neat&gt; has two attributes. foo is
0N/A * not defined in HTML.Attribute, where as class is, therefore the
0N/A * AttributeSet will have two values in it, HTML.Attribute.CLASS with
0N/A * a String value of 'neat' and the String key 'foo' with a String value of
0N/A * 'bar'.
0N/A * <p>The position argument will indicate the start of the tag, comment
0N/A * or text. Similiar to arrays, the first character in the stream has a
0N/A * position of 0. For tags that are
0N/A * implied the position will indicate
0N/A * the location of the next encountered tag. In the first example,
0N/A * the implied start body and html tags will have the same position as the
0N/A * p tag, and the implied end p, html and body tags will all have the same
0N/A * position.
0N/A * <p>As html skips whitespace the position for text will be the position
0N/A * of the first valid character, eg in the string '\n\n\nblah'
0N/A * the text 'blah' will have a position of 3, the newlines are skipped.
0N/A * <p>
0N/A * For attributes that do not have a value, eg in the html
0N/A * string <code>&lt;foo blah&gt;</code> the attribute <code>blah</code>
0N/A * does not have a value, there are two possible values that will be
0N/A * placed in the AttributeSet's value:
0N/A * <ul>
0N/A * <li>If the DTD does not contain an definition for the element, or the
0N/A * definition does not have an explicit value then the value in the
0N/A * AttributeSet will be <code>HTML.NULL_ATTRIBUTE_VALUE</code>.
0N/A * <li>If the DTD contains an explicit value, as in:
0N/A * <code>&lt;!ATTLIST OPTION selected (selected) #IMPLIED&gt;</code>
0N/A * this value from the dtd (in this case selected) will be used.
0N/A * </ul>
0N/A * <p>
0N/A * Once the stream has been parsed, the callback is notified of the most
0N/A * likely end of line string. The end of line string will be one of
0N/A * \n, \r or \r\n, which ever is encountered the most in parsing the
0N/A * stream.
0N/A *
0N/A * @author Sunita Mani
0N/A */
0N/Apublic class DocumentParser extends javax.swing.text.html.parser.Parser {
0N/A
0N/A private int inbody;
0N/A private int intitle;
0N/A private int inhead;
0N/A private int instyle;
0N/A private int inscript;
0N/A private boolean seentitle;
0N/A private HTMLEditorKit.ParserCallback callback = null;
0N/A private boolean ignoreCharSet = false;
0N/A private static final boolean debugFlag = false;
0N/A
0N/A public DocumentParser(DTD dtd) {
0N/A super(dtd);
0N/A }
0N/A
0N/A public void parse(Reader in, HTMLEditorKit.ParserCallback callback, boolean ignoreCharSet) throws IOException {
0N/A this.ignoreCharSet = ignoreCharSet;
0N/A this.callback = callback;
0N/A parse(in);
0N/A // end of line
0N/A callback.handleEndOfLineString(getEndOfLineString());
0N/A }
0N/A
0N/A /**
0N/A * Handle Start Tag.
0N/A */
0N/A protected void handleStartTag(TagElement tag) {
0N/A
0N/A Element elem = tag.getElement();
0N/A if (elem == dtd.body) {
0N/A inbody++;
0N/A } else if (elem == dtd.html) {
0N/A } else if (elem == dtd.head) {
0N/A inhead++;
0N/A } else if (elem == dtd.title) {
0N/A intitle++;
0N/A } else if (elem == dtd.style) {
0N/A instyle++;
0N/A } else if (elem == dtd.script) {
0N/A inscript++;
0N/A }
0N/A if (debugFlag) {
0N/A if (tag.fictional()) {
0N/A debug("Start Tag: " + tag.getHTMLTag() + " pos: " + getCurrentPos());
0N/A } else {
0N/A debug("Start Tag: " + tag.getHTMLTag() + " attributes: " +
0N/A getAttributes() + " pos: " + getCurrentPos());
0N/A }
0N/A }
0N/A if (tag.fictional()) {
0N/A SimpleAttributeSet attrs = new SimpleAttributeSet();
0N/A attrs.addAttribute(HTMLEditorKit.ParserCallback.IMPLIED,
0N/A Boolean.TRUE);
0N/A callback.handleStartTag(tag.getHTMLTag(), attrs,
0N/A getBlockStartPosition());
0N/A } else {
0N/A callback.handleStartTag(tag.getHTMLTag(), getAttributes(),
0N/A getBlockStartPosition());
0N/A flushAttributes();
0N/A }
0N/A }
0N/A
0N/A
0N/A protected void handleComment(char text[]) {
0N/A if (debugFlag) {
0N/A debug("comment: ->" + new String(text) + "<-"
0N/A + " pos: " + getCurrentPos());
0N/A }
0N/A callback.handleComment(text, getBlockStartPosition());
0N/A }
0N/A
0N/A /**
0N/A * Handle Empty Tag.
0N/A */
0N/A protected void handleEmptyTag(TagElement tag) throws ChangedCharSetException {
0N/A
0N/A Element elem = tag.getElement();
0N/A if (elem == dtd.meta && !ignoreCharSet) {
0N/A SimpleAttributeSet atts = getAttributes();
0N/A if (atts != null) {
0N/A String content = (String)atts.getAttribute(HTML.Attribute.CONTENT);
0N/A if (content != null) {
0N/A if ("content-type".equalsIgnoreCase((String)atts.getAttribute(HTML.Attribute.HTTPEQUIV))) {
0N/A if (!content.equalsIgnoreCase("text/html") &&
0N/A !content.equalsIgnoreCase("text/plain")) {
0N/A throw new ChangedCharSetException(content, false);
0N/A }
0N/A } else if ("charset" .equalsIgnoreCase((String)atts.getAttribute(HTML.Attribute.HTTPEQUIV))) {
0N/A throw new ChangedCharSetException(content, true);
0N/A }
0N/A }
0N/A }
0N/A }
0N/A if (inbody != 0 || elem == dtd.meta || elem == dtd.base || elem == dtd.isindex || elem == dtd.style || elem == dtd.link) {
0N/A if (debugFlag) {
0N/A if (tag.fictional()) {
0N/A debug("Empty Tag: " + tag.getHTMLTag() + " pos: " + getCurrentPos());
0N/A } else {
0N/A debug("Empty Tag: " + tag.getHTMLTag() + " attributes: "
0N/A + getAttributes() + " pos: " + getCurrentPos());
0N/A }
0N/A }
0N/A if (tag.fictional()) {
0N/A SimpleAttributeSet attrs = new SimpleAttributeSet();
0N/A attrs.addAttribute(HTMLEditorKit.ParserCallback.IMPLIED,
0N/A Boolean.TRUE);
0N/A callback.handleSimpleTag(tag.getHTMLTag(), attrs,
0N/A getBlockStartPosition());
0N/A } else {
0N/A callback.handleSimpleTag(tag.getHTMLTag(), getAttributes(),
0N/A getBlockStartPosition());
0N/A flushAttributes();
0N/A }
0N/A }
0N/A }
0N/A
0N/A /**
0N/A * Handle End Tag.
0N/A */
0N/A protected void handleEndTag(TagElement tag) {
0N/A Element elem = tag.getElement();
0N/A if (elem == dtd.body) {
0N/A inbody--;
0N/A } else if (elem == dtd.title) {
0N/A intitle--;
0N/A seentitle = true;
0N/A } else if (elem == dtd.head) {
0N/A inhead--;
0N/A } else if (elem == dtd.style) {
0N/A instyle--;
0N/A } else if (elem == dtd.script) {
0N/A inscript--;
0N/A }
0N/A if (debugFlag) {
0N/A debug("End Tag: " + tag.getHTMLTag() + " pos: " + getCurrentPos());
0N/A }
0N/A callback.handleEndTag(tag.getHTMLTag(), getBlockStartPosition());
0N/A
0N/A }
0N/A
0N/A /**
0N/A * Handle Text.
0N/A */
0N/A protected void handleText(char data[]) {
0N/A if (data != null) {
0N/A if (inscript != 0) {
0N/A callback.handleComment(data, getBlockStartPosition());
0N/A return;
0N/A }
0N/A if (inbody != 0 || ((instyle != 0) ||
0N/A ((intitle != 0) && !seentitle))) {
0N/A if (debugFlag) {
0N/A debug("text: ->" + new String(data) + "<-" + " pos: " + getCurrentPos());
0N/A }
0N/A callback.handleText(data, getBlockStartPosition());
0N/A }
0N/A }
0N/A }
0N/A
0N/A /*
0N/A * Error handling.
0N/A */
0N/A protected void handleError(int ln, String errorMsg) {
0N/A if (debugFlag) {
0N/A debug("Error: ->" + errorMsg + "<-" + " pos: " + getCurrentPos());
0N/A }
0N/A /* PENDING: need to improve the error string. */
0N/A callback.handleError(errorMsg, getCurrentPos());
0N/A }
0N/A
0N/A
0N/A /*
0N/A * debug messages
0N/A */
0N/A private void debug(String msg) {
0N/A System.out.println(msg);
0N/A }
0N/A}