sun/net/URLCanonicalizer.java

0N/A/*
2362N/A * Copyright (c) 1996, Oracle and/or its affiliates. All rights reserved.
0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0N/A *
0N/A * This code is free software; you can redistribute it and/or modify it
0N/A * under the terms of the GNU General Public License version 2 only, as
2362N/A * published by the Free Software Foundation.  Oracle designates this
0N/A * particular file as subject to the "Classpath" exception as provided
2362N/A * by Oracle in the LICENSE file that accompanied this code.
0N/A *
0N/A * This code is distributed in the hope that it will be useful, but WITHOUT
0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0N/A * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
0N/A * version 2 for more details (a copy is included in the LICENSE file that
0N/A * accompanied this code).
0N/A *
0N/A * You should have received a copy of the GNU General Public License version
0N/A * 2 along with this work; if not, write to the Free Software Foundation,
0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0N/A *
2362N/A * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2362N/A * or visit www.oracle.com if you need additional information or have any
2362N/A * questions.
0N/A */
0N/A
0N/Apackage sun.net;
0N/A
/**
 * Helper class to map URL "abbreviations" to real URLs.
 * The default implementation supports the following mappings:
 *   ftp.mumble.bar/... => ftp://ftp.mumble.bar/...
 *   gopher.mumble.bar/... => gopher://gopher.mumble.bar/...
 *   other.name.dom/... => http://other.name.dom/...
 *   mumble => http://www.mumble.com
 *   /foo/... => file:/foo/...
 *
 * Full URLs (those including a protocol name) are passed through unchanged,
 * and so is the empty string, which carries nothing to expand.
 *
 * Subclassers can override or extend this behavior to support different
 * or additional canonicalization policies.
 *
 * @author      Steve Byrne
 */

public class URLCanonicalizer {
    /**
     * Creates the default canonicalizer instance.
     */
    public URLCanonicalizer() { }

    /**
     * Given a possibly abbreviated URL (missing a protocol name, typically),
     * this method's job is to transform that URL into a canonical form,
     * by including a protocol name and additional syntax, if necessary.
     *
     * The rules are tried in this order; the first that matches wins:
     *   1. a leading "ftp."    gets an "ftp://" prefix,
     *   2. a leading "gopher." gets a "gopher://" prefix,
     *   3. a leading "/"       gets a "file:" prefix,
     *   4. anything without a protocol name gets an "http://" prefix; if it
     *      is also a simple host name it is first wrapped as "www.NAME.com".
     *
     * For a correctly formed URL, this method should just return its argument.
     * An empty string is also returned as is.
     *
     * @param simpleURL the possibly abbreviated URL; must not be null
     * @return the canonical form of <code>simpleURL</code>
     * @throws NullPointerException if <code>simpleURL</code> is null
     */
    public String canonicalize(String simpleURL) {
        // An empty abbreviation names no host at all.  Expanding it would
        // fabricate a URL with an empty host label ("http://www..com"), so
        // hand it back untouched instead.
        if (simpleURL.length() == 0) {
            return simpleURL;
        }

        if (simpleURL.startsWith("ftp.")) {
            return "ftp://" + simpleURL;
        }
        if (simpleURL.startsWith("gopher.")) {
            return "gopher://" + simpleURL;
        }
        if (simpleURL.startsWith("/")) {
            return "file:" + simpleURL;
        }
        if (hasProtocolName(simpleURL)) {
            // Already a full URL: pass it through unchanged.
            return simpleURL;
        }

        // No protocol given: assume http, and expand a bare name such as
        // "mumble" to "www.mumble.com" first.
        String host = simpleURL;
        if (isSimpleHostName(simpleURL)) {
            host = "www." + simpleURL + ".com";
        }
        return "http://" + host;
    }

    /**
     * Given a possibly abbreviated URL, this predicate function returns
     * true if it appears that the URL contains a protocol name, i.e. if
     * the text before the first ':' is non-empty and consists only of
     * ASCII letters and '-'.
     *
     * Note that this set is narrower than the scheme syntax of RFC 3986,
     * which also allows digits, '+' and '.'.  As a consequence
     * "www.example.com:8080" is not mistaken for a URL with a protocol,
     * whereas "localhost:8080" is reported as having one.
     *
     * @param url the string to examine; must not be null
     * @return true if <code>url</code> appears to start with a protocol name
     */
    public boolean hasProtocolName(String url) {
        int colon = url.indexOf(':');
        if (colon <= 0) {       // treat ":foo" as not having a protocol spec
            return false;
        }

        for (int i = 0; i < colon; i++) {
            char c = url.charAt(i);
            if (!isAsciiLetter(c) && c != '-') {
                // found an illegal character
                return false;
            }
        }

        return true;
    }

    /**
     * Returns true if the URL is just a single name, no periods or
     * slashes, false otherwise.  A single name is a non-empty run of
     * ASCII letters, ASCII digits and '-'.
     *
     * @param url the string to examine; must not be null
     * @return true if <code>url</code> is a non-empty single name
     **/
    protected boolean isSimpleHostName(String url) {
        int length = url.length();
        if (length == 0) {
            // No characters means no name; without this check the loop
            // below would accept the empty string vacuously.
            return false;
        }

        for (int i = 0; i < length; i++) {
            char c = url.charAt(i);
            if (!isAsciiLetter(c) && !isAsciiDigit(c) && c != '-') {
                // found a character that cannot be part of a single name
                return false;
            }
        }

        return true;
    }

    /** Returns true if c is one of 'A'-'Z' or 'a'-'z'. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    /** Returns true if c is one of '0'-'9'. */
    private static boolean isAsciiDigit(char c) {
        return c >= '0' && c <= '9';
    }
}