opengrok/analysis/AnalyzerGuru.java

	AnalyzerGuru.java revision 1181
1364N/A/*
1364N/A * CDDL HEADER START
1364N/A *
1364N/A * The contents of this file are subject to the terms of the
1364N/A * Common Development and Distribution License (the "License").
1364N/A * You may not use this file except in compliance with the License.
1364N/A *
1364N/A * See LICENSE.txt included in this distribution for the specific
1364N/A * language governing permissions and limitations under the License.
1364N/A *
1364N/A * When distributing Covered Code, include this CDDL HEADER in each
1364N/A * file and include the License file at LICENSE.txt.
1364N/A * If applicable, add the following below this CDDL HEADER, with the
1364N/A * fields enclosed by brackets "[]" replaced with your own identifying
1364N/A * information: Portions Copyright [yyyy] [name of copyright owner]
1364N/A *
1364N/A * CDDL HEADER END
1364N/A */
1364N/A
1364N/A/*
1383N/A * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
1364N/A */
1364N/Apackage org.opensolaris.opengrok.analysis;
1370N/A
1364N/Aimport java.io.File;
1364N/Aimport java.io.IOException;
1370N/Aimport java.io.InputStream;
1364N/Aimport java.io.Reader;
1364N/Aimport java.io.Writer;
1364N/Aimport java.util.ArrayList;
1364N/Aimport java.util.HashMap;
1364N/Aimport java.util.List;
1364N/Aimport java.util.Locale;
1364N/Aimport java.util.Map;
1389N/Aimport java.util.SortedMap;
1364N/Aimport java.util.TreeMap;
1364N/Aimport java.util.logging.Level;
1383N/Aimport org.apache.lucene.document.DateTools;
1364N/Aimport org.apache.lucene.document.Document;
1364N/Aimport org.apache.lucene.document.Field;
1364N/Aimport org.opensolaris.opengrok.OpenGrokLogger;
1364N/Aimport org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
1364N/Aimport org.opensolaris.opengrok.analysis.archive.BZip2AnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.archive.GZIPAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.archive.TarAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.archive.ZipAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.c.CAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.c.CxxAnalyzerFactory;
1383N/Aimport org.opensolaris.opengrok.analysis.data.IgnorantAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.data.ImageAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.document.TroffAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.executables.ELFAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.executables.JarAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.executables.JavaClassAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.fortran.FortranAnalyzerFactory;
1383N/Aimport org.opensolaris.opengrok.analysis.java.JavaAnalyzerFactory;
1383N/Aimport org.opensolaris.opengrok.analysis.lisp.LispAnalyzerFactory;
1383N/Aimport org.opensolaris.opengrok.analysis.perl.PerlAnalyzerFactory;
1383N/Aimport org.opensolaris.opengrok.analysis.php.PhpAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.plain.PlainAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.plain.XMLAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.python.PythonAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.sh.ShAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.sql.SQLAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.analysis.tcl.TclAnalyzerFactory;
1364N/Aimport org.opensolaris.opengrok.configuration.Project;
1364N/Aimport org.opensolaris.opengrok.history.Annotation;
1364N/Aimport org.opensolaris.opengrok.history.HistoryException;
1364N/Aimport org.opensolaris.opengrok.history.HistoryGuru;
1364N/Aimport org.opensolaris.opengrok.history.HistoryReader;
1364N/Aimport org.opensolaris.opengrok.web.Util;
1364N/A
1364N/A/**
1364N/A * Manages and provides Analyzers as needed. Please see
1364N/A * <a href="http://www.opensolaris.org/os/project/opengrok/manual/internals/">
1364N/A * this</a> page for a great description of the purpose of the AnalyzerGuru.
1364N/A *
1383N/A * Created on September 22, 2005
1364N/A * @author Chandan
1364N/A */
1364N/Apublic class AnalyzerGuru {
1364N/A
1364N/A    /** The default {@code FileAnalyzerFactory} instance. */
1364N/A    private static final FileAnalyzerFactory
1364N/A        DEFAULT_ANALYZER_FACTORY = new FileAnalyzerFactory();
1370N/A
1364N/A    /** Map from file names to analyzer factories. */
1364N/A    private static final Map<String, FileAnalyzerFactory>
1370N/A        FILE_NAMES = new HashMap<String, FileAnalyzerFactory>();
1364N/A
1364N/A    /** Map from file extensions to analyzer factories. */
1370N/A    private static final Map<String, FileAnalyzerFactory>
1364N/A        ext = new HashMap<String, FileAnalyzerFactory>();
1364N/A
1364N/A    // @TODO: have a comparator
1370N/A    /** Map from magic strings to analyzer factories. */
1364N/A    private static final SortedMap<String, FileAnalyzerFactory>
1364N/A        magics = new TreeMap<String, FileAnalyzerFactory>();
1383N/A
1364N/A    /**
1364N/A     * List of matcher objects which can be used to determine which analyzer
1389N/A     * factory to use.
1383N/A     */
1364N/A    private static final List<FileAnalyzerFactory.Matcher>
1370N/A        matchers = new ArrayList<FileAnalyzerFactory.Matcher>();
1364N/A
1364N/A    /** List of all registered {@code FileAnalyzerFactory} instances. */
1364N/A    private static final List<FileAnalyzerFactory>
1364N/A        factories = new ArrayList<FileAnalyzerFactory>();
1383N/A
1364N/A    /*
1364N/A     * If you write your own analyzer please register it here
1383N/A     */
1364N/A    static {
1383N/A        FileAnalyzerFactory[] analyzers = {
1383N/A            DEFAULT_ANALYZER_FACTORY,
1370N/A            new IgnorantAnalyzerFactory(),
1364N/A            new BZip2AnalyzerFactory(),
1364N/A            new XMLAnalyzerFactory(),
1364N/A            new TroffAnalyzerFactory(),
1364N/A            new ELFAnalyzerFactory(),
1370N/A            new JavaClassAnalyzerFactory(),
1364N/A            new ImageAnalyzerFactory(),
1364N/A            JarAnalyzerFactory.DEFAULT_INSTANCE,
1364N/A            ZipAnalyzerFactory.DEFAULT_INSTANCE,
1383N/A            new TarAnalyzerFactory(),
1383N/A            new CAnalyzerFactory(),
1364N/A            new CxxAnalyzerFactory(),
1383N/A            new ShAnalyzerFactory(),
1383N/A            PlainAnalyzerFactory.DEFAULT_INSTANCE,
1364N/A            new GZIPAnalyzerFactory(),
1370N/A            new JavaAnalyzerFactory(),
1364N/A            new PythonAnalyzerFactory(),
1364N/A            new PerlAnalyzerFactory(),
1370N/A            new PhpAnalyzerFactory(),
1364N/A            new LispAnalyzerFactory(),
1364N/A            new TclAnalyzerFactory(),
1364N/A            new SQLAnalyzerFactory(),
1364N/A            new FortranAnalyzerFactory()
1383N/A        };
1383N/A
1383N/A        for (FileAnalyzerFactory analyzer : analyzers) {
1383N/A            registerAnalyzer(analyzer);
1389N/A        }
1389N/A    }
1383N/A
1383N/A    /**
1383N/A     * Register a {@code FileAnalyzerFactory} instance.
1383N/A     */
1383N/A    private static void registerAnalyzer(FileAnalyzerFactory factory) {
1389N/A        for (String name : factory.getFileNames()) {
1389N/A            FileAnalyzerFactory old = FILE_NAMES.put(name, factory);
1389N/A            assert old == null :
1389N/A                "name '" + name + "' used in multiple analyzers";
1383N/A        }
1383N/A        for (String suffix : factory.getSuffixes()) {
1383N/A            FileAnalyzerFactory old = ext.put(suffix, factory);
1383N/A            assert old == null :
1383N/A            "suffix '" + suffix + "' used in multiple analyzers";
1389N/A        }
1389N/A        for (String magic : factory.getMagicStrings()) {
1383N/A            FileAnalyzerFactory old = magics.put(magic, factory);
1383N/A            assert old == null :
1383N/A                "magic '" + magic + "' used in multiple analyzers";
1383N/A        }
1364N/A        matchers.addAll(factory.getMatchers());
1364N/A        factories.add(factory);
1389N/A    }
1364N/A
1364N/A    /**
1364N/A     *  Instruct the AnalyzerGuru to use a given analyzer for a given
1364N/A     *  file extension.
1364N/A     *  @param extension the file-extension to add
1364N/A     *  @param factory   a factory which creates
1364N/A     *                   the analyzer to use for the given extension
1364N/A     *                  (if you pass null as the analyzer, you will disable
1364N/A     *                   the analyzer used for that extension)
1364N/A     */
1364N/A    public static void addExtension(String extension,
1364N/A                                    FileAnalyzerFactory factory) {
1364N/A        if (factory == null) {
1364N/A            ext.remove(extension);
1364N/A        } else {
1364N/A            ext.put(extension, factory);
1364N/A        }
1364N/A    }
1364N/A
1364N/A    /**
1364N/A     * Get the default Analyzer.
1364N/A     */
1364N/A    public static FileAnalyzer getAnalyzer() {
1364N/A        return DEFAULT_ANALYZER_FACTORY.getAnalyzer();
1364N/A    }
1364N/A
1364N/A    /**
1364N/A     * Get an analyzer suited to analyze a file. This function will reuse
1364N/A     * analyzers since they are costly.
1364N/A     *
1364N/A     * @param in Input stream containing data to be analyzed
1364N/A     * @param file Name of the file to be analyzed
1364N/A     * @return An analyzer suited for that file content
1364N/A     * @throws java.io.IOException If an error occurs while accessing the
1364N/A     *                             data in the input stream.
1364N/A     */
1364N/A    public static FileAnalyzer getAnalyzer(InputStream in, String file) throws IOException {
1364N/A        FileAnalyzerFactory factory = find(in, file);
1364N/A        if (factory == null) {
1364N/A            return getAnalyzer();
1364N/A        }
1364N/A        return factory.getAnalyzer();
1364N/A    }
1364N/A
1364N/A    /**
1364N/A     * Create a Lucene document and fill in the required fields
1364N/A     * @param file The file to index
1364N/A     * @param in The data to generate the index for
1364N/A     * @param path Where the file is located (from source root)
1364N/A     * @return The Lucene document to add to the index database
1364N/A     * @throws java.io.IOException If an exception occurs while collecting the
1364N/A     *                             datas
1364N/A     */
1364N/A    public Document getDocument(File file, InputStream in, String path,
1364N/A                                FileAnalyzer fa) throws IOException {
1364N/A        Document doc = new Document();
1364N/A        String date = DateTools.timeToString(file.lastModified(), DateTools.Resolution.MILLISECOND);
1364N/A        doc.add(new Field("u", Util.uid(path, date), Field.Store.YES, Field.Index.NOT_ANALYZED));
1364N/A        doc.add(new Field("fullpath", file.getAbsolutePath(), Field.Store.NO, Field.Index.NOT_ANALYZED));
1364N/A
1364N/A        try {
1364N/A            HistoryReader hr = HistoryGuru.getInstance().getHistoryReader(file);
1364N/A            if (hr != null) {
1364N/A                doc.add(new Field("hist", hr));
1364N/A                // date = hr.getLastCommentDate() //RFE
1364N/A            }
1364N/A        } catch (HistoryException e) {
1364N/A            OpenGrokLogger.getLogger().log(Level.WARNING, "An error occurred while reading history: ", e);
1364N/A        }
1364N/A        doc.add(new Field("date", date, Field.Store.YES, Field.Index.NOT_ANALYZED));
1364N/A        if (path != null) {
1364N/A            doc.add(new Field("path", path, Field.Store.YES, Field.Index.ANALYZED));
1364N/A            Project project = Project.getProject(path);
1364N/A            if (project != null) {
1364N/A                doc.add(new Field("project", project.getPath(), Field.Store.YES, Field.Index.ANALYZED));
1364N/A            }
1364N/A        }
1364N/A
1364N/A        if (fa != null) {
1364N/A            Genre g = fa.getGenre();
1364N/A            if (g == Genre.PLAIN) {
1364N/A                doc.add(new Field("t", "p", Field.Store.YES, Field.Index.NOT_ANALYZED));
1364N/A            } else if (g == Genre.XREFABLE) {
1364N/A                doc.add(new Field("t", "x", Field.Store.YES, Field.Index.NOT_ANALYZED));
1364N/A            } else if (g == Genre.HTML) {
1364N/A                doc.add(new Field("t", "h", Field.Store.YES, Field.Index.NOT_ANALYZED));
1364N/A            }
1364N/A            fa.analyze(doc, in);
1364N/A        }
1364N/A
1364N/A        return doc;
1364N/A    }
1364N/A
1364N/A    /**
1364N/A     * Get the content type for a named file.
1364N/A     *
1364N/A     * @param in The input stream we want to get the content type for (if
1364N/A     *           we cannot determine the content type by the filename)
1364N/A     * @param file The name of the file
1364N/A     * @return The contentType suitable for printing to response.setContentType() or null
1364N/A     *         if the factory was not found
1364N/A     * @throws java.io.IOException If an error occurs while accessing the input
1364N/A     *                             stream.
1364N/A     */
1364N/A    public static String getContentType(InputStream in, String file) throws IOException {
1364N/A        FileAnalyzerFactory factory = find(in, file);
1364N/A        String type = null;
1364N/A        if (factory != null) {
1364N/A            type = factory.getContentType();
1364N/A        }
1364N/A        return type;
1364N/A    }
1364N/A
1364N/A    /**
1364N/A     * Write a browsable version of the file
1364N/A     *
1364N/A     * @param factory The analyzer factory for this filetype
1364N/A     * @param in The input stream containing the data
1364N/A     * @param out Where to write the result
1364N/A     * @param defs definitions for the source file, if available
1364N/A     * @param annotation Annotation information for the file
1364N/A     * @param project Project the file belongs to
1364N/A     * @throws java.io.IOException If an error occurs while creating the
1364N/A     *                             output
1364N/A     */
1364N/A    public static void writeXref(FileAnalyzerFactory factory, Reader in,
1364N/A                                 Writer out, Definitions defs,
1389N/A                                 Annotation annotation, Project project)
1364N/A        throws IOException
1364N/A    {
1364N/A        Reader input = in;
1364N/A        if (factory.getGenre() == Genre.PLAIN) {
949N/A            // This is some kind of text file, so we need to expand tabs to
1186N/A            // spaces to match the project's tab settings.
1186N/A            input = ExpandTabsReader.wrap(in, project);
1294N/A        }
1186N/A        factory.writeXref(input, out, defs, annotation, project);
949N/A    }
954N/A
1186N/A    /**
949N/A     * Get the genre of a file
1186N/A     *
1186N/A     * @param file The file to inpect
1186N/A     * @return The genre suitable to decide how to display the file
1186N/A     */
1186N/A    public static Genre getGenre(String file) {
1186N/A        return getGenre(find(file));
1186N/A    }
1186N/A
1186N/A    /**
1186N/A     * Get the genre of a bulk of data
1186N/A     *
1186N/A     * @param in A stream containing the data
1186N/A     * @return The genre suitable to decide how to display the file
1186N/A     * @throws java.io.IOException If an error occurs while getting the content
1186N/A     */
1186N/A    public static Genre getGenre(InputStream in) throws IOException {
1186N/A        return getGenre(find(in));
1186N/A    }
1186N/A
1186N/A    /**
1186N/A     * Get the genre for a named class (this is most likely an analyzer)
1186N/A     * @param factory the analyzer factory to get the genre for
949N/A     * @return The genre of this class (null if not found)
949N/A     */
1186N/A    public static Genre getGenre(FileAnalyzerFactory factory) {
1186N/A        if (factory != null) {
1186N/A            return factory.getGenre();
1186N/A        }
1186N/A        return null;
1390N/A    }
1186N/A
1186N/A    /**
1390N/A     * Find a {@code FileAnalyzerFactory} with the specified class name. If one
1390N/A     * doesn't exist, create one and register it.
1390N/A     *
1390N/A     * @param factoryClassName name of the factory class
1390N/A     * @return a file analyzer factory
1186N/A     *
1186N/A     * @throws ClassNotFoundException if there is no class with that name
1390N/A     * @throws ClassCastException if the class is not a subclass of {@code
1390N/A     * FileAnalyzerFactory}
1390N/A     * @throws IllegalAccessException if the constructor cannot be accessed
1254N/A     * @throws InstantiationException if the class cannot be instantiated
1254N/A     */
1186N/A    public static FileAnalyzerFactory findFactory(String factoryClassName)
1186N/A        throws ClassNotFoundException, IllegalAccessException,
1186N/A               InstantiationException
1186N/A    {
1186N/A        return findFactory(Class.forName(factoryClassName));
1186N/A    }
1186N/A
1186N/A    /**
1186N/A     * Find a {@code FileAnalyzerFactory} which is an instance of the specified
1186N/A     * class. If one doesn't exist, create one and register it.
1186N/A     *
1388N/A     * @param factoryClass the factory class
1186N/A     * @return a file analyzer factory
1186N/A     *
949N/A     * @throws ClassCastException if the class is not a subclass of {@code
949N/A     * FileAnalyzerFactory}
1186N/A     * @throws IllegalAccessException if the constructor cannot be accessed
1186N/A     * @throws InstantiationException if the class cannot be instantiated
1186N/A     */
1390N/A    private static FileAnalyzerFactory findFactory(Class factoryClass)
1186N/A        throws InstantiationException, IllegalAccessException
1186N/A    {
1186N/A        for (FileAnalyzerFactory f : factories) {
1186N/A            if (f.getClass() == factoryClass) {
1389N/A                return f;
1186N/A            }
1186N/A        }
1186N/A        FileAnalyzerFactory f =
1186N/A            (FileAnalyzerFactory) factoryClass.newInstance();
1186N/A        registerAnalyzer(f);
1186N/A        return f;
1186N/A    }
1186N/A
1186N/A    /**
1186N/A     * Finds a suitable analyser class for file name. If the analyzer cannot
1186N/A     * be determined by the file extension, try to look at the data in the
1186N/A     * InputStream to find a suitable analyzer.
1186N/A     *
1390N/A     * Use if you just want to find file type.
1186N/A     *
1186N/A     *
1186N/A     * @param in The input stream containing the data
1186N/A     * @param file The file name to get the analyzer for
1186N/A     * @return the analyzer factory to use
1186N/A     * @throws java.io.IOException If a problem occurs while reading the data
1186N/A     */
949N/A    public static FileAnalyzerFactory find(InputStream in, String file)
949N/A        throws IOException
1186N/A    {
1390N/A        FileAnalyzerFactory factory = find(file);
1390N/A        //TODO above is not that great, since if 2 analyzers share one extension
1390N/A        //then only the first one registered will own it
1390N/A        //it would be cool if above could return more analyzers and below would
1390N/A        //then decide between them ...
1390N/A        if (factory != null) {
1390N/A            return factory;
1390N/A        }
1390N/A        return find(in);
1186N/A    }
1186N/A
1186N/A    /**
1186N/A     * Finds a suitable analyser class for file name.
1390N/A     *
1186N/A     * @param file The file name to get the analyzer for
1186N/A     * @return the analyzer factory to use
1186N/A     */
1390N/A    public static FileAnalyzerFactory find(String file) {
1390N/A        String path = file;
1390N/A        int i = 0;
1390N/A        if (((i = path.lastIndexOf('/')) > 0 || (i = path.lastIndexOf('\\')) > 0)
1390N/A            && (i + 1 < path.length())) {
1390N/A            path = path.substring(i + 1);
1390N/A        }
1390N/A        int dotpos = path.lastIndexOf('.');
1390N/A        if (dotpos >= 0) {
1390N/A            FileAnalyzerFactory factory =
1390N/A                ext.get(path.substring(dotpos + 1).toUpperCase(Locale.getDefault()));
1390N/A            if (factory != null) {
1390N/A                return factory;
1390N/A            }
1186N/A        }
1186N/A        // file doesn't have any of the extensions we know, try full match
1186N/A        return FILE_NAMES.get(path.toUpperCase(Locale.getDefault()));
1186N/A    }
1186N/A
1186N/A    /**
1186N/A     * Finds a suitable analyser class for the data in this stream
1186N/A     *
1186N/A     * @param in The stream containing the data to analyze
1186N/A     * @return the analyzer factory to use
1186N/A     * @throws java.io.IOException if an error occurs while reading data from
1186N/A     *                             the stream
1186N/A     */
1186N/A    public static FileAnalyzerFactory find(InputStream in) throws IOException {
1186N/A        in.mark(8);
1186N/A        byte[] content = new byte[8];
949N/A        int len = in.read(content);
949N/A        in.reset();
1186N/A        if (len < 4) {
1186N/A            return null;
1186N/A        }
1186N/A
1186N/A        FileAnalyzerFactory factory = find(content);
1186N/A        if (factory != null) {
1186N/A            return factory;
1186N/A        }
1186N/A
1186N/A        for (FileAnalyzerFactory.Matcher matcher : matchers) {
1186N/A            FileAnalyzerFactory fac = matcher.isMagic(content, in);
1186N/A            if (fac != null) {
1186N/A                return fac;
1186N/A            }
1186N/A        }
1355N/A
1186N/A        return null;
1186N/A    }
1186N/A
1186N/A    /**
1186N/A     * Finds a suitable analyser class for a magic signature
1389N/A     *
1186N/A     * @param signature the magic signature look up
1186N/A     * @return the analyzer factory to use
1186N/A     */
1186N/A    private static FileAnalyzerFactory find(byte[] signature)
1186N/A            throws IOException {
1186N/A        // XXX this assumes ISO-8859-1 encoding (and should work in most cases
1186N/A        // for US-ASCII, UTF-8 and other ISO-8859-* encodings, but not always),
1186N/A        // we should try to be smarter than this...
1186N/A        char[] chars = new char[signature.length > 8 ? 8 : signature.length];
1186N/A        for (int i = 0; i < chars.length; i++) {
1186N/A            chars[i] = (char) (0xFF & signature[i]);
1186N/A        }
1186N/A
1186N/A        String sig = new String(chars);
1186N/A
1186N/A        FileAnalyzerFactory a = magics.get(sig);
1186N/A        if (a == null) {
1186N/A            String sigWithoutBOM = stripBOM(signature);
1186N/A            for (Map.Entry<String, FileAnalyzerFactory> entry :
1186N/A                     magics.entrySet()) {
1390N/A                if (sig.startsWith(entry.getKey())) {
949N/A                    return entry.getValue();
1186N/A                }
1186N/A                // See if text files have the magic sequence if we remove the
1186N/A                // byte-order marker
1186N/A                if (sigWithoutBOM != null &&
1186N/A                        entry.getValue().getGenre() == Genre.PLAIN &&
1186N/A                        sigWithoutBOM.startsWith(entry.getKey())) {
1186N/A                    return entry.getValue();
1186N/A                }
1186N/A            }
1186N/A        }
1186N/A        return a;
1186N/A    }
1186N/A
1390N/A    /** Byte-order markers. */
1186N/A    private static final Map<String, byte[]> BOMS =
1186N/A            new HashMap<String, byte[]>();
1186N/A    static {
1186N/A        BOMS.put("UTF-8", new byte[] {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF});
949N/A        BOMS.put("UTF-16BE", new byte[] {(byte) 0xFE, (byte) 0xFF});
1186N/A        BOMS.put("UTF-16LE", new byte[] {(byte) 0xFF, (byte) 0xFE});
1390N/A    }
1390N/A
1390N/A    /**
1186N/A     * Strip away the byte-order marker from the string, if it has one.
1186N/A     *
1390N/A     * @param sig a sequence of bytes from which to remove the BOM
1390N/A     * @return a string without the byte-order marker, or <code>null</code> if
1186N/A     * the string doesn't start with a BOM
1186N/A     */
1186N/A    public static String stripBOM(byte[] sig) throws IOException {
1423N/A        for (Map.Entry<String, byte[]> entry : BOMS.entrySet()) {
1423N/A            String encoding = entry.getKey();
1423N/A            byte[] bom = entry.getValue();
1186N/A            if (sig.length > bom.length) {
1423N/A                int i = 0;
1423N/A                while (i < bom.length && sig[i] == bom[i]) {
1423N/A                    i++;
1423N/A                }
1423N/A                if (i == bom.length) {
1186N/A                    // BOM matched beginning of signature
1294N/A                    return new String(
1186N/A                            sig,
1186N/A                            bom.length,                // offset
1423N/A                            sig.length - bom.length,   // length
1423N/A                            encoding);
1423N/A                }
1423N/A            }
1186N/A        }
1423N/A        return null;
1423N/A    }
1423N/A}
1423N/A