0N/A/*
0N/A * CDDL HEADER START
0N/A *
0N/A * The contents of this file are subject to the terms of the
407N/A * Common Development and Distribution License (the "License").
0N/A * You may not use this file except in compliance with the License.
0N/A *
0N/A * See LICENSE.txt included in this distribution for the specific
0N/A * language governing permissions and limitations under the License.
0N/A *
0N/A * When distributing Covered Code, include this CDDL HEADER in each
0N/A * file and include the License file at LICENSE.txt.
0N/A * If applicable, add the following below this CDDL HEADER, with the
0N/A * fields enclosed by brackets "[]" replaced with your own identifying
0N/A * information: Portions Copyright [yyyy] [name of copyright owner]
0N/A *
0N/A * CDDL HEADER END
0N/A */
0N/A
0N/A/*
1344N/A * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
0N/A */
0N/Apackage org.opensolaris.opengrok.analysis.archive;
0N/A
392N/Aimport java.io.BufferedInputStream;
392N/Aimport java.io.IOException;
392N/Aimport java.io.InputStream;
392N/Aimport java.io.Reader;
392N/Aimport java.io.Writer;
1054N/Aimport java.util.logging.Level;
392N/Aimport java.util.zip.GZIPInputStream;
392N/Aimport org.apache.lucene.document.Document;
392N/Aimport org.apache.lucene.document.Field;
427N/Aimport org.opensolaris.opengrok.OpenGrokLogger;
392N/Aimport org.opensolaris.opengrok.analysis.AnalyzerGuru;
392N/Aimport org.opensolaris.opengrok.analysis.FileAnalyzer;
0N/Aimport org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
392N/Aimport org.opensolaris.opengrok.analysis.FileAnalyzerFactory;
0N/A
0N/A/**
1416N/A * Analyzes GZip files Created on September 22, 2005
0N/A *
0N/A * @author Chandan
0N/A */
0N/Apublic class GZIPAnalyzer extends FileAnalyzer {
1416N/A
202N/A private Genre g;
1416N/A
1054N/A @Override
0N/A public Genre getGenre() {
972N/A if (g != null) {
202N/A return g;
202N/A }
202N/A return super.getGenre();
0N/A }
202N/A
202N/A protected GZIPAnalyzer(FileAnalyzerFactory factory) {
972N/A super(factory);
0N/A }
0N/A private FileAnalyzer fa;
1190N/A
1054N/A @Override
889N/A public void analyze(Document doc, InputStream in) throws IOException {
889N/A BufferedInputStream gzis = new BufferedInputStream(new GZIPInputStream(in));
889N/A String path = doc.get("path");
1416N/A if (path != null
1416N/A && (path.endsWith(".gz") || path.endsWith(".GZ") || path.endsWith(".Gz"))) {
889N/A String newname = path.substring(0, path.length() - 3);
889N/A //System.err.println("GZIPPED OF = " + newname);
889N/A fa = AnalyzerGuru.getAnalyzer(gzis, newname);
889N/A if (fa == null) {
889N/A this.g = Genre.DATA;
1054N/A OpenGrokLogger.getLogger().log(Level.WARNING, "Did not analyze {0}, detected as data.", newname);
1416N/A //TODO we could probably wrap tar analyzer here, need to do research on reader coming from gzis ...
889N/A } else { // cant recurse!
1416N/A //simple file gziped case captured here
889N/A if (fa.getGenre() == Genre.PLAIN || fa.getGenre() == Genre.XREFABLE) {
889N/A this.g = Genre.XREFABLE;
889N/A } else {
460N/A this.g = Genre.DATA;
889N/A }
889N/A fa.analyze(doc, gzis);
889N/A if (doc.get("t") != null) {
889N/A doc.removeField("t");
889N/A if (g == Genre.XREFABLE) {
1416N/A doc.add(new Field("t", g.typeName(), AnalyzerGuru.string_ft_stored_nanalyzed_norms));
460N/A }
460N/A }
1416N/A
460N/A }
460N/A }
0N/A }
1190N/A
1054N/A @Override
1416N/A public TokenStreamComponents createComponents(String fieldName, Reader reader) {
972N/A if (fa != null) {
1416N/A return fa.createComponents(fieldName, reader);
972N/A }
1416N/A return super.createComponents(fieldName, reader);
0N/A }
972N/A
0N/A /**
0N/A * Write a cross referenced HTML file.
1416N/A *
0N/A * @param out Writer to store HTML cross-reference
0N/A */
1054N/A @Override
0N/A public void writeXref(Writer out) throws IOException {
972N/A if ((fa != null) && (fa.getGenre() == Genre.PLAIN || fa.getGenre() == Genre.XREFABLE)) {
460N/A fa.writeXref(out);
972N/A }
0N/A }
0N/A}