EftarFile.java revision 460
588N/A/*
588N/A * CDDL HEADER START
588N/A *
588N/A * The contents of this file are subject to the terms of the
588N/A * Common Development and Distribution License (the "License").
588N/A * You may not use this file except in compliance with the License.
588N/A *
588N/A * See LICENSE.txt included in this distribution for the specific
588N/A * language governing permissions and limitations under the License.
588N/A *
588N/A * When distributing Covered Code, include this CDDL HEADER in each
588N/A * file and include the License file at LICENSE.txt.
588N/A * If applicable, add the following below this CDDL HEADER, with the
588N/A * fields enclosed by brackets "[]" replaced with your own identifying
588N/A * information: Portions Copyright [yyyy] [name of copyright owner]
588N/A *
588N/A * CDDL HEADER END
588N/A */
588N/A
588N/A/*
588N/A * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
1185N/A * Use is subject to license terms.
1185N/A */
588N/A
1185N/Apackage org.opensolaris.opengrok.web;
1190N/A
588N/Aimport java.io.BufferedOutputStream;
588N/Aimport java.io.BufferedReader;
588N/Aimport java.io.DataOutputStream;
588N/Aimport java.io.FileNotFoundException;
588N/Aimport java.io.FileOutputStream;
588N/Aimport java.io.FileReader;
588N/Aimport java.io.IOException;
1190N/Aimport java.io.RandomAccessFile;
588N/Aimport java.util.Map;
588N/Aimport java.util.StringTokenizer;
1190N/Aimport java.util.TreeMap;
588N/Aimport java.util.logging.Level;
937N/Aimport org.opensolaris.opengrok.OpenGrokLogger;
588N/A
588N/A
588N/A/**
588N/A * An Extremely Fast Tagged Attribute Read-only File System
588N/A * Created on October 12, 2005
588N/A *
588N/A * A Eftar File has the following format
588N/A * FILE --> Record ( Record | tagString ) *
588N/A * Record --> 64bit:Hash 16bit:childrenOffset 16bit:(numberChildren|lenthOfTag) 16bit:tagOffset
937N/A *
1481N/A * It is a tree of tagged names,
1481N/A * doing binary search in sorted list of children
1481N/A *
1481N/A * @author Chandan
1481N/A */
1481N/Apublic class EftarFile {
1481N/A
1481N/A public static final int RECORD_LENGTH = 14;
1481N/A private long offset;
1481N/A private DataOutputStream out;
1481N/A
1481N/A class Node {
1481N/A
1481N/A public long hash;
1481N/A public String tag;
1481N/A public Map<Long, Node> children;
1481N/A public long tagOffset;
1481N/A public long childOffset;
1190N/A public long myOffset;
1185N/A
937N/A public Node(long hash, String tag) {
937N/A this.hash = hash;
937N/A this.tag = tag;
937N/A children = new TreeMap<Long, Node>();
937N/A }
937N/A
937N/A public Node put(long hash, String desc) {
937N/A if (children.get(hash) == null) {
937N/A children.put(hash, new Node(hash, desc));
937N/A }
937N/A return children.get(hash);
937N/A }
1185N/A
937N/A public Node get(long hash) {
1190N/A return children.get(hash);
588N/A }
}
class FNode {
public long offset;
public long hash;
public int childOffset;
public int numChildren;
public int tagOffset;
public FNode(RandomAccessFile f) throws Throwable {
offset = f.getFilePointer();
hash = f.readLong();
childOffset = f.readUnsignedShort();
numChildren = f.readUnsignedShort();
tagOffset = f.readUnsignedShort();
}
public FNode(long hash, long offset, int childOffset, int num, int tagOffset) {
this.hash = hash;
this.offset = offset;
this.childOffset = childOffset;
this.numChildren = num;
this.tagOffset = tagOffset;
}
public FNode get(long hash, RandomAccessFile f) throws Throwable {
if (childOffset == 0) {
return null;
}
return sbinSearch(offset + childOffset, numChildren, hash, f);
}
private FNode sbinSearch(long start, int len, long hash, RandomAccessFile f) throws Throwable {
int b = 0;
int e = len;
while (b <= e) {
int m = (b + e) / 2;
f.seek(start + m * RECORD_LENGTH);
long mhash = f.readLong();
if (hash > mhash) {
b = m + 1;
} else if (hash < mhash) {
e = m - 1;
} else {
return new FNode(mhash, f.getFilePointer() - 8l, f.readUnsignedShort(), f.readUnsignedShort(), f.readUnsignedShort());
}
}
return null;
}
}
public static long myHash(String name) {
if (name == null || name.length() == 0) {
return 0;
}
long hash = 2861;
int n = name.length();
if (n > 100) {
n = 100;
}
for (int i = 0; i < n; i++) {
hash = (hash * 641 + name.charAt(i) * 2969 + hash << 6) % 9322397;
}
return hash;
}
private void write(Node n) throws IOException {
if (n.tag != null) {
out.write(n.tag.getBytes());
offset += n.tag.length();
}
for (Node childnode : n.children.values()) {
out.writeLong(childnode.hash);
if (childnode.children.size() > 0) {
out.writeShort((short) (childnode.childOffset - offset));
out.writeShort((short) childnode.children.size());
} else {
out.writeShort(0);
if (childnode.tag == null) {
out.writeShort((short) 0);
} else {
out.writeShort((short) childnode.tag.length());
}
}
if (childnode.tag == null) {
out.writeShort(0);
} else {
out.writeShort((short) (childnode.tagOffset - offset));
}
offset += RECORD_LENGTH;
}
for (Node childnode : n.children.values()) {
write(childnode);
}
}
private void traverse(Node n) {
if (n.tag == null) {
n.tagOffset = 0;
} else {
n.tagOffset = offset;
offset += n.tag.length();
}
if (n.children.size() > 0) {
n.childOffset = offset;
offset += (RECORD_LENGTH * n.children.size());
} else {
n.childOffset = 0;
}
for (Node childnode : n.children.values()) {
traverse(childnode);
}
}
private Node root;
public void readInput(String tagsPath) throws IOException {
BufferedReader r = new BufferedReader(new FileReader(tagsPath));
if (root == null) {
root = new Node(1, null);
}
String line;
int size = 0;
while ((line = r.readLine()) != null) {
int tab = line.indexOf('\t');
if (tab > 0) {
String path = line.substring(0, tab);
String desc = line.substring(tab + 1);
size += desc.length() + 1 + 15;
StringTokenizer toks = new StringTokenizer(path, "\\/");
Node n = root;
while (toks.hasMoreTokens()) {
n = n.put(myHash(toks.nextToken()), null);
}
n.tag = desc;
}
}
try {
r.close();
} catch (IOException e) {
}
}
public void write(String outPath) throws FileNotFoundException, IOException {
offset = RECORD_LENGTH;
traverse(root);
out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outPath)));
out.writeLong(0x5e33);
out.writeShort(RECORD_LENGTH);
out.writeShort(root.children.size());
out.writeShort(0);
offset = RECORD_LENGTH;
write(root);
out.close();
}
public void create(String[] args) throws IOException, FileNotFoundException {
for (int i = 0; i < args.length - 1; i++) {
readInput(args[i]);
}
write(args[args.length - 1]);
}
@SuppressWarnings("PMD.SystemPrintln")
public static void main(String[] args) {
if (args.length < 2) {
System.err.println("Usage inputFile [inputFile ...] outputFile");
System.exit(1);
}
try {
EftarFile ef = new EftarFile();
ef.create(args);
} catch (Exception e) {
OpenGrokLogger.getLogger().log(Level.WARNING, "EftarFile error", e);
}
}
}