/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* Histogram derived from an integer array of events (int[]).
* @author John Rose
*/
final class Histogram {
// Compact histogram representation: 4 bytes per distinct value,
// plus 5 words per distinct count.
// These are created eagerly also, since that saves work.
// They cost another 8 bytes per distinct value.
/** Build a histogram given a sequence of values.
* To save work, the input should be sorted, but need not be.
*/
public
assert(assertWellFormed(valueSequence));
}
public
}
/** Build a histogram given a compact matrix of counts and values. */
public
// sort the rows
int length = 0;
int weight = 0;
}
this.totalWeight = weight;
int fillp = 0;
// sort key is value, so put it in the high 32!
}
}
assert(assertWellFormed(null));
}
/** Histogram of int values, reported compactly as a ragged matrix,
* indexed by descending frequency rank.
* <p>
* Format of matrix:
* Each row in the matrix begins with an occurrence count,
* and continues with all int values that occur at that frequency.
* <pre>
* int[][] matrix = {
* { count1, value11, value12, value13, ... },
* { count2, value21, value22, ... },
* ...
* }
* </pre>
* The first column of the matrix { count1, count2, ... }
* is sorted in descending order, and contains no duplicates.
* Each row of the matrix (apart from its first element)
* is sorted in ascending order, and contains no duplicates.
* That is, each sequence { valuei1, valuei2, ... } is sorted.
*/
public
public
public
public
public
public
}
public
int getTotalWeight() {
return totalWeight;
}
public
int getTotalLength() {
}
/** Returns an array of all values, sorted. */
public
int[] getAllValues() {
return values;
}
/** Returns an array parallel with {@link #getValues},
* with a frequency for each value.
*/
public
int[] getAllFrequencies() {
return counts;
}
public
}
public
}
public
}
public
interface BitMetric {
}
public double getBitLength(int value) {
}
};
return bitMetric;
}
/** bit-length is negative entropy: -H(matrix). */
public
double getBitLength() {
double sum = 0;
}
return sum;
}
/** bit-length in to another coding (cross-entropy) */
public
double sum = 0;
}
}
return sum;
}
static private
}
/** Sort rows and columns.
* Merge adjacent rows with the same key element [0].
* Make a fresh copy of all of it.
*/
if (count <= 0) continue;
}
int prevCount = -1;
int fillp1 = 0;
int fillp2 = 0;
for (int i = 0; ; i++) {
int[] row;
if (rowMapEntry == 0) continue;
} else {
}
// Close off previous run.
int length = 0;
}
int rfillp = 1;
}
int jfillp = 2;
// Detect and squeeze out duplicates.
}
// Reallocate because of lost duplicates.
}
}
}
break;
}
// Now drop missing rows.
}
return matrix;
}
public
int totalUnique = getTotalLength();
int ltotalWeight = getTotalWeight();
int cumWeight = 0;
int cumUnique = 0;
int count = getRowFrequency(i);
int unique = getRowLength(i);
int weight = getRowWeight(i);
double len = getRowBitLength(i);
}
return histTitles;
}
/** Print a report of this histogram.
*/
public
}
/** Print a report of this histogram.
*/
public
}
/** Print a report of this histogram.
*/
public
int totalUnique = getTotalLength();
int ltotalWeight = getTotalWeight();
double tlen = getBitLength();
if (histTitles == null) {
} else {
}
}
}
}
/*
public static
int[][] makeHistogramMatrix(int[] values) {
// Make sure they are sorted.
values = maybeSort(values);
long[] hist2col = computeHistogram2Col(values);
int[][] matrix = makeMatrix(hist2col);
return matrix;
}
*/
private static
// Sort by increasing count, then by increasing value.
}
// Do a join between hist2col (resorted) and countHist.
for (int j = 0; j < repeat; j++) {
}
}
return matrix;
}
private static
// Break apart the entries in hist2col.
// table[0] gets values, table[1] gets entries.
}
return table;
}
/** Simple two-column histogram. Contains repeated counts.
* Assumes input is sorted. Does not sort output columns.
* <p>
* Format of result:
* <pre>
* long[] hist = {
* (count1 << 32) | (value1),
* (count2 << 32) | (value2),
* ...
* }
* </pre>
* In addition, the sequence {valuei...} is guaranteed to be sorted.
* Note that resorting this using Arrays.sort() will reorder the
* entries by increasing count.
*/
private static
switch (sortedValues.length) {
case 0:
return new long[]{ };
case 1:
}
int prevIndex = -1;
int prevCount = 0;
int thisValue;
if (i < sortedValues.length)
thisValue = sortedValues[i];
else
prevCount += 1;
} else {
// Found a new value.
// Save away previous value.
}
prevCount = 1;
prevIndex += 1;
}
}
if (sizeOnly) {
// Finished the sizing pass. Allocate the histogram.
} else {
break; // done
}
}
return hist;
}
/** Regroup the histogram, so that it becomes an approximate histogram
* whose rows are of the given lengths.
* If matrix rows must be split, the latter parts (larger values)
* are placed earlier in the new matrix.
* If matrix rows are joined, they are resorted into ascending order.
* In the new histogram, the counts are averaged over row entries.
*/
private static
long oldEntries = 0;
}
long newEntries = 0;
}
if (newEntries > oldEntries) {
long ok = oldEntries;
ok = 0;
break;
}
}
} else {
}
// Fill pointers.
int i = 0; // into matrix
int jMin = 1;
int njFill = 1;
jMin = 1;
}
}
// compute average count of new group:
}
return newMatrix;
}
public static
for (int i = 0; i < nr; i++) {
}
}
// Build a matrix.
matrix[i][1] = i;
}
}
/** Slice and sort the given input array. */
private static
return valueSequence;
} else {
return slice;
}
}
/** Tell if an array is sorted. */
private static
return false; // found witness to disorder
}
}
return true; // no witness => sorted
}
/** Clone and sort the array, if not already sorted. */
private static
}
return values;
}
/// Debug stuff follows.
/*
// Sanity check.
int weight = 0;
int vlength = 0;
for (int i = 0; i < matrix.length; i++) {
int vlengthi = (matrix[i].length-1);
int count = matrix[i][0];
assert(vlengthi > 0); // no empty rows
assert(count > 0); // no impossible rows
vlength += vlengthi;
weight += count * vlengthi;
}
assert(isSorted(values, 0, true));
// make sure the counts all add up
assert(totalWeight == weight);
assert(vlength == values.length);
assert(vlength == counts.length);
int weight2 = 0;
for (int i = 0; i < counts.length; i++) {
weight2 += counts[i];
}
assert(weight2 == weight);
int[] revcol1 = new int[matrix.length]; //1st matrix colunm
for (int i = 0; i < matrix.length; i++) {
// spot checking: try a random query on each matrix row
assert(matrix[i].length > 1);
revcol1[matrix.length-i-1] = matrix[i][0];
assert(isSorted(matrix[i], 1, true));
int rand = (matrix[i].length+1) / 2;
int val = matrix[i][rand];
int count = matrix[i][0];
int pos = Arrays.binarySearch(values, val);
assert(values[pos] == val);
assert(counts[pos] == matrix[i][0]);
if (valueSequence != null) {
int count2 = 0;
for (int j = 0; j < valueSequence.length; j++) {
if (valueSequence[j] == val) count2++;
}
assert(count2 == count);
}
}
assert(isSorted(revcol1, 0, true));
//*/
return true;
}
/*
public static
int[] readValuesFrom(InputStream instr) {
return readValuesFrom(new InputStreamReader(instr));
}
public static
int[] readValuesFrom(Reader inrdr) {
inrdr = new BufferedReader(inrdr);
final StreamTokenizer in = new StreamTokenizer(inrdr);
final int TT_NOTHING = -99;
in.commentChar('#');
return readValuesFrom(new Iterator() {
int token = TT_NOTHING;
private int getToken() {
if (token == TT_NOTHING) {
try {
token = in.nextToken();
assert(token != TT_NOTHING);
} catch (IOException ee) {
throw new RuntimeException(ee);
}
}
return token;
}
public boolean hasNext() {
return getToken() != StreamTokenizer.TT_EOF;
}
public Object next() {
int ntok = getToken();
token = TT_NOTHING;
switch (ntok) {
case StreamTokenizer.TT_EOF:
throw new NoSuchElementException();
case StreamTokenizer.TT_NUMBER:
return new Integer((int) in.nval);
default:
assert(false);
return null;
}
}
public void remove() {
throw new UnsupportedOperationException();
}
});
}
public static
int[] readValuesFrom(Iterator iter) {
return readValuesFrom(iter, 0);
}
public static
int[] readValuesFrom(Iterator iter, int initSize) {
int[] na = new int[Math.max(10, initSize)];
int np = 0;
while (iter.hasNext()) {
Integer val = (Integer) iter.next();
if (np == na.length) {
int[] na2 = new int[np*2];
System.arraycopy(na, 0, na2, 0, np);
na = na2;
}
na[np++] = val.intValue();
}
if (np != na.length) {
int[] na2 = new int[np];
System.arraycopy(na, 0, na2, 0, np);
na = na2;
}
return na;
}
public static
Histogram makeByteHistogram(byte[] bytes) {
try {
return makeByteHistogram(new ByteArrayInputStream(bytes));
} catch (IOException ee) {
throw new RuntimeException(ee);
}
}
public static
void main(String[] av) throws IOException {
if (av.length > 0 && av[0].equals("-r")) {
int[] values = new int[Integer.parseInt(av[1])];
int limit = values.length;
if (av.length >= 3) {
limit = (int)( limit * Double.parseDouble(av[2]) );
}
Random rnd = new Random();
for (int i = 0; i < values.length; i++) {
values[i] = rnd.nextInt(limit);;
}
Histogram rh = new Histogram(values);
rh.print("random", System.out);
return;
}
if (av.length > 0 && av[0].equals("-s")) {
int[] values = readValuesFrom(System.in);
Random rnd = new Random();
for (int i = values.length; --i > 0; ) {
int j = rnd.nextInt(i+1);
if (j < i) {
int tem = values[i];
values[i] = values[j];
values[j] = tem;
}
}
for (int i = 0; i < values.length; i++)
System.out.println(values[i]);
return;
}
if (av.length > 0 && av[0].equals("-e")) {
// edge cases
new Histogram(new int[][] {
{1, 11, 111},
{0, 123, 456},
{1, 111, 1111},
{0, 456, 123},
{3},
{},
{3},
{2, 22},
{4}
}).print(System.out);
return;
}
if (av.length > 0 && av[0].equals("-b")) {
// edge cases
Histogram bh = makeByteHistogram(System.in);
bh.print("bytes", System.out);
return;
}
boolean regroup = false;
if (av.length > 0 && av[0].equals("-g")) {
regroup = true;
}
int[] values = readValuesFrom(System.in);
Histogram h = new Histogram(values);
if (!regroup)
h.print(System.out);
if (regroup) {
int[] groups = new int[12];
for (int i = 0; i < groups.length; i++) {
groups[i] = 1<<i;
}
int[][] gm = regroupHistogram(h.getMatrix(), groups);
Histogram g = new Histogram(gm);
System.out.println("h.getBitLength(g) = "+
h.getBitLength(g.getBitMetric()));
System.out.println("g.getBitLength(h) = "+
g.getBitLength(h.getBitMetric()));
g.print("regrouped", System.out);
}
}
//*/
}