///*
// * ClueWeb Tools: Hadoop tools for manipulating ClueWeb collections
// *
// * Licensed under the Apache License, Version 2.0 (the "License"); you
// * may not use this file except in compliance with the License. You may
// * obtain a copy of the License at
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// * implied. See the License for the specific language governing
// * permissions and limitations under the License.
// */
//
//package org.clueweb.data;
//
//import java.io.IOException;
//
//import org.apache.hadoop.conf.Configuration;
//import org.apache.hadoop.fs.FSDataInputStream;
//import org.apache.hadoop.fs.FileSystem;
//import org.apache.hadoop.fs.Path;
//import org.apache.hadoop.io.WritableUtils;
//import org.clueweb.clueweb12.app.BuildDictionary;
//
//import com.google.common.base.Preconditions;
//
//public class TermStatistics {
// private final int numTerms;
// private final long[] cfs;
// private final int[] dfs;
//
// private long collectionSize;
//
// private long maxCf = 0;
// private int maxCfTerm;
//
// private int maxDf = 0;
// private int maxDfTerm;
//
// /**
// * Creates a {@code TermStatistics} object.
// *
// * @param file base path under which the collection frequency and document frequency data files are located
// * @throws IOException
// */
// public TermStatistics(Path file) throws IOException {
// this(file, FileSystem.get(new Configuration()));
// }
//
// /**
// * Creates a {@code TermStatistics} object.
// *
// * @param file base path under which the collection frequency and document frequency data files are located
// * @param fs FileSystem to read from
// * @throws IOException
// */
// public TermStatistics(Path file, FileSystem fs) throws IOException {
// Preconditions.checkNotNull(file);
// Preconditions.checkNotNull(fs);
//
// FSDataInputStream in = fs.open(new Path(file, BuildDictionary.CF_BY_ID_DATA));
// this.numTerms = in.readInt();
//
// cfs = new long[numTerms];
//
// for (int i = 0; i < numTerms; i++) {
// long cf = WritableUtils.readVLong(in);
//
// cfs[i] = cf;
// collectionSize += cf;
//
// if (cf > maxCf) {
// maxCf = cf;
// maxCfTerm = i + 1;
// }
// }
//
// in.close();
//
// in = fs.open(new Path(file, BuildDictionary.DF_BY_ID_DATA));
// if (numTerms != in.readInt() ) {
// throw new IOException("df data and cf data should have the same number of entries!");
// }
//
// dfs = new int[numTerms];
//
// for (int i = 0; i < numTerms; i++) {
// int df = WritableUtils.readVInt(in);
//
// dfs[i] = df;
//
// if (df > maxDf) {
// maxDf = df;
// maxDfTerm = i + 1;
// }
// }
//
// in.close();
// }
//
// public int getDf(int term) {
// if (term <= 0 || term > numTerms) {
// return 0;
// }
// return dfs[term - 1];
// }
//
// public long getCf(int term) {
// if (term <= 0 || term > numTerms) {
// return 0;
// }
//
// return cfs[term - 1];
// }
//
// public long getCollectionSize() {
// return collectionSize;
// }
//
// public int getVocabularySize() {
// return numTerms;
// }
//
// public int getMaxDf() {
// return maxDf;
// }
//
// public long getMaxCf() {
// return maxCf;
// }
//
// public int getMaxDfTerm() {
// return maxDfTerm;
// }
//
// public int getMaxCfTerm() {
// return maxCfTerm;
// }
//
//}