/**
* Copyright 2008 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.sf.katta.lib.lucene;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.hadoop.io.Writable;
import org.mortbay.log.Log;
import com.google.common.base.Objects;
/**
 * A {@link Writable} that accumulates per-term document frequencies plus a
 * total document count, so frequencies gathered on several nodes can be merged.
 * Thread-safe: the frequency map is guarded by a fair read/write lock and the
 * document count is an {@link AtomicLong}.
 */
public class DocumentFrequencyWritable implements Writable {

  private final ReadWriteLock _frequenciesLock = new ReentrantReadWriteLock(true);
  private final Map<TermWritable, Integer> _frequencies = new HashMap<TermWritable, Integer>();
  private final AtomicLong _numDocs = new AtomicLong();

  /**
   * Adds the given frequency for the given field/term pair (summing with any
   * frequency already recorded for that pair).
   *
   * @param field the field name
   * @param term the term text
   * @param frequency the document frequency to add
   */
  public void put(final String field, final String term, final int frequency) {
    _frequenciesLock.writeLock().lock();
    try {
      add(new TermWritable(field, term), frequency);
    } finally {
      _frequenciesLock.writeLock().unlock();
    }
  }

  /**
   * Assumes a write lock is already in place.
   *
   * @param key
   *          The item that has a frequency.
   * @param frequency
   *          The frequency of the key.
   */
  private void add(final TermWritable key, final int frequency) {
    final Integer previous = _frequencies.get(key);
    _frequencies.put(key, previous != null ? previous + frequency : frequency);
  }

  /**
   * Adds all entries of the given map, summing frequencies for keys that are
   * already present.
   *
   * @param frequencyMap term-to-frequency entries to merge in
   */
  public void putAll(final Map<TermWritable, Integer> frequencyMap) {
    _frequenciesLock.writeLock().lock();
    try {
      // entrySet() avoids a second hash lookup per key (vs. keySet() + get()).
      for (final Map.Entry<TermWritable, Integer> entry : frequencyMap.entrySet()) {
        add(entry.getKey(), entry.getValue());
      }
    } finally {
      _frequenciesLock.writeLock().unlock();
    }
  }

  /**
   * Returns the accumulated frequency for the given field/term pair, or
   * {@code null} if the pair was never recorded.
   *
   * @param field the field name
   * @param term the term text
   * @return the frequency, or {@code null} if unknown
   */
  public Integer get(final String field, final String term) {
    return get(new TermWritable(field, term));
  }

  /**
   * Adds to the total document count, saturating at {@link Long#MAX_VALUE}.
   *
   * @param numDocs the number of documents to add
   */
  public void addNumDocs(long numDocs) {
    // CAS loop: the previous check-then-act implementation was racy, its guard
    // ("Long.MAX_VALUE - numDocs - current < 0") could itself overflow, and on
    // detected overflow it *added* Long.MAX_VALUE, wrapping the counter to a
    // negative value. Saturate at Long.MAX_VALUE instead.
    long prev;
    long next;
    do {
      prev = _numDocs.get();
      if (numDocs > Long.MAX_VALUE - prev) {
        Log.warn("max number of documents exceeded " + prev + " + " + numDocs);
        next = Long.MAX_VALUE;
      } else {
        next = prev + numDocs;
      }
    } while (!_numDocs.compareAndSet(prev, next));
  }

  /**
   * Returns the accumulated frequency for the given term, or {@code null} if
   * the term was never recorded.
   *
   * @param key the term to look up
   * @return the frequency, or {@code null} if unknown
   */
  public Integer get(final TermWritable key) {
    _frequenciesLock.readLock().lock();
    try {
      return _frequencies.get(key);
    } finally {
      _frequenciesLock.readLock().unlock();
    }
  }

  /**
   * Returns an unmodifiable snapshot of all term frequencies.
   *
   * @return an unmodifiable copy of the frequency map
   */
  public Map<TermWritable, Integer> getAll() {
    _frequenciesLock.readLock().lock();
    try {
      // Snapshot under the read lock: handing out a live view of _frequencies
      // would let callers iterate concurrently with writers (put/readFields),
      // risking ConcurrentModificationException.
      return Collections.unmodifiableMap(new HashMap<TermWritable, Integer>(_frequencies));
    } finally {
      _frequenciesLock.readLock().unlock();
    }
  }

  public void readFields(final DataInput in) throws IOException {
    _frequenciesLock.writeLock().lock();
    try {
      // The Writable contract requires readFields to fully reinitialize this
      // object; without clearing, a reused instance would accumulate stale
      // entries from a previous deserialization.
      _frequencies.clear();
      final int size = in.readInt();
      for (int i = 0; i < size; i++) {
        final TermWritable term = new TermWritable();
        term.readFields(in);
        final int frequency = in.readInt();
        _frequencies.put(term, frequency);
      }
      _numDocs.set(in.readLong());
    } finally {
      _frequenciesLock.writeLock().unlock();
    }
  }

  public void write(final DataOutput out) throws IOException {
    _frequenciesLock.readLock().lock();
    try {
      out.writeInt(_frequencies.size());
      // entrySet() avoids a second hash lookup per key (vs. keySet() + get()).
      for (final Map.Entry<TermWritable, Integer> entry : _frequencies.entrySet()) {
        entry.getKey().write(out);
        out.writeInt(entry.getValue());
      }
      out.writeLong(_numDocs.get());
    } finally {
      _frequenciesLock.readLock().unlock();
    }
  }

  /**
   * @return the total number of documents counted so far
   */
  public long getNumDocs() {
    return _numDocs.get();
  }

  /**
   * Returns the total document count clamped to the {@code int} range.
   *
   * @return the count, or {@link Integer#MAX_VALUE} if it exceeds the int range
   */
  public int getNumDocsAsInteger() {
    final long numDocs = _numDocs.get();
    if (numDocs > Integer.MAX_VALUE) {
      return Integer.MAX_VALUE;
    }
    return (int) numDocs;
  }

  @Override
  public String toString() {
    return Objects.toStringHelper(this).add("totalNumberOfDocs", getNumDocs()).add("termFrequencies", getAll())
        .toString();
  }
}