/**
 * Copyright 2008 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.sf.katta.lib.lucene;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.Vector;
import java.util.concurrent.atomic.AtomicInteger;

import net.sf.katta.util.MergeSort;
import net.sf.katta.util.WritableType;

import org.apache.hadoop.io.Writable;
import org.apache.lucene.search.Sort;

/**
 * Collects search hits as a list of hit lists (one entry per
 * {@link #addHits(List)} call) and exposes them as a single sorted list.
 *
 * <p>
 * The sorted view is built lazily by {@link #getHits()} or explicitly by one
 * of the {@code sort*} methods. {@link #sort(int)}, {@link #sortCollection(int)},
 * {@link #fieldSort(Sort, WritableType[], int)} and {@link #sortMerge()} consume
 * the collected lists: afterwards the internal list of lists is empty.
 *
 * <p>
 * Only the collected hit lists are serialized by {@link #write(DataOutput)};
 * the total hit counter, the missing shards and an already built sorted view
 * are not part of the wire format.
 *
 * <p>
 * NOTE(review): the collected lists start out in a {@link Vector} and the
 * counter is an {@link AtomicInteger}, presumably so that
 * {@link #addHits(List)} and {@link #addTotalHits(int)} may be called from
 * several threads. The sort methods themselves are not synchronized; confirm
 * against the callers.
 */
public class Hits implements Writable {

  private static final long serialVersionUID = -732226190122340208L;

  /** Hit lists in the order they were added; emptied by the consuming sorts. */
  private List<List<Hit>> _hitsList = new Vector<List<Hit>>();

  /** Result of the last sort, or {@code null} if nothing was sorted yet. */
  private List<Hit> _sortedList;

  /** Hit counter maintained by the caller; independent of the stored hits. */
  private final AtomicInteger _totalHits = new AtomicInteger();

  private Set<String> _missingShards = Collections.emptySet();

  /**
   * Returns the sorted hits, sorting all collected hits with
   * {@link #sort(int)} (no limit) if no sorted view exists yet.
   *
   * @return the sorted hit list
   */
  public List<Hit> getHits() {
    if (_sortedList == null) {
      sort(Integer.MAX_VALUE);
    }
    return _sortedList;
  }

  /**
   * Adds one list of hits to the collected lists.
   *
   * @param hits the hits to add; the list is stored as is, not copied
   */
  public void addHits(final List<Hit> hits) {
    _hitsList.add(hits);
  }

  /**
   * Replaces the collected hit lists with the ones read from {@code in}, using
   * the format produced by {@link #write(DataOutput)}.
   *
   * @param in the input to read from
   * @throws IOException if reading from {@code in} fails
   */
  @Override
  public void readFields(final DataInput in) throws IOException {
    final int listCount = in.readInt();
    final List<List<Hit>> lists = new ArrayList<List<Hit>>(listCount);
    for (int i = 0; i < listCount; i++) {
      final int hitCount = in.readInt();
      final List<Hit> hits = new ArrayList<Hit>(hitCount);
      for (int j = 0; j < hitCount; j++) {
        final Hit hit = new Hit();
        hit.readFields(in);
        hits.add(hit);
      }
      lists.add(hits);
    }
    _hitsList = lists;
    // A sorted view built from the previous content would be stale now; drop
    // it so that getHits() re-sorts the freshly read hits.
    _sortedList = null;
  }

  /**
   * Writes the collected hit lists: the number of lists, then for every list
   * its size followed by its hits. Hits that were already moved into the
   * sorted view by one of the consuming sorts are not written.
   *
   * @param out the output to write to
   * @throws IOException if writing to {@code out} fails
   */
  @Override
  public void write(final DataOutput out) throws IOException {
    out.writeInt(_hitsList.size());
    for (final List<Hit> hitList : _hitsList) {
      out.writeInt(hitList.size());
      for (final Hit hit : hitList) {
        hit.write(out);
      }
    }
  }

  /**
   * Returns the total hit counter as maintained through
   * {@link #setTotalHits(int)} and {@link #addTotalHits(int)}. This is not
   * derived from the number of {@link Hit} objects held by this instance.
   *
   * @return the current value of the total hit counter
   */
  public int size() {
    return _totalHits.get();
  }

  /**
   * Overwrites the total hit counter.
   *
   * @param totalHits the new counter value
   */
  public void setTotalHits(final int totalHits) {
    _totalHits.set(totalHits);
  }

  /**
   * Sorts all collected hits by their natural order and keeps the first
   * {@code count} of them; delegates to {@link #sortCollection(int)}.
   *
   * @param count maximum number of hits to keep, must not be negative
   */
  public void sort(final int count) {
    sortCollection(count);
  }

  /**
   * Sorts all collected hits with a {@code FieldSortComparator} built from the
   * given sort fields and keeps the first {@code count} of them. The collected
   * lists are consumed.
   *
   * @param sort the sort whose fields define the order
   * @param fieldTypes the types handed to the comparator along with the sort fields
   * @param count maximum number of hits to keep, must not be negative
   */
  public void fieldSort(Sort sort, WritableType[] fieldTypes, int count) {
    // TODO merge sort does not work due KATTA-93
    // The working list is sized by the number of collected hits, not by count:
    // count is only an upper limit and may be as large as Integer.MAX_VALUE.
    final List<Hit> list = drainToSingleList();
    if (!list.isEmpty()) {
      Collections.sort(list, new FieldSortComparator(sort.getSort(), fieldTypes));
    }
    _sortedList = list.subList(0, Math.min(count, list.size()));
  }

  /**
   * Builds the sorted view by handing all collected lists to
   * {@link MergeSort#merge}. The collected lists are consumed.
   */
  @SuppressWarnings("unchecked")
  public void sortMerge() {
    final List<Hit>[] array = drainHitLists();
    _sortedList = MergeSort.merge(array);
  }

  /**
   * Builds the sorted view by repeatedly picking, among the next unconsumed
   * hit of every collected list, the one with the highest score. Ties go to
   * the list that was added first. The collected lists are left untouched.
   *
   * <p>
   * NOTE(review): this yields a globally score-ordered result only if every
   * collected list is itself ordered by descending score; confirm against the
   * callers.
   */
  public void sortOther() {
    final List<Hit> merged = new ArrayList<Hit>();
    _sortedList = merged;
    // One read cursor per collected list. The cursors must live across
    // iterations of the loop below, otherwise the same hit is picked forever.
    final int[] pos = new int[_hitsList.size()];
    while (true) {
      Hit highest = null;
      int pointer = -1;
      for (int i = 0; i < pos.length; i++) {
        final List<Hit> list = _hitsList.get(i);
        if (pos[i] < list.size()) {
          final Hit hit = list.get(pos[i]);
          if (highest == null || hit.getScore() > highest.getScore()) {
            highest = hit;
            pointer = i;
          }
        }
      }
      if (highest == null) {
        // every list is exhausted
        return;
      }
      pos[pointer]++;
      merged.add(highest);
    }
  }

  /**
   * Builds the sorted view position by position: for every index it gathers
   * the hit at that index from each collected list that is long enough, sorts
   * that group by natural order and appends it. The result is ordered within
   * each group only. The collected lists are left untouched.
   */
  public void sortOtherII() {
    final List<Hit> result = new ArrayList<Hit>();
    _sortedList = result;
    for (int pos = 0;; pos++) {
      final List<Hit> slice = new ArrayList<Hit>(_hitsList.size());
      for (final List<Hit> hitList : _hitsList) {
        if (pos < hitList.size()) {
          slice.add(hitList.get(pos));
        }
      }
      if (slice.isEmpty()) {
        // no list has a hit at this position anymore
        return;
      }
      Collections.sort(slice);
      result.addAll(slice);
    }
  }

  /**
   * Copies all collected hits into one list, sorts it by natural order and
   * keeps the first {@code count} hits. The collected lists are consumed.
   *
   * <p>
   * Leads on 10 000 000 list to OOM.
   *
   * @param count maximum number of hits to keep, must not be negative
   */
  public void sortCollection(final int count) {
    final List<Hit> list = drainToSingleList();
    Collections.sort(list);
    _sortedList = list.subList(0, Math.min(count, list.size()));
  }

  /**
   * Adds {@code size} to the total hit counter.
   *
   * @param size the amount to add
   */
  public void addTotalHits(final int size) {
    _totalHits.addAndGet(size);
  }

  @Override
  public String toString() {
    /*
     * Don't modify data structure just by viewing it, otherwise
     * running in a debugger modifies the behavior of the code!
     */
    final String queue = _hitsList != null ? _hitsList.toString() : "null";
    final String sorted = _sortedList != null ? _sortedList.toString() : "null";
    return "Hits: total=" + _totalHits + ", queue=" + queue + ", sorted=" + sorted;
  }

  /**
   * @return the missing shards as last set; an empty set by default
   */
  public Set<String> getMissingShards() {
    return _missingShards;
  }

  /**
   * @param missingShards the missing shards to store; kept by reference
   */
  public void setMissingShards(Set<String> missingShards) {
    _missingShards = missingShards;
  }

  /**
   * Takes a snapshot of the collected hit lists and resets the collection to
   * an empty list.
   */
  @SuppressWarnings("unchecked")
  private List<Hit>[] drainHitLists() {
    final List<Hit>[] lists = _hitsList.toArray(new List[0]);
    _hitsList = new ArrayList<List<Hit>>();
    return lists;
  }

  /**
   * Drains the collected hit lists into one exactly presized list, keeping
   * the order in which the lists were added.
   */
  private List<Hit> drainToSingleList() {
    final List<Hit>[] lists = drainHitLists();
    int total = 0;
    for (final List<Hit> hits : lists) {
      total += hits.size();
    }
    final List<Hit> all = new ArrayList<Hit>(total);
    for (final List<Hit> hits : lists) {
      all.addAll(hits);
    }
    return all;
  }
}