/*
* Copyright 2013 Alibaba.com All right reserved. This software is the
* confidential and proprietary information of Alibaba.com ("Confidential
* Information"). You shall not disclose such Confidential Information and shall
* use it only in accordance with the terms of the license agreement you entered
* into with Alibaba.com.
*/
package com.alibaba.simpleimage.analyze.search.engine;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import com.alibaba.simpleimage.analyze.search.cluster.Clusterable;
import com.alibaba.simpleimage.analyze.search.tree.KMeansTree;
import com.alibaba.simpleimage.analyze.search.util.SerializationUtils;
/**
* 类ImageEngine.java的实现描述:TODO 类实现描述
*
* @author axman 2013-7-24 下午1:38:57
*/
public class ImageEngine {
private Map<Integer, LinkedList<Integer>> simpleInvertedFile;
private Map<Integer, List<Integer>> simpleHistogramFile;
private Map<Integer, Float> simpleWeightFile;
private int total_images;
private int total_features;
private int total_words;
private KMeansTree tree;
/**
* @param args
*/
public static void main(String[] args) {
}
public void init(String treePath) {
tree = (KMeansTree) SerializationUtils.loadObject(treePath);
simpleHistogramFile = new HashMap<Integer, List<Integer>>();
total_words = tree.getLeafsList().size();
total_images = 0;
total_features = 0;
simpleInvertedFile = new HashMap<Integer, LinkedList<Integer>>(total_words);
}
/**
* @return the total_features
*/
public int getTotal_features() {
return total_features;
}
public List<Score> getRankedList(List<Integer> queryVWList, List<Score> candidate, int topNum) {
float queryNorm = 0f;
float dictNorm = 0f;
Float weight = 0.0f;
List<Score> scoreList = new ArrayList<Score>();
Integer count;
Map<Integer, Integer> queryMap = new HashMap<Integer, Integer>();
for (Integer visualWord : queryVWList) {
count = queryMap.get(visualWord);
if (count == null) {
count = 1;
} else {
count++;
}
queryMap.put(visualWord, count);
}
Iterator<Entry<Integer, Integer>> queryIter = queryMap.entrySet().iterator();
while (queryIter.hasNext()) {
Entry<Integer, Integer> entry = queryIter.next();
weight = simpleWeightFile.get(entry.getKey());
if (weight == null) {
weight = 0.0f;
}
// queryNorm += entry.getValue() * weight * weight;
queryNorm += (1 + Math.log(entry.getValue())) * weight * weight;
}
queryNorm = (float)Math.sqrt(queryNorm);
/*
* for(Integer visualWord : visualWords) { Double d = normQueryMap.get(visualWord); if(d == null) { d = 0.0; } d
* += simpleWeightFile.get(visualWord) / norm; normQueryMap.put(visualWord, d); }
*/
/*
* Iterator<Entry<Integer, Double>> iter = normQueryMap.entrySet().iterator(); while(iter.hasNext()) {
* Entry<Integer, Double> entry = iter.next(); System.out.println(entry.getKey() + ", " + entry.getValue()); }
*/
// ranking using Normalized L2
// Collections.sort(queryVWList);
// filter
if (candidate.size() > topNum) {
candidate = candidate.subList(0, topNum);
}
Iterator<Score> iter = candidate.iterator();
while (iter.hasNext()) {
float sum = 0;
Map<Integer, Integer> dictMap = new HashMap<Integer, Integer>();
Integer docId = iter.next().getIdentity();
List<Integer> dictVWList = simpleHistogramFile.get(docId);
for (Integer visualWord : dictVWList) {
count = dictMap.get(visualWord);
if (count == null) {
count = 1;
} else {
count++;
}
dictMap.put(visualWord, count);
}
Iterator<Entry<Integer, Integer>> dictIter = dictMap.entrySet().iterator();
while (dictIter.hasNext()) {
Entry<Integer, Integer> entry = dictIter.next();
weight = simpleWeightFile.get(entry.getKey());
if (weight == null) {
weight = 0.0f;
}
// dictNorm += entry.getValue() * weight * weight;
dictNorm += (1 + Math.log(entry.getValue())) * weight * weight;
// dictNorm += (1 + Math.log(entry.getValue())) * weight *
// weight;
}
dictNorm = (float)Math.sqrt(dictNorm);
dictIter = dictMap.entrySet().iterator();
while (dictIter.hasNext()) {
Entry<Integer, Integer> entry = dictIter.next();
Integer dictWord = entry.getKey();
Integer dictCount = entry.getValue();
weight = simpleWeightFile.get(entry.getKey());
if (weight == null) {
weight = 0.0f;
}
Integer queryCount = queryMap.get(dictWord);
if (queryCount != null) {
// sum += Math.min(queryCount, dictCount) * weight * weight;
sum += (1 + Math.log(Math.min(queryCount, dictCount))) * weight * weight;
}
}
sum /= (queryNorm * dictNorm);
Score score = new Score(docId, sum);
scoreList.add(score);
}
Collections.sort(scoreList);
return scoreList;
}
public List<Integer> quntinize(List<? extends Clusterable> points) {
return tree.getVisualWords(points);
}
public List<Score> getCandidate(List<Integer> visualWords) {
Map<Integer, Integer> voteMap = new HashMap<Integer, Integer>();
List<Integer> docList;
Iterator<Integer> iter = visualWords.iterator();
Integer vwId;
Integer docId;
Integer vote;
Iterator<Integer> iterDoc;
while (iter.hasNext()) {
vwId = iter.next();
docList = simpleInvertedFile.get(vwId);
if (docList == null) break;
iterDoc = docList.iterator();
while (iterDoc.hasNext()) {
docId = iterDoc.next();
vote = voteMap.get(docId);
if (vote == null) {
vote = 1;
} else {
vote++;
}
voteMap.put(docId, vote);
}
}
// 按照value排序
List<Score> scoreList = new ArrayList<Score>();
Iterator<Entry<Integer, Integer>> iterVote = voteMap.entrySet().iterator();
while (iterVote.hasNext()) {
Entry<Integer, Integer> entry = iterVote.next();
scoreList.add(new Score(entry.getKey(), entry.getValue()));
}
Collections.sort(scoreList);
/*
* ArrayList<Entry<Integer, Integer>> scoreList = new ArrayList<Entry<Integer, Integer>>( voteMap.entrySet());
* Collections.sort(scoreList, new Comparator<Map.Entry<Integer, Integer>>() { public int
* compare(Map.Entry<Integer, Integer> o1, Map.Entry<Integer, Integer> o2) { return (o2.getValue() -
* o1.getValue()); } });
*/
/*
* for(Entry<Integer,Integer> e : scoreList) { System.out.println(e.getKey() + "::::" + e.getValue()); }
*/
return scoreList;
}
public void buildIndex(List<? extends Clusterable> points, int Id) {
List<Integer> visualWords;
LinkedList<Integer> tmpInvertFile;
visualWords = tree.getVisualWords(points);
Collections.sort(visualWords);
simpleHistogramFile.put(Id, visualWords);
Iterator<Integer> vwIter = visualWords.iterator();
while (vwIter.hasNext()) {
Integer vw = vwIter.next();
tmpInvertFile = simpleInvertedFile.get(vw);
if (tmpInvertFile == null) {
tmpInvertFile = new LinkedList<Integer>();
}
if (!tmpInvertFile.contains(Id)) {
tmpInvertFile.add(Id);
}
simpleInvertedFile.put(vw, tmpInvertFile);
}
total_images++;
total_features += points.size();
points.clear();
}
public void buildWeight() {
if (simpleInvertedFile == null) return;
simpleWeightFile = new HashMap<Integer, Float>();
Iterator<Entry<Integer, LinkedList<Integer>>> iter = simpleInvertedFile.entrySet().iterator();
while (iter.hasNext()) {
Entry<Integer, LinkedList<Integer>> entry = iter.next();
Integer vwId = entry.getKey();
Integer docNum = entry.getValue().size();
Float weight = 0.0f;
if (docNum > 0) {
weight = (float)Math.log(total_images / docNum);
}
// System.out.println(weight);
simpleWeightFile.put(vwId, weight);
}
}
public void saveIndex(String indexPath) {
SerializationUtils.saveObject(simpleInvertedFile, new File(indexPath));
}
@SuppressWarnings("unchecked")
public boolean loadIndex(String indexPath) {
simpleInvertedFile = (Map<Integer, LinkedList<Integer>>) SerializationUtils.loadObject(indexPath);
return (simpleInvertedFile != null);
}
public void saveWeight(String weightPath) {
SerializationUtils.saveObject(simpleWeightFile, new File(weightPath));
}
@SuppressWarnings("unchecked")
public boolean loadWeight(String weightPath) {
simpleWeightFile = (Map<Integer, Float>) SerializationUtils.loadObject(weightPath);
return (simpleWeightFile != null);
}
public boolean loadTree(String treePath) {
tree = (KMeansTree) SerializationUtils.loadObject(treePath);
return (tree != null);
}
@SuppressWarnings("unchecked")
public void loadHistogram(String histogramPath) {
simpleHistogramFile = (Map<Integer, List<Integer>>) SerializationUtils.loadObject(histogramPath);
}
public void saveHistogram(String histogramPath) {
SerializationUtils.saveObject(simpleHistogramFile, new File(histogramPath));
}
}