/*
* Copyright 2015 Themistoklis Mavridis <themis.mavridis@issel.ee.auth.gr>.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.thesmartweb.swebrank;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.node.Node;
import com.google.common.collect.SortedSetMultimap;
import com.google.common.collect.TreeMultimap;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map.Entry;
import java.util.stream.Stream;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;
/**
 * Retrieves topic words from the ElasticSearch index in which the topic/word maps
 * of the analysed documents are stored.
 * <p>
 * Every public lookup opens its own transport client to the cluster
 * {@code lshrankldacluster} on {@code localhost:9300} and closes it again before
 * returning, including when the lookup fails.
 *
 * @author Themistoklis Mavridis
 */
public class ElasticGetWordList {

    private static final Logger LOGGER = Logger.getLogger(ElasticGetWordList.class.getName());

    /** Name of the ElasticSearch cluster that is queried. */
    private static final String CLUSTER_NAME = "lshrankldacluster";

    /** Host of the transport endpoint. */
    private static final String HOST = "localhost";

    /** Port of the transport endpoint. */
    private static final int PORT = 9300;

    /** Position, inside the "elasticSearchIndexes" configuration list, of the index that is searched. */
    private static final int WORDS_INDEX_POSITION = 2;

    /**
     * Gets all the words of all the topics of the documents whose ids are passed as input.
     * Words are returned in the order they are encountered and may contain duplicates.
     *
     * @param ids the ids of the documents whose words are going to be captured;
     *            {@code null} or empty yields an empty list without contacting the cluster
     * @param config_path configuration directory from which the names of the elastic search indexes are read
     * @return all the words in a List; an empty list if an {@link IOException} occurs
     *         while the search response is being serialised (the error is logged)
     */
    public List<String> get(List<String> ids, String config_path) {
        List<String> wordList = new ArrayList<>();
        if (ids == null || ids.isEmpty()) {
            return wordList;
        }
        Client client = null;
        try {
            String indexName = new ReadInput()
                    .GetKeyFile(config_path, "elasticSearchIndexes")
                    .get(WORDS_INDEX_POSITION);
            client = openClient();
            for (String id : ids) {
                for (JsonObject wordsMap : fetchTopicWordMaps(client, indexName, id)) {
                    for (Map.Entry<String, JsonElement> wordEntry : wordsMap.entrySet()) {
                        wordList.add(wordEntry.getKey());
                    }
                }
            }
            return wordList;
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            // Partial results are discarded on failure, as callers expect an empty list.
            return new ArrayList<>();
        } finally {
            // Always release the transport client, also when a runtime exception propagates.
            if (client != null) {
                client.close();
            }
        }
    }

    /**
     * Gets the top N words among the highest scored word of every topic of the documents
     * whose ids are passed as input.
     * <p>
     * For every topic only the word with the highest positive score is a candidate; when the
     * same word wins in several topics its highest score is kept. Topics without any
     * positively scored word contribute no candidate.
     *
     * @param ids the ids of the documents whose words are going to be captured;
     *            {@code null} or empty yields an empty list without contacting the cluster
     * @param top the maximum number of words to be returned; values {@code <= 0} yield an empty list
     * @param config_path configuration directory from which the names of the elastic search indexes are read
     * @return at most {@code top} words, ordered from the lowest to the highest score; an empty
     *         list if an {@link IOException} occurs while the search response is being
     *         serialised (the error is logged)
     */
    public List<String> getMaxWords(List<String> ids, int top, String config_path) {
        if (ids == null || ids.isEmpty() || top <= 0) {
            return new ArrayList<>();
        }
        Client client = null;
        try {
            String indexName = new ReadInput()
                    .GetKeyFile(config_path, "elasticSearchIndexes")
                    .get(WORDS_INDEX_POSITION);
            client = openClient();

            // best score seen so far for every word that won at least one topic
            Map<String, Double> bestScores = new HashMap<>();
            for (String id : ids) {
                for (JsonObject wordsMap : fetchTopicWordMaps(client, indexName, id)) {
                    String bestWord = null;
                    double bestScore = 0.0;
                    for (Map.Entry<String, JsonElement> wordEntry : wordsMap.entrySet()) {
                        double score = wordEntry.getValue().getAsDouble();
                        if (score > bestScore) {
                            bestWord = wordEntry.getKey();
                            bestScore = score;
                        }
                    }
                    // A topic with no positively scored word must not add an empty pseudo-word.
                    if (bestWord != null) {
                        bestScores.merge(bestWord, bestScore, Double::max);
                    }
                }
            }

            // The map is sorted ascending, so the top N words are the last N keys.
            List<String> ranked = new ArrayList<>(sortByValue(bestScores).keySet());
            int from = Math.max(0, ranked.size() - top);
            return new ArrayList<>(ranked.subList(from, ranked.size()));
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            return new ArrayList<>();
        } finally {
            // Always release the transport client, also when a runtime exception propagates.
            if (client != null) {
                client.close();
            }
        }
    }

    /**
     * Sorts a Map by its values.
     *
     * @param <K> the type of the keys
     * @param <V> the type of the values, which must be mutually comparable
     * @param map the map to be sorted; it is not modified
     * @return a new insertion-ordered map holding the entries of {@code map} sorted by value
     *         in ascending fashion
     */
    public static <K, V extends Comparable<? super V>> Map<K, V>
            sortByValue( Map<K, V> map )
    {
        Map<K, V> result = new LinkedHashMap<>();
        // forEachOrdered (unlike forEach) guarantees that entries are inserted in sorted order.
        map.entrySet().stream()
                .sorted(Map.Entry.comparingByValue())
                .forEachOrdered(e -> result.put(e.getKey(), e.getValue()));
        return result;
    }

    /** Opens a transport client to the configured cluster; the caller is responsible for closing it. */
    private static Client openClient() {
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("cluster.name", CLUSTER_NAME).build();
        return new TransportClient(settings)
                .addTransportAddress(new InetSocketTransportAddress(HOST, PORT));
    }

    /**
     * Searches the index for the document with the given id and collects the "wordsmap"
     * object of every entry of the "TopicsWordMap" array found in the "_source" of each hit.
     * Hits or topics that lack one of these members are skipped.
     */
    private static List<JsonObject> fetchTopicWordMaps(Client client, String indexName, String id)
            throws IOException {
        SearchResponse response = client.prepareSearch(indexName)
                .setSearchType(SearchType.QUERY_AND_FETCH)
                .setQuery(QueryBuilders.idsQuery().ids(id))
                .execute()
                .actionGet();

        // Serialise the response to JSON so that it can be navigated with Gson.
        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject();
        response.toXContent(builder, ToXContent.EMPTY_PARAMS);
        builder.endObject();
        JsonObject root = (JsonObject) new JsonParser().parse(builder.string());

        List<JsonObject> wordMaps = new ArrayList<>();
        JsonArray hits = arrayMember(objectMember(root, "hits"), "hits");
        if (hits == null) {
            return wordMaps;
        }
        for (JsonElement hit : hits) {
            if (!hit.isJsonObject()) {
                continue;
            }
            JsonArray topics = arrayMember(objectMember(hit.getAsJsonObject(), "_source"), "TopicsWordMap");
            if (topics == null) {
                continue;
            }
            for (JsonElement topic : topics) {
                if (!topic.isJsonObject()) {
                    continue;
                }
                JsonObject wordsMap = objectMember(topic.getAsJsonObject(), "wordsmap");
                if (wordsMap != null) {
                    wordMaps.add(wordsMap);
                }
            }
        }
        return wordMaps;
    }

    /** Returns the named member as a JSON object, or {@code null} if it is absent or of another kind. */
    private static JsonObject objectMember(JsonObject parent, String name) {
        JsonElement member = parent == null ? null : parent.get(name);
        return member != null && member.isJsonObject() ? member.getAsJsonObject() : null;
    }

    /** Returns the named member as a JSON array, or {@code null} if it is absent or of another kind. */
    private static JsonArray arrayMember(JsonObject parent, String name) {
        JsonElement member = parent == null ? null : parent.get(name);
        return member != null && member.isJsonArray() ? member.getAsJsonArray() : null;
    }
}