/**
* Copyright (C) 2013 Isabel Drost-Fromm
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.isabeldrostfromm.sof.naive;
import java.util.ArrayList;
import java.util.Map;
import java.util.Set;
import org.elasticsearch.common.collect.Sets;
import com.google.common.base.Preconditions;
import com.google.gson.internal.StringMap;
import de.isabeldrostfromm.sof.Example;
import de.isabeldrostfromm.sof.ProviderIterator;
/**
* Iterator for ES results as returned by the Java API.
*
* Given a finished search init objects of this type
* with the resulting hits. For each call to iterator
* a new hit will be parsed and returned as Document
* object.
*
* Implementation is not thread safe!
*
* TODO provide Document not as a copied object but
* as a proxy that directly works on the underlying
* hit and enable lazy Document init/ hit parsing that way.
*
* TODO tests missing
* */
@SuppressWarnings("unchecked")
public class RESTProviderIterator extends ProviderIterator {
@SuppressWarnings("rawtypes")
private ArrayList<StringMap> hits;
private int cursor = -1;
private Vectoriser v = new Vectoriser();
@SuppressWarnings("rawtypes")
public RESTProviderIterator(Map<String, StringMap> result) {
Preconditions.checkNotNull(result);
if (result.isEmpty()) {
this.hits = new ArrayList<StringMap>();
} else {
StringMap obj_1 = result.get("hits");
if (obj_1 == null) System.out.println(result);
hits = (ArrayList<StringMap>) obj_1.get("hits");
}
}
@Override
protected Example parse() {
if ( (cursor + 1) < hits.size()) {
cursor++;
@SuppressWarnings("rawtypes")
StringMap entry = hits.get(cursor);
@SuppressWarnings("rawtypes")
StringMap srcDoc = (StringMap) entry.get("_source");
String body = (String) srcDoc.get("body");
String title = (String) srcDoc.get("title");
double reputation = Double.parseDouble((String) srcDoc.get("reputation_at_post_creation"));
String tag1 = (String) srcDoc.get("tag_1");
String tag2 = (String) srcDoc.get("tag_2");
String tag3 = (String) srcDoc.get("tag_3");
String tag4 = (String) srcDoc.get("tag_4");
String tag5 = (String) srcDoc.get("tag_5");
Set<String> tags = Sets.newHashSet(tag1, tag2, tag3, tag4, tag5);
String state = (String) srcDoc.get("open_status");
return Example.of(v.vectorise(Document.of(body, state, title, reputation, tags)), state);
}
return null;
}
}