/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
package models;
import com.avaje.ebean.Ebean;
import org.codehaus.jackson.map.ObjectMapper;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.client.Client;
import play.Configuration;
import play.Logger;
import play.Play;
import play.db.ebean.Model;
import javax.persistence.*;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.BitSet;
import java.util.List;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
/**
* Created with IntelliJ IDEA.
* User: oyiptong
* Date: 2012-08-10
* Time: 4:13 PM
*/
@Entity
@Table(name="smarts_document")
public class Document extends Model {
public long getId() {
return id;
}
public String getUrl() {
return url;
}
@Id
@GeneratedValue
private long id;
private String url;
@Column(name="topic_distribution")
private String topicDistribution;
@Column(name="features_text")
private String featuresText;
@Column(name="features_bits_text")
private String featuresBitsText;
@Column(name="features_bits")
@Lob
@Basic(fetch = FetchType.EAGER)
private byte[] featuresBits;
@ManyToOne
@JoinColumn(name="topic_model_id", nullable=false)
private TopicModel topicModel;
public TopicModel getTopicModel() { return topicModel;}
public Document(String url, double[] topicDistribution) throws Exception {
this.url = url;
ObjectMapper mapper = new ObjectMapper();
this.topicDistribution = mapper.writeValueAsString(topicDistribution);
Configuration config = Play.application().configuration();
int numProjectionBits = config.getInt("smarts.lsh.numBits");
// 100 dimensions for random projections
BitSet bs = RandomProjection.projectBinaryBytes(topicDistribution, numProjectionBits);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ObjectOutputStream oos = new ObjectOutputStream(baos);
oos.writeObject(bs);
this.featuresBits = baos.toByteArray();
StringBuilder sb = new StringBuilder();
StringBuilder sbBits = new StringBuilder();
for (int i = 0; i < numProjectionBits; i++)
{
if(bs.get(i) == true)
{
sb.append(String.format("one%d ", i));
sbBits.append("1");
} else
{
sb.append(String.format("zero%d ", i));
sbBits.append("0");
}
}
this.featuresText = sb.toString().trim();
this.featuresBitsText = sbBits.toString();
}
public static Finder<Long,Document> find = new Finder<Long,Document>(Long.class, Document.class);
public String getFeaturesText()
{
return featuresText;
}
public String getFeaturesBitsText()
{
return featuresBitsText;
}
public byte[] getFeaturesBits()
{
return featuresBits;
}
public String getTopicDistribution() {
return topicDistribution;
}
@Override
public void save()
{
Ebean.save(this);
ElasticSearch es = ElasticSearch.getElasticSearch();
Client esClient = es.getClient();
try
{
IndexResponse response = esClient.prepareIndex("pancake-smarts", "document", String.format("%d", this.id))
.setSource(jsonBuilder()
.startObject()
.field("features_text", this.featuresText)
.field("features_bits", this.featuresBitsText)
.field("topic_model_id", this.topicModel.getId())
.endObject()
)
.execute().actionGet();
} catch (IOException e)
{
Logger.error(String.format("INDEX FAIL document : %d", this.id));
}
}
@Override
public void delete()
{
ElasticSearch es = ElasticSearch.getElasticSearch();
Client esClient = es.getClient();
esClient.prepareDelete("pancake-smarts", "document", String.format("%d", this.id))
.setOperationThreaded(false)
.execute()
.actionGet();
Ebean.delete(this);
}
}