/*
* Copyright 2010 Bizosys Technologies Limited
*
* Licensed to the Bizosys Technologies Limited (Bizosys) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Bizosys licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bizosys.hsearch.index;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import org.apache.commons.lang.StringEscapeUtils;
import com.bizosys.hsearch.common.HDocument;
import com.bizosys.hsearch.filter.IStorable;
import com.bizosys.hsearch.filter.Storable;
import com.bizosys.hsearch.hbase.NV;
import com.bizosys.hsearch.schema.IOConstants;
import com.bizosys.hsearch.util.DataConstants;
import com.bizosys.oneline.util.StringUtils;
/**
* It Stores meta information about the document.
* These meta section helps on dynamic filteration as well as ranking
* during searching mechanism.
* An empty meta is currently only 6 byte length.
* @author karan
*
*/
public class DocMeta implements IStorable, IDimension {
/**
* The state of the docucment (Applied, Processed, Active, Inactive)
*/
public String state = null;
/**
* Just the Organization Unit (HR, PRODUCTION, SI)
* If there are multi level separate it with \ or .
*/
public String team = null;
/**
* Northing of a place
*/
public Float northing = 0.0f;
/**
* Eastering of a place
*/
public Float eastering = 0.0f;
/**
* The Geo House.
*/
public String geoHouse = null;
/**
* Document weight : Integer which biases the ranking algorithm.
* Document weight is lifted based on it's depth, source
* A home page will have more weight than the deeper location.
* documents from Intel page will have more weight
* This could be manually increased to influence the ranking mechanism
*/
public int weight = 0;
/**
* Document Type
* Table Name / File Extension / Dna Name
*/
public String docType = null;
/**
* These are author keywords or meta section of the page
*/
public String tags = null;
/**
* These are user keywords formed from the search terms
*/
public String socialText = null;
/**
* Which date the document is created.
*/
public Date createdOn = null;
/**
* Which date the document is last updated.
*/
public Date modifiedOn = null;
/**
* Till what date this document is valid
*/
public Date validTill = null;
/**
* From which IP address is this document created.
* This is specially for machine proximity ranking.
*/
public int ipHouse = 0;
/**
* High Security setting. During high security,
* the information kept encrypted.
*/
public boolean securityHigh = false;
/**
* By default the sentiment is positive.
*/
public boolean sentimentPositive = true;
public Locale locale = Locale.ENGLISH;
/**
* Default Constructor
*
*/
public DocMeta() {
}
public DocMeta(HDocument hdoc) {
if ( null != hdoc.tags) {
this.tags = StringUtils.listToString(hdoc.tags,
DataConstants.TAG_SEPARATOR_STORED) ;
this.tags = StringEscapeUtils.escapeXml(this.tags);
}
this.createdOn = hdoc.createdOn;
this.modifiedOn = hdoc.modifiedOn;
this.validTill = hdoc.validTill;
this.docType = hdoc.docType;
if ( null != hdoc.eastering) this.eastering = hdoc.eastering;
if ( null != hdoc.northing ) this.northing = hdoc.northing;
if ( null != hdoc.team ) this.team = hdoc.team;
if ( null != hdoc.socialText)
this.socialText = StringUtils.listToString(hdoc.socialText, DataConstants.TAG_SEPARATOR_STORED);
this.securityHigh = hdoc.securityHigh;
this.sentimentPositive = hdoc.sentimentPositive;
if ( null != hdoc.state) this.state = hdoc.state;
this.weight = hdoc.weight;
if ( null != hdoc.locale) this.locale = hdoc.locale;
}
public DocMeta(byte[] bytes) {
fromBytes(bytes,0);
}
/**
* Read the meta information from the byte array.
* Deserialize and initiate
* @param bytes : Serialized bytes
* @param pos : Position from which to read the data section
*/
public DocMeta(byte[] bytes, int pos) {
fromBytes(bytes, pos);
}
public int fromBytes(byte[] bytes, int pos ) {
byte docTypeLen = bytes[pos];
pos++;
if ( 0 != docTypeLen) {
byte[] docTypeB = new byte[docTypeLen];
System.arraycopy(bytes, pos, docTypeB, 0, docTypeLen);
this.docType = Storable.getString(docTypeB);
pos = pos + docTypeLen;
}
byte stateLen = bytes[pos];
pos++;
if ( 0 != stateLen) {
byte[] stateB = new byte[stateLen];
System.arraycopy(bytes, pos, stateB, 0, stateLen);
this.state = Storable.getString(stateB);
pos = pos + stateLen;
}
byte orgUnitLen = bytes[pos];
pos++;
if ( 0 != orgUnitLen) {
byte[] orgUnitB = new byte[orgUnitLen];
System.arraycopy(bytes, pos, orgUnitB, 0, orgUnitLen);
this.team = Storable.getString(orgUnitB);
pos = pos + orgUnitLen;
}
byte geoHouseLen = bytes[pos];
pos++;
if ( 0 != geoHouseLen) {
byte[] geoHouseB = new byte[geoHouseLen];
System.arraycopy(bytes, pos, geoHouseB, 0, geoHouseLen);
this.geoHouse = Storable.getString(geoHouseB);
pos = pos + geoHouseLen;
}
byte flag_1B = bytes[pos++];
boolean[] flag_1 = Storable.byteToBits(flag_1B);
byte flag_2B = bytes[pos++];
boolean[] flag_2 = Storable.byteToBits(flag_2B);
int bitPos = 0;
if ( flag_1[bitPos++]) {
this.eastering = Float.intBitsToFloat(Storable.getInt(pos, bytes));
pos = pos+ 4;
}
if ( flag_1[bitPos++]) {
this.northing = Float.intBitsToFloat(Storable.getInt(pos, bytes));
pos = pos+ 4;
}
if ( flag_1[bitPos++]) {
this.weight = Storable.getInt(pos, bytes);
pos = pos+ 4;
}
if ( flag_1[bitPos++]) {
this.ipHouse = Storable.getInt(pos, bytes);
pos = pos+ 4;
}
this.securityHigh = flag_1[bitPos++];
this.sentimentPositive = flag_1[bitPos++];
if (flag_1[bitPos++]) {
short len = Storable.getShort(pos, bytes);
pos = pos + 2;
byte[] tagsB = new byte[len];
System.arraycopy(bytes, pos, tagsB, 0, len);
this.tags = Storable.getString(tagsB);
pos = pos + tagsB.length;
}
if (flag_1[bitPos++]) {
short len = Storable.getShort(pos, bytes);
pos = pos + 2;
byte[] socialTextB = new byte[len];
System.arraycopy(bytes, pos, socialTextB, 0, len);
this.socialText = Storable.getString(socialTextB);
pos = pos + socialTextB.length;
}
bitPos = 0;
if (flag_2[bitPos++]) {
this.createdOn = new Date(Storable.getLong(pos, bytes));
pos = pos+ 8;
}
if (flag_2[bitPos++]) {
this.modifiedOn = new Date(Storable.getLong(pos, bytes));
pos = pos+ 8;
}
if (flag_2[bitPos++]) {
this.validTill = new Date(Storable.getLong(pos, bytes));
pos = pos+ 8;
}
return pos;
}
/**
* Filteration criteria
*/
public boolean checkActive(Date fromDate, Date toDate) {
return ( (this.modifiedOn.after(fromDate)) &&
this.modifiedOn.before(toDate)) ;
}
/**
* Returns all the necessary fields for processing.
* orgUnit is treated specially. It goes in a column
* This helps to search just on orgUnit fields and then
* retrieve documents.
*
* It stores type.. If the type is * means matches all
*
*/
public byte[] toBytes() {
byte docTypeLen = (byte) 0;
byte[] docTypeB = null;
if ( null != this.docType) {
docTypeB = Storable.putString(this.docType);
docTypeLen = (byte) docTypeB.length;
}
byte stateLen = (byte) 0;
byte[] stateB = null;
if ( null != this.state) {
stateB = Storable.putString(this.state);
stateLen = (byte) stateB.length;
}
byte orgUnitLen = (byte) 0;
byte[] orgUnitB = null;
if ( null != this.team) {
orgUnitB = Storable.putString(this.team);
orgUnitLen = (byte) orgUnitB.length;
}
byte geoHouseLen = (byte) 0;
byte[] geoHouseB = null;
if ( null != this.geoHouse) {
geoHouseB = Storable.putString(this.geoHouse);
geoHouseLen = (byte) geoHouseB.length;
}
boolean isNorthing = false;
byte[] northingB = null;
if ( this.northing != 0.0f) {
isNorthing = true;
northingB = Storable.putInt(Float.floatToIntBits(this.northing));
}
boolean isEastering = false;
byte[] easteringB = null;
if ( this.eastering != 0.0f) {
isEastering = true;
easteringB = Storable.putInt(Float.floatToIntBits(this.eastering));
}
boolean isWeight = false;
byte[] weightB = null;
if ( this.weight != 0) {
isWeight = true;
weightB = Storable.putInt(this.weight);
}
boolean isIpHouse = false;
byte[] iphouseB = null;
if ( this.ipHouse != 0) {
isIpHouse = true;
iphouseB = Storable.putInt(this.ipHouse);
}
boolean isTags = false;
byte[] tagsB = null;
if ( null != this.tags ) {
isTags = true;
tagsB = Storable.putString(this.tags);
}
boolean isSocialText = false;
byte[] socialTextB = null;
if ( null != this.socialText ) {
isSocialText = true;
this.socialText = this.socialText.toLowerCase();
socialTextB = Storable.putString(this.socialText);
}
boolean isBornOn = false;
byte[] bornOnB = null;
if ( null != this.createdOn) {
isBornOn = true;
bornOnB = Storable.putLong(this.createdOn.getTime());
}
boolean isModifiedOn = false;
byte[] modifiedOnB = null;
if ( null != this.modifiedOn) {
isModifiedOn = true;
modifiedOnB = Storable.putLong(this.modifiedOn.getTime());
}
boolean isDeathOn = false;
byte[] deathOnB = null;
if ( null != this.validTill) {
isDeathOn = true;
deathOnB = Storable.putLong(this.validTill.getTime());
}
byte flag_1 = Storable.bitsToByte(new boolean[] {
isEastering, isNorthing, isWeight, isIpHouse, securityHigh, sentimentPositive, isTags, isSocialText});
byte flag_2 = Storable.bitsToByte(new boolean[] {
isBornOn, isModifiedOn, isDeathOn, false, false, false, false, false});
int totalBytes = 1 /** docTypeLen */ +
1 /** stateLen */ + 1 /** orgUnitLen */ + 1 /** geoHouseLen */ +
1 /** dataPresence */ + 1 /** timePresence */ +
docTypeLen + stateLen + orgUnitLen + geoHouseLen;
if ( isEastering) totalBytes = totalBytes + 4;
if ( isNorthing ) totalBytes = totalBytes + 4;
if ( isWeight ) totalBytes = totalBytes + 4;
if ( isIpHouse ) totalBytes = totalBytes + 4;
if ( isTags ) totalBytes = totalBytes + tagsB.length + 2;
if ( isSocialText ) totalBytes =
totalBytes + socialTextB.length + 2;
if ( isBornOn ) totalBytes = totalBytes + 8;
if ( isModifiedOn ) totalBytes = totalBytes + 8;
if ( isDeathOn ) totalBytes = totalBytes + 8;
/**
* Writing Start
*/
byte[] bytes = new byte[totalBytes];
int pos = 0;
bytes[pos++] = docTypeLen;
if ( 0 != docTypeLen)
System.arraycopy(docTypeB, 0, bytes, pos, docTypeLen);
pos = pos + docTypeLen;
bytes[pos++] = stateLen;
if ( 0 != stateLen)
System.arraycopy(stateB, 0, bytes, pos, stateLen);
pos = pos + stateLen;
bytes[pos++] = orgUnitLen;
if ( 0 != orgUnitLen)
System.arraycopy(orgUnitB, 0, bytes, pos, orgUnitLen);
pos = pos + orgUnitLen;
bytes[pos++] = geoHouseLen;
if ( 0 != geoHouseLen)
System.arraycopy(geoHouseB, 0, bytes, pos, geoHouseLen);
pos = pos + geoHouseLen;
bytes[pos] = flag_1;
pos++;
bytes[pos] = flag_2;
pos++;
if ( isEastering) {
System.arraycopy(easteringB, 0, bytes, pos, 4);
pos = pos+ 4;
}
if ( isNorthing ) {
System.arraycopy(northingB, 0, bytes, pos, 4);
pos = pos+ 4;
}
if (isWeight) {
System.arraycopy(weightB, 0, bytes, pos, 4);
pos = pos+ 4;
}
if ( isIpHouse) {
System.arraycopy(iphouseB, 0, bytes, pos, 4);
pos = pos+ 4;
}
if (isTags) {
System.arraycopy(Storable.putShort((short)tagsB.length), 0, bytes, pos, 2);
pos = pos + 2;
System.arraycopy(tagsB, 0, bytes, pos, tagsB.length);
pos = pos+ tagsB.length;
}
if (isSocialText) {
System.arraycopy(Storable.putShort((short)socialTextB.length), 0, bytes, pos, 2);
pos = pos + 2;
System.arraycopy(socialTextB, 0, bytes, pos, socialTextB.length);
pos = pos+ socialTextB.length;
}
if (isBornOn) {
System.arraycopy(bornOnB, 0, bytes, pos, 8);
pos = pos+ 8;
}
if (isModifiedOn) {
System.arraycopy(modifiedOnB, 0, bytes, pos, 8);
pos = pos+ 8;
}
if(isDeathOn) {
System.arraycopy(deathOnB, 0, bytes, pos, 8);
pos = pos+ 8;
}
return bytes;
}
/**
* Cleans up the entire set and make it available for reuse.
*/
public void cleanup() {
this.state = null;
this.team = null;
this.northing = 0.0f;
this.eastering = 0.0f;
this.weight = 0;
this.docType = null;
this.securityHigh = false;
this.tags = null;
this.socialText = null;
this.createdOn = null;
this.modifiedOn = null;
this.validTill = null;
this.ipHouse = 0;
this.geoHouse = null;
}
@Override
public String toString() {
StringWriter writer = new StringWriter();
try {
toXml(writer);
writer.close();
return writer.toString();
// Closing a StringWriter has no effect.
} catch (Exception ex) {
IndexLog.l.fatal(ex);
return ex.getMessage();
}
}
public void toXml(Writer writer) throws IOException {
writer.append("<meta>");
if ( StringUtils.isNonEmpty(this.docType) )
writer.append("<type>").append(this.docType).append("</type>");
if ( 0 != this.weight )
writer.append("<weight>").append(new Integer(this.weight).toString()).append("</weight>");
if ( null != this.createdOn )
writer.append("<created>").append(this.createdOn.toString()).append("</created>");
if ( null != this.validTill)
writer.append("<validtill>").append(this.validTill.toString()).append("</validtill>");
if ( StringUtils.isNonEmpty(this.geoHouse) )
writer.append("<geo>").append(this.geoHouse).append("</geo>");
if ( null != this.modifiedOn)
writer.append("<modified>").append(this.modifiedOn.toString()).append("</modified>");
if ( StringUtils.isNonEmpty(this.team) )
writer.append("<team>").append(this.team).append("</team>");
if ( null != this.tags) {
writer.append("<tags>").append(this.tags.replace(
DataConstants.TAG_SEPARATOR_STORED, DataConstants.TAG_SEPARATOR_SHOWN)).append("</tags>");
}
if ( null != this.socialText) {
writer.append("<social>").append(this.socialText.replace(
DataConstants.TAG_SEPARATOR_STORED, DataConstants.TAG_SEPARATOR_SHOWN)).append("</social>");
}
if ( StringUtils.isNonEmpty(this.state) ) writer.append("<state>").append(this.state).append("</state>");
writer.append("<secure>");
if (securityHigh) writer.append("true");
else writer.append("false");
writer.append("</secure>");
if (!sentimentPositive) {
writer.append("<sentiment>false</sentiment>");
}
writer.append("</meta>");
}
public void toNVs(List<NV> nvs) {
nvs.add(new NV(IOConstants.SEARCH_BYTES,IOConstants.META_BYTES, this));
}
public void addTags(List<String> tagL) {
if (this.tags == null) {
this.tags = StringUtils.listToString(tagL, DataConstants.TAG_SEPARATOR_STORED) ;
} else {
this.tags = this.tags + DataConstants.TAG_SEPARATOR_STORED +
StringUtils.listToString(tagL, DataConstants.TAG_SEPARATOR_STORED) ;
}
}
public List<String> getTags() {
if ( null == tags) return null;
return StringUtils.fastSplit(tags, DataConstants.TAG_SEPARATOR_STORED);
}
public void addSocialText(List<String> socialText) {
if (this.socialText == null) {
this.socialText = StringUtils.listToString(socialText, DataConstants.TAG_SEPARATOR_STORED) ;
} else {
this.socialText = this.socialText + DataConstants.TAG_SEPARATOR_STORED +
StringUtils.listToString(socialText, DataConstants.TAG_SEPARATOR_STORED) ;
}
}
public List<String> getSocialText() {
if ( null == socialText) return null;
return StringUtils.fastSplit(socialText, DataConstants.TAG_SEPARATOR_STORED);
}
}