/*
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bizosys.hsearch.dictionary;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import com.bizosys.oneline.util.StringUtils;
import com.bizosys.hsearch.common.IStorable;
import com.bizosys.hsearch.common.Storable;
/**
* Represents an entry in the dictionary
* @author Abinasha karana
*/
public class DictEntry implements IStorable{

    /** Separator placed between the individual types packed into {@link #fldType}. */
    private static final String TYPE_SEPARATOR = "\t";

    /** Size in bytes of the length prefix written in front of every string field. */
    private static final int LEN_BYTES = 2;

    /** Largest encoded field size that the 2-byte length prefix can carry when read as unsigned. */
    private static final int MAX_FIELD_BYTES = 0xFFFF;

    /** Frequency used when none is supplied. */
    private static final int DEFAULT_FREQ = 1;

    /** Shared stand-in for the encoded form of an absent ({@code null}) field. */
    private static final byte[] NO_BYTES = new byte[0];

    /**
     * The stemmed word
     */
    public String fldWord = null;

    /**
     * The word type. Several types may be packed into this one string,
     * separated by a tab character (see {@link #addType(String)} and {@link #getTypes()}).
     */
    public String fldType = null;

    /**
     * Number of documents in which this word is sighted
     */
    public int fldFreq = DEFAULT_FREQ;

    /**
     * Synonyms of this word
     */
    public String fldRelated = null;

    /**
     * Detail about this word, e.g. the thesaurus hierarchy.
     * {@link #toXml(Writer)} writes this value as-is, without escaping.
     */
    public String fldDetailXml = null;

    /**
     * Private Default Constructor
     */
    private DictEntry(){}

    /**
     * Constructor, Initialize by deserializing the stored bytes.
     * The expected layout is the one produced by {@link #toBytes()}:
     * <pre>
     * [2-byte length][word] [2-byte length][type] [4-byte freq]
     * [2-byte length][related] [2-byte length][detail]
     * </pre>
     * A zero length leaves the corresponding field {@code null}.
     * @param value The serialized entry
     */
    public DictEntry ( byte[] value) {
        int pos = 0;

        int wordLen = readLength(value, pos);
        pos += LEN_BYTES;
        this.fldWord = readString(value, pos, wordLen);
        pos += wordLen;

        int typeLen = readLength(value, pos);
        pos += LEN_BYTES;
        this.fldType = readString(value, pos, typeLen);
        pos += typeLen;

        this.fldFreq = Storable.getInt(pos, value);
        pos += 4;

        int relatedLen = readLength(value, pos);
        pos += LEN_BYTES;
        this.fldRelated = readString(value, pos, relatedLen);
        pos += relatedLen;

        int detailLen = readLength(value, pos);
        pos += LEN_BYTES;
        this.fldDetailXml = readString(value, pos, detailLen);
    }

    /**
     * Constructor
     * @param fldWord The stemmed word
     */
    public DictEntry(String fldWord) {
        this.fldWord = fldWord;
    }

    /**
     * Constructor
     * @param fldWord The stemmed word
     * @param fldType The word type
     * @param fldFreq No. of documents containing this word
     * @param related Synonyms of this word
     * @param fldDetailXml Detail about this word like the thesaurus hierarchy
     */
    public DictEntry(String fldWord, String fldType,
        int fldFreq, String related, String fldDetailXml) {

        this.fldWord = fldWord;
        this.fldType = fldType;
        this.fldFreq = fldFreq;
        this.fldRelated = related;
        this.fldDetailXml = fldDetailXml;
    }

    /**
     * Constructor
     * @param fldWord The stemmed word
     * @param fldType The word type
     * @param fldFreq No. of documents containing this word.
     * A <code>null</code> value keeps the default frequency of 1.
     */
    public DictEntry(String fldWord, String fldType, Integer fldFreq ) {
        this.fldWord = fldWord;
        this.fldType = fldType;
        // Guard the auto-unboxing: a null Integer would otherwise throw NullPointerException.
        this.fldFreq = (null == fldFreq) ? DEFAULT_FREQ : fldFreq.intValue();
    }

    /**
     * Set the synonyms of this word. The supplied value replaces any
     * previously stored one, so pass all synonyms together in a comma separated way.
     * @param related Related words
     */
    public void addRelatedWord(String related) {
        if (DictionaryLog.l.isDebugEnabled()) DictionaryLog.l.debug(" Related " + related);
        this.fldRelated = related;
    }

    /**
     * Serialize the dictionary entry.
     * Every string field is written as a 2-byte length followed by its encoded bytes
     * (length 0 and no bytes for a <code>null</code> field); the frequency is written
     * between the type and the related words.
     * @return The serialized entry, readable by {@link #DictEntry(byte[])}
     * @throws IllegalArgumentException If an encoded field is larger than 65535 bytes
     * and therefore cannot be described by the 2-byte length prefix
     */
    public byte[] toBytes() {
        byte[] wordB = encodeField(this.fldWord, "fldWord");
        byte[] typeB = encodeField(this.fldType, "fldType");
        byte[] freqB = Storable.putInt(this.fldFreq);
        byte[] relatedB = encodeField(this.fldRelated, "fldRelated");
        byte[] detailB = encodeField(this.fldDetailXml, "fldDetailXml");

        // Four length prefixes plus the payload of every field.
        int totalBytes = 4 * LEN_BYTES + wordB.length + typeB.length
            + freqB.length + relatedB.length + detailB.length;
        byte[] value = new byte[totalBytes];

        int pos = 0;
        pos = writeField(value, pos, wordB);
        pos = writeField(value, pos, typeB);

        System.arraycopy(freqB, 0, value, pos, freqB.length);
        pos += freqB.length;

        pos = writeField(value, pos, relatedB);
        writeField(value, pos, detailB);
        return value;
    }

    /**
     * Add a type to the word. Example "Bangalore" is a "City".
     * A type that is already present (compared as a whole, not as a substring)
     * is not added again. If the argument itself holds several tab separated
     * types, each of them is merged individually.
     * @param type The word type. <code>null</code> is ignored.
     */
    public void addType(String type) {
        if ( null == type) return;
        // A type equal to the separator could never be read back as a token.
        if ( TYPE_SEPARATOR.equals(type) ) type = " ";

        if ( null == this.fldType) {
            this.fldType = type;
            return;
        }
        if ( 0 == type.length()) return;

        if ( type.indexOf(TYPE_SEPARATOR) < 0) {
            appendTypeIfAbsent(type);
            return;
        }

        // The argument is itself a packed list: merge it token by token.
        StringTokenizer incoming = new StringTokenizer(type, TYPE_SEPARATOR);
        while (incoming.hasMoreTokens()) {
            appendTypeIfAbsent(incoming.nextToken());
        }
    }

    /**
     * Get all types associated to this word.
     * Ex. Hydrogen is a "Molecule" as well as a "Fuel"
     * @return All types, or <code>null</code> when no type is set
     */
    public List<String> getTypes() {
        if (StringUtils.isEmpty(this.fldType)) return null;

        StringTokenizer tokenizer = new StringTokenizer (this.fldType, TYPE_SEPARATOR);
        List<String> values = new ArrayList<String>();
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            if (StringUtils.isEmpty(token)) continue;
            values.add(token);
        }
        return values;
    }

    /**
     * Forms a XML representation of this entry.
     * The word, type and related text are escaped (<code>&amp;</code>, <code>&lt;</code>,
     * <code>&gt;</code>) so that they cannot break the markup; the detail is
     * written unchanged. Fields that are <code>null</code> are omitted,
     * the frequency is always written.
     * @param writer Writer
     * @throws IOException Write exception
     */
    public void toXml(Writer writer) throws IOException {
        writer.append("<e>");
        if ( null != this.fldWord ) writer.append("<w>").append(escapeXml(this.fldWord)).append("</w>");
        if ( null != this.fldType ) writer.append("<t>").append(escapeXml(this.fldType)).append("</t>");
        writer.append("<f>").append(Integer.toString(this.fldFreq)).append("</f>");
        if ( null != this.fldRelated ) writer.append("<r>").append(escapeXml(this.fldRelated)).append("</r>");
        // NOTE(review): the detail is assumed to already be XML (per its name), so it is not escaped.
        if ( null != this.fldDetailXml ) writer.append("<d>").append(this.fldDetailXml).append("</d>");
        writer.append("</e>");
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder(100);
        sb.append(" Word:").append(this.fldWord);
        sb.append(" , Type:").append(this.fldType);
        sb.append(" , Freq:").append(this.fldFreq);
        sb.append(" , Related:").append(this.fldRelated);
        sb.append(" , Detail:").append(this.fldDetailXml);
        return sb.toString();
    }

    /**
     * Reads a 2-byte length prefix as an unsigned value, so lengths between
     * 32768 and 65535 do not come back negative.
     */
    private static int readLength(byte[] value, int pos) {
        return Storable.getShort(pos, value) & 0xFFFF;
    }

    /** Decodes <code>len</code> bytes starting at <code>pos</code>; a zero length yields <code>null</code>. */
    private static String readString(byte[] value, int pos, int len) {
        if ( 0 == len) return null;
        byte[] raw = new byte[len];
        System.arraycopy(value, pos, raw, 0, len);
        return Storable.getString(raw);
    }

    /** Encodes a string field, mapping <code>null</code> to an empty array and rejecting oversized values. */
    private static byte[] encodeField(String text, String fieldName) {
        if ( null == text) return NO_BYTES;
        byte[] encoded = Storable.putString(text);
        if ( null == encoded) return NO_BYTES;
        if ( encoded.length > MAX_FIELD_BYTES) {
            throw new IllegalArgumentException(fieldName + " is too large to serialize: "
                + encoded.length + " bytes, maximum is " + MAX_FIELD_BYTES);
        }
        return encoded;
    }

    /** Writes the 2-byte length prefix followed by the data and returns the next write position. */
    private static int writeField(byte[] dest, int pos, byte[] data) {
        // The cast keeps the low 16 bits; readLength() restores them as an unsigned value.
        byte[] lenB = Storable.putShort((short) data.length);
        System.arraycopy(lenB, 0, dest, pos, LEN_BYTES);
        pos += LEN_BYTES;
        System.arraycopy(data, 0, dest, pos, data.length);
        return pos + data.length;
    }

    /** Appends a single type to the non-null {@link #fldType} unless that exact type is already listed. */
    private void appendTypeIfAbsent(String singleType) {
        StringTokenizer existing = new StringTokenizer(this.fldType, TYPE_SEPARATOR);
        while (existing.hasMoreTokens()) {
            if (singleType.equals(existing.nextToken())) return;
        }
        this.fldType = this.fldType + TYPE_SEPARATOR + singleType;
    }

    /** Escapes the characters that would otherwise be read as markup inside XML element content. */
    private static String escapeXml(String text) {
        StringBuilder escaped = null;
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            String replacement;
            switch (c) {
                case '&': replacement = "&amp;"; break;
                case '<': replacement = "&lt;"; break;
                case '>': replacement = "&gt;"; break;
                default: replacement = null; break;
            }
            if ( null == replacement) {
                if ( null != escaped) escaped.append(c);
                continue;
            }
            if ( null == escaped) {
                // First special character: start copying lazily so clean text is returned untouched.
                escaped = new StringBuilder(text.length() + 16);
                escaped.append(text, 0, i);
            }
            escaped.append(replacement);
        }
        return ( null == escaped) ? text : escaped.toString();
    }
}