DictEntry.java example

Explorer
hsearch-master
- src
/*
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bizosys.hsearch.dictionary;

import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import com.bizosys.oneline.util.StringUtils;

import com.bizosys.hsearch.common.IStorable;
import com.bizosys.hsearch.common.Storable;

/**
 * Represents an entry in the dictionary
 * @author Abinasha karana
 */
public class DictEntry implements IStorable{
		
	private static final String TYPE_SEPARATOR = "\t";
	
	/**
	 * The stemmed word
	 */
	public String fldWord = null;
	
	/**
	 * The word type
	 */
	public String fldType = null;
	
	/**
	 * Number of documents in which this word is sighted
	 */
	public int fldFreq = 1;
	
	/**
	 * Synonums of this word
	 */
	public String fldRelated = null;
	
	/**
	 * The original unstemmed word
	 */
	public String fldDetailXml = null;
	
	/**
	 * Private Default Constructor 
	 */
	private DictEntry(){}
	
	/**
	 * Constructor, Initialize by deserializing the stored bytes
	 * @param value
	 */
	public DictEntry ( byte[] value) {
		int pos = 0;

		short fldWordLen = Storable.getShort(pos, value);
		pos = pos + 2;
		if ( 0 != fldWordLen) {
			byte[] fldWordB = new byte[fldWordLen];
			System.arraycopy(value, pos, fldWordB, 0, fldWordLen);			
			this.fldWord = Storable.getString(fldWordB);
			pos = pos + fldWordLen;
		}
		
		short fldTypeLen = Storable.getShort(pos, value);
		pos = pos + 2;
		if ( 0 != fldTypeLen) {
			byte[] fldTypeB = new byte[fldTypeLen];
			System.arraycopy(value, pos, fldTypeB, 0, fldTypeLen);
			this.fldType = Storable.getString(fldTypeB);
			pos = pos + fldTypeLen;
		}
		
		this.fldFreq = Storable.getInt(pos, value);
		pos = pos + 4;
		
		short fldRelatedLen = Storable.getShort(pos, value);
		pos = pos + 2;
		if ( 0 != fldRelatedLen) {
			byte[] fldRelatedB = new byte[fldRelatedLen];
			System.arraycopy(value, pos, fldRelatedB, 0, fldRelatedLen);			
			this.fldRelated = Storable.getString(fldRelatedB);
			pos = pos + fldRelatedLen;
		}

		short fldDetailXmlLen = Storable.getShort(pos, value);
		pos = pos + 2;
		if ( 0 != fldDetailXmlLen) {
			byte[] fldDetailXmlB = new byte[fldDetailXmlLen];
			System.arraycopy(value, pos, fldDetailXmlB, 0, fldDetailXmlLen);			
			this.fldDetailXml = Storable.getString(fldDetailXmlB);
			pos = pos + fldDetailXmlLen;
			
		}
	}
		
	/**
	 * Constructor
	 * @param fldWord	The stemmed word
	 */
	public DictEntry(String fldWord) {
		this.fldWord = fldWord;
	}
	
	/**
	 * Constructor
	 * @param fldWord	The stemmed word
	 * @param fldType	The word type
	 * @param fldFreq	No. of documents containing this word 
	 * @param related	Synonums of this word
	 * @param fldDetailXml	Detail about this word like the thesaurus heirarchy
	 */
	public DictEntry(String fldWord, String fldType, 
		int fldFreq, String related, String fldDetailXml) {
		
		this.fldWord = fldWord;
		this.fldType = fldType;
		this.fldFreq = fldFreq;
		this.fldRelated = related;
		this.fldDetailXml = fldDetailXml;
	}

	/**
	 * Constructor
	 * @param fldWord	The stemmed word
	 * @param fldType	The word type
	 * @param fldFreq	No. of documents containing this word 
	 */
	public DictEntry(String fldWord, String fldType, Integer fldFreq ) {
		this.fldWord = fldWord;
		this.fldType = fldType;
		this.fldFreq = fldFreq;
	}
	
	/**
	 * Add synonums word. Add all synonums in a comma separated way.
	 * @param related	Related words
	 */
	public void addRelatedWord(String related) {
		if  (DictionaryLog.l.isDebugEnabled()) DictionaryLog.l.debug(" Related " + related);
		this.fldRelated = related;
	}
	
	/**
	 * Serialize the document entry
	 */
	public byte[] toBytes() {
		
		byte[] fldWordB = ( null == fldWord) ? null : Storable.putString(fldWord);
		byte[] fldTypeB = ( null == fldType) ? null : Storable.putString(fldType);
		byte[] fldFreqB = Storable.putInt(fldFreq);
		byte[] fldRelatedB = ( null == fldRelated) ? null : Storable.putString(fldRelated);
		byte[] fldDetailXmlB = ( null == fldDetailXml) ? null : Storable.putString(fldDetailXml);
		
		int fldWordLen = ( null == fldWordB) ? 0 : fldWordB.length;
		int fldTypeLen = ( null == fldTypeB) ? 0 : fldTypeB.length;
		int fldRelatedLen = ( null == fldRelatedB) ? 0 : fldRelatedB.length;
		int fldDetailXmlLen = ( null == fldDetailXmlB) ? 0 : fldDetailXmlB.length;
		
		int totalBytes = fldWordLen + fldTypeLen + 
			fldFreqB.length + fldRelatedLen + fldDetailXmlLen;
		
		byte[] fldWordLenB = Storable.putShort((short) fldWordLen);
		byte[] fldTypeLenB = Storable.putShort((short) fldTypeLen);
		byte[] fldRelatedLenB = Storable.putShort((short) fldRelatedLen);
		byte[] fldDetailXmlLenB = Storable.putShort((short) fldDetailXmlLen);
		
		byte[] value = new byte[totalBytes + 8]; 
		int pos = 0;
		
		System.arraycopy(fldWordLenB, 0, value, pos, 2);
		pos = pos + 2;
		if ( 0 != fldWordLen) {
			System.arraycopy(fldWordB, 0, value, pos, fldWordLen);
			pos = pos + fldWordLen;
		}
		
		System.arraycopy(fldTypeLenB, 0, value, pos, 2);
		pos = pos + 2;
		if ( 0 != fldTypeLen) {
			System.arraycopy(fldTypeB, 0, value, pos, fldTypeLen);
			pos = pos + fldTypeLen;
		}
		
		System.arraycopy(fldFreqB, 0, value, pos, fldFreqB.length);
		pos = pos + fldFreqB.length;

		System.arraycopy(fldRelatedLenB, 0, value, pos, 2);
		pos = pos + 2;
		if ( 0 != fldRelatedLen) {
			System.arraycopy(fldRelatedB, 0, value, pos, fldRelatedLen);
			pos = pos + fldRelatedLen;
		}
		
		System.arraycopy(fldDetailXmlLenB, 0, value, pos, 2);
		pos = pos + 2;
		if ( 0 != fldDetailXmlLen) {
			System.arraycopy(fldDetailXmlB, 0, value, pos, fldDetailXmlLen);
			pos = pos + fldDetailXmlLen;
		}
		return value;
	}
	
	/**
	 *	Add a type to the word. Example "Bangalore" is a "City" 
	 * @param type	The word type
	 */
	public void addType(String type) {
		if ( null == type) return;
		if ( TYPE_SEPARATOR.equals(type) ) type = "    ";
		if ( null == this.fldType) {
			this.fldType = type;
			return;
		}
		
		//Merge
		if ( this.fldType.indexOf(type) < 0)
			this.fldType = this.fldType + TYPE_SEPARATOR + type;
	}
	
	/**
	 * Get all types associated to this word.
	 * Ex. Hydrogen is a "Molecule" as well as a "Fuel" 
	 * @return	All types
	 */
	public List<String> getTypes() {
	    if (StringUtils.isEmpty(this.fldType)) return null;
	    StringTokenizer tokenizer = new StringTokenizer (this.fldType, TYPE_SEPARATOR);
	    List<String> values = new ArrayList<String>();
	    while (tokenizer.hasMoreTokens()) {
	    	String token = tokenizer.nextToken();
	    	if (StringUtils.isEmpty(token)) continue;
	    	values.add(token);
	    }
	    return values;
	}
	
	/**
	 * Forms a XML representation of this entry
	 * @param writer	Writer
	 * @throws IOException	Write exception
	 */
	public void toXml(Writer writer) throws IOException {
		writer.append("<e>");
		if ( null != this.fldWord ) writer.append("<w>").append(this.fldWord).append("</w>");
		if ( null != this.fldType ) writer.append("<t>").append(this.fldType).append("</t>");
		writer.append("<f>").append(new Integer(this.fldFreq).toString()).append("</f>");
		if ( null != this.fldRelated ) writer.append("<r>").append(this.fldRelated).append("</r>");
		if ( null != this.fldDetailXml ) writer.append("<d>").append(this.fldDetailXml).append("</d>");
		writer.append("</e>");
	}
	
	
	@Override 
	public String toString() {
		StringBuilder sb = new StringBuilder(100);
		sb.append(" Word:").append(this.fldWord);
		sb.append(" , Type:").append(this.fldType);
		sb.append(" , Freq:").append(this.fldFreq);
		sb.append(" , Related:").append(this.fldRelated);
		sb.append(" , Detail:").append(this.fldDetailXml);
		return sb.toString();
	}
}