// Doc.java example

// Explorer
// hsearch-master
// - src
/*
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bizosys.hsearch.index;

import java.io.IOException;
import java.io.Writer;
import java.util.List;

import com.bizosys.hsearch.common.HDocument;
import com.bizosys.hsearch.common.Storable;
import com.bizosys.hsearch.hbase.HReader;
import com.bizosys.hsearch.hbase.NVBytes;
import com.bizosys.hsearch.schema.IOConstants;
import com.bizosys.oneline.ApplicationFault;
import com.bizosys.oneline.SystemFault;

/**
 * Documents are the unit of indexing and search. 
 * A Document consists of: 
 * <ul>
 * 	<li>Set of fields</li>
 * 	<li>Access information</li>
 * 	<li>Meta information</li>
 * 	<li>Result Display Section</li>
 * </ul>
 * <br/>
 * A document is uniquely identified by the doc merging Id (Bucket) 
 * and the document serial number inside the bucket.  
 * @author karan
 *
 */
public class Doc {
	
	/**
	 * Term vectors created after parsing the document.
	 * Only {@link #Doc(HDocument)} assigns this; it stays <code>null</code>
	 * for documents built with the other constructors.
	 */
	public DocTerms terms = null;
	
	/**
	 * The document meta section
	 */
	public DocMeta meta = null;
	
	/**
	 * Document view and edit access control settings
	 */
	public DocAcl acl = null;
	
	/**
	 * The result display formats
	 */
	public DocTeaser teaser = null;
	
	/**
	 * The content section which consists of fields
	 */
	public DocContent content = null;
	
	/**
	 * From which machine the document is submitted
	 */
	public String ipAddress = null;
	
	/**
	 * The bucket (doc merging Id) this document belongs to.
	 * <code>null</code> means it is not assigned.
	 */
	public Long bucketId = null;
	
	/**
	 * The document serial number inside the bucket. Together with
	 * {@link #bucketId} it identifies the document.
	 */
	public Short docSerialId = null;

	/**
	 * Creates an empty document. Every section is left <code>null</code>.
	 */
	public Doc() {
	}
	
	/**
	 * Builds a document from a submitted {@link HDocument}.
	 * The ids and the ip address are copied over; the meta, teaser, content
	 * and acl sections are each constructed from <code>hDoc</code>, and
	 * {@link #terms} is set to a new {@link DocTerms} instance.
	 * 
	 * @param hDoc The submitted document
	 * @throws SystemFault Declared by this constructor; raised by the section constructors
	 * @throws ApplicationFault Declared by this constructor; raised by the section constructors
	 */
	public Doc(HDocument hDoc) throws SystemFault, ApplicationFault{
		this.bucketId = hDoc.bucketId;
		this.docSerialId = hDoc.docSerialId;
		this.ipAddress = hDoc.ipAddress;
		
		this.meta = new DocMeta(hDoc);
		this.teaser = new DocTeaser(hDoc);
		this.content = new DocContent(hDoc);
		this.acl = new DocAcl(hDoc);
		this.terms = new DocTerms();
	}
	
	/**
	 * Loads a stored document by its original Id.
	 * The original Id is first resolved to the mapped key, which gives the
	 * bucket and the document serial number. The content row and the preview
	 * row are then read using that mapped key. A section whose row is not
	 * returned stays <code>null</code>. {@link #terms} is never populated here.
	 * 
	 * @param origId The original document Id
	 * @throws SystemFault Declared by this constructor; raised by the storage calls
	 * @throws ApplicationFault If the Id is <code>null</code>, is not found,
	 * or does not resolve to exactly one mapped key
	 */
	public Doc(String origId) throws SystemFault, ApplicationFault {
		
		// Fail with the declared fault instead of a NullPointerException.
		if ( null == origId ) throw new ApplicationFault("Id not found :" + origId);
		
		// Get the mapped Id
		// NOTE(review): getBytes()/new String(byte[]) use the platform default
		// charset - confirm this matches how the keys were written.
		List<NVBytes> mappingB = IdMapping.getKey(origId.getBytes());
		if ( null == mappingB) throw new ApplicationFault("Id not found :" + origId);
		if ( 1 != mappingB.size()) throw new ApplicationFault(mappingB.size() + " Ids found :" + origId);
		String mappedKey = new String(mappingB.get(0).data);
		this.bucketId = IdMapping.getBucket(mappedKey);
		this.docSerialId = IdMapping.getDocSerial(mappedKey);
		mappingB.clear();

		// Get the Content
		List<NVBytes> contentB = HReader.getCompleteRow(IOConstants.TABLE_CONTENT, mappedKey.getBytes());
		if ( null != contentB) {
			this.content = new DocContent(contentB );
			contentB.clear();
		}
		
		// Get the preview row: it carries the teaser and, in the columns
		// named META_BYTES and ACL_BYTES, the meta and acl sections.
		List<NVBytes> previewB = HReader.getCompleteRow(
			IOConstants.TABLE_PREVIEW, mappedKey.getBytes());
		if ( null != previewB) {
			this.teaser = new DocTeaser(origId.getBytes(), previewB);
			this.teaser.id = new Storable(origId);
			for (NVBytes nv : previewB) {
				if ( Storable.compareBytes(nv.name, IOConstants.META_BYTES)) {
					this.meta = new DocMeta(nv.data);
				} else if ( Storable.compareBytes(nv.name, IOConstants.ACL_BYTES)) {
					this.acl = new DocAcl(nv.data);
				}
			}
			previewB.clear();
		}
	}
	
	
	/**
	 * Recycles this document.
	 * Helps GC to garbage collect better.
	 * Sections which were never populated are skipped, so this is safe to
	 * call on a document created by any of the constructors.
	 */
	public void recycle() {
		if ( null != this.terms ) this.terms.cleanup();
		if ( null != this.meta ) this.meta.cleanup();
		if ( null != this.acl ) this.acl.cleanup();
		if ( null != this.teaser ) this.teaser.cleanup();
		if ( null != this.content ) this.content.cleanup();
		bucketId = null;
		// NOTE(review): reset to 0, not null, so toString() and toXml() still
		// emit a serial of 0 for a recycled document - confirm this is intended.
		docSerialId = 0;
	}
	

	/**
	 * Gives a multi line, human readable dump of the sections which are set.
	 * Sections which are <code>null</code> are left out.
	 */
	@Override
	public String toString() {
		StringBuilder sb = new StringBuilder(">>>> Document Starts <<<<");
		if ( null != bucketId ) sb.append("\n Bucket :").append(bucketId.toString());
		if ( null != docSerialId ) sb.append("\n Doc Serial :").append(docSerialId);
		if ( null != terms ) sb.append("\n Term :").append(terms.toString());
		if ( null != acl ) sb.append("\n Acl : ").append(acl.toString());
		if ( null != meta ) sb.append("\n Meta :").append(meta.toString());
		if ( null != teaser ) sb.append("\n Teaser:").append(teaser.toString());
		if ( null != content ) sb.append("\n Content").append(content.toString());
		sb.append("\n>>>> Document Ends <<<<\n");
		return sb.toString();
	}
	
	/**
	 * Writes this document as XML fragments to the given writer.
	 * The bucket goes inside <code>&lt;b&gt;</code> and the serial number
	 * inside <code>&lt;n&gt;</code>; the meta, teaser and content sections
	 * write themselves. Sections which are <code>null</code> are left out.
	 * The acl and terms sections are not written.
	 * 
	 * @param writer The writer to append to
	 * @throws IOException If the writer fails
	 */
	public void toXml(Writer writer) throws IOException {
		if ( null != bucketId ) writer.append("<b>").append(bucketId.toString()).append("</b>");
		if ( null != docSerialId ) writer.append("<n>").append(docSerialId.toString()).append("</n>");
		//if ( null != acl ) writer.append("<a>").append(acl.toString()).append("</a>");
		if ( null != meta ) meta.toXml(writer);
		if ( null != teaser ) teaser.toXml(writer);
		if ( null != content ) content.toXml(writer);
	}
}