/*
* Copyright 2010 Bizosys Technologies Limited
*
* Licensed to the Bizosys Technologies Limited (Bizosys) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Bizosys licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bizosys.hsearch.index;
import java.io.IOException;
import java.io.Writer;
import java.util.List;
import com.bizosys.hsearch.common.HDocument;
import com.bizosys.hsearch.filter.PreviewFilterMerged;
import com.bizosys.hsearch.filter.Storable;
import com.bizosys.hsearch.hbase.HReader;
import com.bizosys.hsearch.hbase.NVBytes;
import com.bizosys.hsearch.inpipe.util.ReaderType;
import com.bizosys.hsearch.schema.IOConstants;
import com.bizosys.oneline.ApplicationFault;
import com.bizosys.oneline.SystemFault;
/**
* Documents are the unit of indexing and search.
* A Document consists of:
* <ul>
* <li>Set of fields</li>
* <li>Access information</li>
* <li>Meta information</li>
* <li>Result Display Section</li>
* </ul>
* <br/>
* A document is uniquely identified by the doc merging Id (Bucket)
* and the document serial number inside the bucket.
* @author karan
*
*/
public class Doc {

    /**
     * Term vectors created after parsing the document
     */
    public DocTerms terms = null;

    /**
     * The document meta section
     */
    public DocMeta meta = null;

    /**
     * Document view and edit access control settings
     */
    public DocAcl acl = null;

    /**
     * The result display formats
     */
    public DocTeaser teaser = null;

    /**
     * The content section which consists of fields
     */
    public DocContent content = null;

    /**
     * From which machine the document is submitted
     */
    public String ipAddress = null;

    /**
     * The bucket (doc merging id) this document belongs to.
     * Used as the row key when reading the preview table.
     */
    public Long bucketId = null;

    /**
     * The document serial number inside the bucket.
     */
    public Short docSerialId = null;

    /**
     * The tenant this document belongs to.
     */
    public String tenant = null;

    /**
     * Creates an empty document; all sections are left <code>null</code>.
     */
    public Doc() {
    }

    /**
     * Builds a document from a submitted {@link HDocument}.
     * All sections (meta, teaser, content, acl) are created from the
     * supplied document and an empty term container is attached.
     *
     * @param hDoc The submitted document
     * @throws SystemFault Propagated from the section constructors
     * @throws ApplicationFault When <code>hDoc.validate()</code> fails
     */
    public Doc(HDocument hDoc) throws SystemFault, ApplicationFault {
        if ( ! hDoc.validate() ) {
            throw new ApplicationFault("Invalid Document \n" + hDoc.toString());
        }

        this.tenant = hDoc.tenant;
        this.bucketId = hDoc.bucketId;
        this.docSerialId = hDoc.docSerialId;
        this.ipAddress = hDoc.ipAddress;

        this.meta = new DocMeta(hDoc);
        this.teaser = new DocTeaser(hDoc);
        this.content = new DocContent(hDoc);
        this.acl = new DocAcl(hDoc);
        this.terms = new DocTerms();
    }

    /**
     * Loads a stored document identified by tenant and document id.
     * The id is resolved to its bucket/position through {@link IdMapping},
     * then the content row and the preview sections are read.
     * <code>terms</code> is not populated by this constructor.
     *
     * @param tenant The tenant owning the document
     * @param docId The document id
     * @throws SystemFault Propagated from the lookup and read calls
     * @throws ApplicationFault Propagated from the lookup and read calls
     */
    public Doc(String tenant, String docId) throws SystemFault, ApplicationFault {
        // Remember the tenant; previously it was dropped and the field stayed null.
        this.tenant = tenant;

        // Get the Bucket_Docpos
        String bucketDocPos = IdMapping.getBucket_DocPos(tenant, docId);
        this.bucketId = IdMapping.getBucket(bucketDocPos);
        this.docSerialId = IdMapping.getDocPos(bucketDocPos);

        loadContent(bucketDocPos);
        getPreviewMerged();
    }

    /**
     * Loads a stored document identified by bucket id and serial number.
     * <code>terms</code> and <code>tenant</code> are not populated by this
     * constructor.
     *
     * @param bucketId The bucket id
     * @param docSerialId The document serial number inside the bucket
     * @param mergePreview Currently not consulted; the preview sections are
     * always loaded. NOTE(review): confirm whether <code>false</code> was
     * meant to skip the preview read before changing this.
     * @throws SystemFault Propagated from the lookup and read calls
     * @throws ApplicationFault Propagated from the lookup and read calls
     */
    public Doc(long bucketId, short docSerialId, boolean mergePreview) throws SystemFault, ApplicationFault {
        if ( IndexLog.l.isDebugEnabled() ) {
            IndexLog.l.debug("Doc Initializing with : " + bucketId + "/" + docSerialId);
        }

        // Get the Bucket_Docpos
        this.bucketId = bucketId;
        this.docSerialId = docSerialId;
        String bucketDocPos = IdMapping.getBucket_DocPos(this.bucketId, this.docSerialId);

        loadContent(bucketDocPos);
        getPreviewMerged();
    }

    /**
     * Reads the complete content row for the given key and, when a row is
     * returned, builds the content section from it.
     *
     * @param bucketDocPos The content table row key
     */
    private void loadContent(String bucketDocPos) throws SystemFault, ApplicationFault {
        List<NVBytes> contentB = HReader.getCompleteRow(
            IOConstants.TABLE_CONTENT, bucketDocPos.getBytes());
        if ( null == contentB ) return;

        this.content = new DocContent(contentB);
        contentB.clear();
    }

    /**
     * Reads the preview row of this document's bucket, filtered by the
     * document serial number, and populates the meta, acl and teaser
     * sections from the returned columns. The section is chosen by the
     * first character of the column name.
     */
    private void getPreviewMerged() throws SystemFault, ApplicationFault {
        PreviewFilterMerged pfm = new PreviewFilterMerged(this.docSerialId);
        List<NVBytes> previewB = HReader.getCompleteRow(IOConstants.TABLE_PREVIEW,
            Storable.putLong(this.bucketId), pfm);
        if ( null == previewB ) return;

        for (NVBytes nv : previewB) {
            // Skip columns without a usable name instead of failing in charAt(0).
            if ( null == nv.name ) continue;
            String columnName = new String(nv.name);
            if ( columnName.length() == 0 ) continue;

            char name = columnName.charAt(0);
            if ( name == IOConstants.META_DETAIL_0 ) {
                this.meta = new DocMeta(nv.data);
            } else if ( name == IOConstants.ACL_DETAIL_0 ) {
                this.acl = new DocAcl(nv.data);
            } else if ( name == IOConstants.TEASER_DETAIL_0 ) {
                this.teaser = new DocTeaser(nv.data);
            }
        }
        previewB.clear();
    }

    /**
     * Recycles this document.
     * Helps GC to garbage collect better.
     * Sections that were never populated are skipped, so this is safe to
     * call on documents created by any of the constructors.
     */
    public void recycle() {
        if ( null != this.terms ) this.terms.cleanup();
        if ( null != this.meta ) this.meta.cleanup();
        if ( null != this.acl ) this.acl.cleanup();
        if ( null != this.teaser ) this.teaser.cleanup();
        if ( null != this.content ) this.content.cleanup();

        bucketId = null;
        // Reset to zero (not null), as before.
        docSerialId = 0;
    }

    /**
     * Builds a multi-line, human readable dump of all populated sections.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder(">>>> Document Starts <<<<");
        if ( null != bucketId ) sb.append("\n Bucket :").append(bucketId.toString());
        if ( null != docSerialId ) sb.append("\n Doc Serial :").append(docSerialId);
        if ( null != tenant ) sb.append("\n Tenant :").append(tenant);
        if ( null != terms ) sb.append("\n Term :").append(terms.toString());
        if ( null != acl ) sb.append("\n Acl : ").append(acl.toString());
        if ( null != meta ) sb.append("\n Meta :").append(meta.toString());
        if ( null != teaser ) sb.append("\n Teaser:").append(teaser.toString());
        if ( null != content ) sb.append("\n Content").append(content.toString());
        sb.append("\n>>>> Document Ends <<<<\n");
        return sb.toString();
    }

    /**
     * Writes this document as a <code>&lt;doc&gt;</code> element.
     * Only populated fields and sections are written; <code>terms</code>
     * is not part of the output. Tenant and ip address are written as-is,
     * without XML escaping.
     *
     * @param writer The destination
     * @throws IOException When the writer fails
     */
    public void toXml(Writer writer) throws IOException {
        writer.append("<doc>");
        if ( null != tenant ) writer.append("<tenant>").append(tenant).append("</tenant>");
        if ( null != bucketId ) writer.append("<bucket>").append(bucketId.toString()).append("</bucket>");
        if ( null != docSerialId ) writer.append("<serial>").append(docSerialId.toString()).append("</serial>");
        if ( null != ipAddress ) writer.append("<ip>").append(ipAddress).append("</ip>");
        if ( null != acl ) acl.toXml(writer);
        if ( null != meta ) meta.toXml(writer);
        if ( null != teaser ) teaser.toXml(writer);
        if ( null != content ) content.toXml(writer);
        writer.append("</doc>");
    }

    /**
     * Readers associated with this document; declared transient and not
     * touched by any method of this class.
     */
    public transient List<ReaderType> readers = null;
}