/* * Copyright 2010 The Apache Software Foundation * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.bizosys.hsearch.index; import java.io.IOException; import java.io.Writer; import java.util.List; import com.bizosys.hsearch.common.HDocument; import com.bizosys.hsearch.common.Storable; import com.bizosys.hsearch.hbase.HReader; import com.bizosys.hsearch.hbase.NVBytes; import com.bizosys.hsearch.schema.IOConstants; import com.bizosys.oneline.ApplicationFault; import com.bizosys.oneline.SystemFault; /** * Documents are the unit of indexing and search. * A Document consists of: * <lu> * <li>Set of fields</li> * <li>Access information</li> * <li>Meta information</li> * <li>Result Display Section</li> * </lu> * <br/> * A document is uniquely identified by the doc merging Id (Bucket) * and the document serial number inside the bucket. * @author karan * */ public class Doc { /** * Term vectors created after parsing the document */ public DocTerms terms = null; /** * The document meta section */ public DocMeta meta = null; /** * Document view and edit access control settings */ public DocAcl acl = null; /** * The result display formats */ public DocTeaser teaser = null; /** * The content section which consists of fields */ public DocContent content = null; /** * From which machine the document is submitted */ public String ipAddress = null; /** * The */ public Long bucketId = null; public Short docSerialId = null; public Doc() { } public Doc(HDocument hDoc) throws SystemFault, ApplicationFault{ this.bucketId = hDoc.bucketId; this.docSerialId = hDoc.docSerialId; this.ipAddress = hDoc.ipAddress; this.meta = new DocMeta(hDoc); this.teaser = new DocTeaser(hDoc); this.content = new DocContent(hDoc); this.acl = new DocAcl(hDoc); this.terms = new DocTerms(); } public Doc(String origId) throws SystemFault, ApplicationFault { /** * Get the mapped Id */ List<NVBytes> mappingB = IdMapping.getKey(origId.getBytes()); if ( null == mappingB) throw new ApplicationFault("Id not found :" + origId); if ( 1 != mappingB.size()) throw new ApplicationFault(mappingB.size() + " Ids found :" + origId); String mappedKey = new String(mappingB.get(0).data); this.bucketId = IdMapping.getBucket(mappedKey); this.docSerialId = IdMapping.getDocSerial(mappedKey); mappingB.clear(); /** * Get the Content */ List<NVBytes> contentB = HReader.getCompleteRow(IOConstants.TABLE_CONTENT, mappedKey.getBytes()); if ( null != contentB) { this.content = new DocContent(contentB ); contentB.clear(); } /** * Get the Meta */ List<NVBytes> previewB = HReader.getCompleteRow( IOConstants.TABLE_PREVIEW, mappedKey.getBytes()); if ( null != previewB) { this.teaser = new DocTeaser(origId.getBytes(), previewB); this.teaser.id = new Storable(origId); for (NVBytes nv : previewB) { if ( Storable.compareBytes(nv.name, IOConstants.META_BYTES)) this.meta = new DocMeta(nv.data); else if ( Storable.compareBytes(nv.name, IOConstants.ACL_BYTES)) this.acl = new DocAcl(nv.data); } previewB.clear(); } } /** * Recycles this document. * Helps GC to garbase collect better. * */ public void recycle() { this.terms.cleanup(); this.meta.cleanup(); this.acl.cleanup(); this.teaser.cleanup(); this.content.cleanup(); bucketId = null; docSerialId = 0; } @Override public String toString() { StringBuilder sb = new StringBuilder(">>>> Document Starts <<<<"); if ( null != bucketId ) sb.append("\n Bucket :").append(bucketId.toString()); if ( null != docSerialId ) sb.append("\n Doc Serial :").append(docSerialId); if ( null != terms ) sb.append("\n Term :").append(terms.toString()); if ( null != acl ) sb.append("\n Acl : ").append(acl.toString()); if ( null != meta ) sb.append("\n Meta :").append(meta.toString()); if ( null != teaser ) sb.append("\n Teaser:").append(teaser.toString()); if ( null != content ) sb.append("\n Content").append(content.toString()); sb.append("\n>>>> Document Ends <<<<\n"); return sb.toString(); } public void toXml(Writer writer) throws IOException { if ( null != bucketId ) writer.append("<b>").append(bucketId.toString()).append("</b>"); if ( null != docSerialId ) writer.append("<n>").append(docSerialId.toString()).append("</n>"); //if ( null != acl ) writer.append("<a>").append(acl.toString()).append("</a>"); if ( null != meta ) meta.toXml(writer); if ( null != teaser ) teaser.toXml(writer); if ( null != content ) content.toXml(writer); } }