/*
* Copyright 2010 Bizosys Technologies Limited
*
* Licensed to the Bizosys Technologies Limited (Bizosys) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Bizosys licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bizosys.hsearch.common;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import org.apache.log4j.Logger;
import com.bizosys.hsearch.common.Account.AccountInfo;
import com.bizosys.hsearch.filter.Storable;
import com.bizosys.hsearch.util.GeoId;
import com.bizosys.oneline.ApplicationFault;
import com.bizosys.oneline.SystemFault;
import com.bizosys.oneline.util.StringUtils;
/**
* This object carries all information necessary for indexing a document.
* This object is also serializable, and a client can provide it as an
* XML document (REST API).
* @see GeoId
*/
public class HDocument {

    public static Logger l = CommonLog.l;

    /** Debug flag, read from the logger once when this class is initialized. */
    private static final boolean DEBUG_ENABLED = l.isDebugEnabled();

    /**
     * Document Merged Storage (Bucket) Number
     */
    public Long bucketId = null;

    /**
     * Document Serial number inside the merged storage (Bucket)
     */
    public Short docSerialId = null;

    /**
     * This is the original Id of the document.
     * This id usually flows from the original document source,
     * e.g. the primary key of a database table. The mapped bucket Id and
     * document serial number inside the bucket represent uniqueness inside
     * the index.
     */
    public String key = null;

    /**
     * URL for accessing the document directly
     */
    public String url = null;

    /**
     * Document title. This also shows in the search result record title
     */
    public String title = null;

    /**
     * The preview text of the document. It can be a URL to an image or inline
     * XML information.
     */
    public String preview = null;

    /**
     * The matching section of the search word occurrence is picked from
     * the cached text sections
     */
    public String cacheText = null;

    /**
     * Document content Fields
     */
    public List<Field> fields = null;

    /**
     * Manually supplied list of citations mentioned in the document
     */
    public List<String> citationTo = null;

    /**
     * Manually supplied list of citations from other documents
     */
    public List<String> citationFrom = null;

    /**
     * Who has view access to this document
     */
    public AccessDefn viewPermission = null;

    /**
     * Who has edit access to this document
     */
    public AccessDefn editPermission = null;

    /**
     * The state of the document (Applied, Processed, Active, Inactive)
     */
    public String state = null;

    /**
     * The tenant
     */
    public String tenant = null;

    /**
     * Just the Organization Unit (HR, PRODUCTION, SI).
     * If there are multiple levels, separate them with \ or .
     */
    public String team = null;

    /**
     * Easting refers to the eastward-measured distance (or the x-coordinate).
     * Use the <code>GeoId.convertLatLng</code> method for getting the northing
     * and easting from a given latitude and longitude.
     */
    public Float eastering = 0.0f;

    /**
     * Northing refers to the northward-measured distance (or the y-coordinate).
     * Use the <code>GeoId.convertLatLng</code> method for getting the northing
     * and easting from a given latitude and longitude.
     */
    public Float northing = 0.0f;

    /**
     * The default weight of the document. A few examples for computing the weight are
     * <ul>
     * <li>Editor assigned</li>
     * <li>Default weight assigned to the document source e.g. pages from wikipedia.org</li>
     * <li>Default weight assigned to the document editor e.g. blogs from CEO</li>
     * </ul>
     */
    public int weight = 0;

    /**
     * Document Type. It's the record type.
     * Use the <code>DocumentType</code> class to define default document types.
     */
    public String docType = null;

    /**
     * These are author keywords or the meta section of the page
     */
    public List<String> tags = null;

    /**
     * These are user keywords formed from the search terms
     */
    public List<String> socialText = null;

    /**
     * Document creation date
     */
    public Date createdOn = null;

    /**
     * Document modification date
     */
    public Date modifiedOn = null;

    /**
     * When the document is scheduled to die or died
     */
    public Date validTill = null;

    /**
     * From which IP address this document was created.
     * This is especially for machine proximity ranking.
     */
    public String ipAddress = null;

    /**
     * High Security setting. During high security,
     * the information is kept encrypted.
     */
    public boolean securityHigh = false;

    /**
     * By default the sentiment is positive.
     */
    public boolean sentimentPositive = true;

    /**
     * Document Language. Default is English
     */
    public Locale locale = Locale.ENGLISH;

    /**
     * Lazily built "tenant/key" value, see {@link #getTenantDocumentKey()}.
     * It is dropped by {@link #loadBucketAndSerials(AccountInfo)} when the
     * tenant changes and by {@link #recycle()}.
     */
    private String hsearchKey = null;

    /**
     * Gives the tenant qualified document key of this document, in the form
     * <code>tenant/key</code>. The value is computed on the first successful
     * call and cached afterwards.
     * <p>
     * Note: {@link #tenant} and {@link #key} are public fields; assigning
     * them directly after this method has been called does not refresh the
     * cached value.
     * @return The tenant qualified document key
     * @throws ApplicationFault When the tenant or the key is empty
     */
    public String getTenantDocumentKey() throws ApplicationFault {
        if (null == hsearchKey) {
            hsearchKey = getTenantDocumentKey(this.tenant, this.key);
        }
        return hsearchKey;
    }

    /**
     * Builds the tenant qualified document key, <code>tenant + "/" + docId</code>.
     * @param tenant The tenant name
     * @param docId The original document Id
     * @return The tenant qualified document key
     * @throws ApplicationFault When the tenant or the document Id is empty
     */
    public static String getTenantDocumentKey(String tenant, String docId) throws ApplicationFault {
        if (StringUtils.isEmpty(tenant)) {
            throw new ApplicationFault("Unknown tenant.");
        }
        if (StringUtils.isEmpty(docId)) {
            throw new ApplicationFault("Document Id not available.");
        }
        return tenant + "/" + docId;
    }

    /**
     * Creates an empty document.
     */
    public HDocument() {
    }

    /**
     * Initialize with a tenant
     * @param tenantName The Unique account Name
     */
    public HDocument(String tenantName) {
        this.tenant = tenantName;
    }

    /**
     * Initialize with a key
     * @param key The Original Document Key
     * @param tenantName The Unique account Name
     */
    public HDocument(String key, String tenantName) {
        this.key = key;
        this.tenant = tenantName;
    }

    /**
     * Resolves the bucket Id and the document serial Id of this document
     * against the given account and sets the tenant to the account name.
     * <ul>
     * <li>No bucket Id: the current bucket of the account is used. A serial Id
     * without a bucket Id is rejected.</li>
     * <li>Bucket Id given: it has to match one of the buckets of the account.</li>
     * <li>No serial Id: a new one is generated for the bucket. When the bucket
     * is full, the next bucket of the account is taken and generation is
     * tried once more.</li>
     * </ul>
     * @param acc The account the document is operated under
     * @throws ApplicationFault When the account is missing, the serial Id is
     * present without a bucket Id, or the given bucket is not among the
     * account buckets
     * @throws SystemFault Declared for the Account / Storable calls made here
     * @throws BucketIsFullException When the serial Id generation on the
     * next bucket also reports a full bucket
     */
    public void loadBucketAndSerials(AccountInfo acc)
            throws ApplicationFault, SystemFault, BucketIsFullException {

        // Fail before any serial Id is generated; acc.name is needed at the end anyway.
        if (null == acc) {
            throw new ApplicationFault("Account information is not available.");
        }

        // Check for bucket id
        if (null == this.bucketId) {
            if (null != this.docSerialId) {
                throw new ApplicationFault(
                    "Bucker is absent while document position is present. Data corrupted.");
            }
            this.bucketId = Account.getCurrentBucket(acc);
        } else {
            // The supplied bucket must be one of the account buckets.
            byte[] givenbucketB = Storable.putLong(this.bucketId);
            boolean illegalBucket = true;
            for (byte[] allowedBucketB : acc.buckets) {
                if (Storable.compareBytes(givenbucketB, allowedBucketB)) {
                    illegalBucket = false;
                    break;
                }
            }
            if (illegalBucket) {
                String msg = "User is not authorized to operate on " +
                    this.bucketId + " bucket.";
                l.warn(msg);
                throw new ApplicationFault(msg);
            }
        }

        if (null == this.docSerialId) {
            try {
                this.docSerialId = Account.generateADocumentSerialId(this.bucketId);
            } catch (BucketIsFullException ex) {
                // Bucket is full: move to the next bucket and retry once.
                // A second BucketIsFullException propagates to the caller.
                this.bucketId = Account.getNextBucket(acc);
                this.docSerialId = Account.generateADocumentSerialId(this.bucketId);
            }
        }

        if (DEBUG_ENABLED) {
            l.debug("Bucket/DocPos : " + bucketId + "/" + docSerialId);
        }

        // The cached tenant/key value was built from the previous tenant;
        // drop it when the tenant really changes so it is rebuilt on demand.
        if (null == this.tenant || !this.tenant.equals(acc.name)) {
            this.hsearchKey = null;
        }
        this.tenant = acc.name;
    }

    /**
     * Checks that the key, tenant, bucket Id and document serial Id are all set.
     * @return True when none of the four is null
     * @throws ApplicationFault Declared for callers; not thrown by this check
     */
    public boolean validate() throws ApplicationFault {
        return null != key
            && null != tenant
            && null != bucketId
            && null != docSerialId;
    }

    /**
     * Clears and releases the citation lists, the view / edit permissions,
     * the tags and the social text, and drops the cached tenant document key.
     * Other members (including <code>fields</code>) are left untouched.
     */
    public void recycle() {
        if (null != citationTo) {
            citationTo.clear();
            citationTo = null;
        }

        if (null != citationFrom) {
            citationFrom.clear();
            citationFrom = null;
        }

        if (null != viewPermission) {
            if (null != viewPermission.getAccess()) {
                viewPermission.getAccess().clear();
            }
            viewPermission = null;
        }

        if (null != editPermission) {
            if (null != editPermission.getAccess()) {
                editPermission.getAccess().clear();
            }
            editPermission = null;
        }

        if (null != tags) {
            tags.clear();
            tags = null;
        }

        if (null != socialText) {
            socialText.clear();
            socialText = null;
        }

        // A recycled instance may get a new key / tenant assigned; the key
        // is rebuilt from the current values on the next request.
        hsearchKey = null;
    }

    /**
     * Gives the key, tenant, bucket Id, document serial Id, url and title,
     * one <code>name=[value]</code> pair per line.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("key=[").append(key).append("]\n");
        sb.append("tenant=[").append(tenant).append("]\n");
        sb.append("bucketId=[").append(bucketId).append("]\n");
        sb.append("docSerialId=[").append(docSerialId).append("]\n");
        sb.append("url=[").append(url).append("]\n");
        sb.append("title=[").append(title).append("]\n");
        return sb.toString();
    }
}