/*
* ModeShape (http://www.modeshape.org)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.modeshape.jcr.index.lucene;
import java.math.BigDecimal;
import java.math.BigInteger;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
/**
 * Utility for working with Lucene field values.
 *
 * @since 4.5
 */
public class FieldUtil {

    /** The name of the Lucene field under which {@link #idField(String)} places a document's key. */
    protected static final String ID = ":id:";

    /** The prefix that {@link #lengthField(String)} puts in front of a property name. */
    protected static final String LENGTH_PREFIX = ":len:";

    /**
     * Creates the name of the Lucene document field which will store the length of a property.
     *
     * @param propertyName the name of the property; may not be null
     * @return the name of the length field; never {@code null}
     */
    public static String lengthField( String propertyName ) {
        return LENGTH_PREFIX + propertyName;
    }

    /**
     * Creates the {@link StringField} named {@link #ID} that carries the supplied key. The field value is stored in the
     * document ({@link Field.Store#YES}).
     *
     * @param key the key of the document; may not be null
     * @return the new field; never null
     */
    protected static Field idField( String key ) {
        // the ids are always stored in the document in their bytes format because Lucene performs better with those
        return new StringField(ID, new BytesRef(key), Field.Store.YES);
    }

    /**
     * Creates a query that matches the {@link #idTerm(String) id term} for the supplied key.
     *
     * @param key the key of the document; may not be null
     * @return the new query; never null
     */
    protected static TermQuery idQuery( String key ) {
        return new TermQuery(idTerm(key));
    }

    /**
     * Creates the {@link Term} for the {@link #ID} field and the supplied key, using the same bytes representation of the key
     * as {@link #idField(String)}.
     *
     * @param key the key of the document; may not be null
     * @return the new term; never null
     */
    protected static Term idTerm( String key ) {
        return new Term(ID, new BytesRef(key));
    }

    /**
     * Creates a canonical string representation of the supplied {@link BigDecimal} value, whereby all string representations
     * are lexicographically sortable. This makes it possible to store the wide range of values that can be represented by
     * BigDecimal, while still enabling sorting and range queries.
     * <p>
     * This canonical form represents all decimal values using a prescribed format, which is based upon <a
     * href="http://www.mail-archive.com/java-user@lucene.apache.org/msg23632.html">Steven Rowe's suggestion</a> but with
     * modifications to handle variable-length exponents (per his suggestion in the last sentence), use a space between fields
     * only where required (for minimal length), and utilize an optimized (e.g., shorter) form when the value is '0' or the
     * exponent is '0'. Thus, this format contains only digits (e.g., '0'..'9'), the '-' and 'A' characters, and a single space
     * that separates the exponent length from the exponent.
     * </p>
     *
     * <pre>
     *    &lt;significand-sign&gt;&lt;exponent-sign&gt;&lt;exponent-length&gt; &lt;exponent&gt;&lt;significand&gt;
     * </pre>
     *
     * where:
     * <ul>
     * <li>the <b>significand</b> is the part of the number containing the significant figures, and is a (big) integer value
     * obtained from the BigDecimal using {@link BigDecimal#unscaledValue()}, with any trailing zeros removed;</li>
     * <li>the <b>exponent</b> is the integer used to define the number of factors of 10 that are applied to the significand,
     * obtained by computing <code>value.precision() - value.scale() - 1</code> (using 64-bit arithmetic so that extreme scales
     * do not overflow);</li>
     * </ul>
     * Thus the fields are defined as:
     * <ul>
     * <li>the <code>&lt;significand-sign&gt;</code> is '-' if the significand is negative, '0' if equal to zero, or '1' if
     * positive;</li>
     * <li>the <code>&lt;exponent-sign&gt;</code> is '-' if the exponent is negative, '0' if equal to zero, or '1' if positive;
     * if '0', then the <code>&lt;exponent-length&gt;</code> and <code>&lt;exponent&gt;</code> fields (and the space between
     * them) are not written;</li>
     * <li>the <code>&lt;exponent-length&gt;</code> is the positive value representing the length of the
     * <code>&lt;exponent&gt;</code>, and is not included when the <code>&lt;exponent-sign&gt;</code> is '0';</li>
     * <li>the <code>&lt;exponent&gt;</code> is the absolute value of the exponent described above;</li>
     * <li>the <code>&lt;significand&gt;</code> is the absolute value of the significand described above.</li>
     * </ul>
     * <p>
     * In the case of a positive significand and a negative exponent, both the <code>&lt;exponent-length&gt;</code> and the
     * <code>&lt;exponent&gt;</code> fields are negated such that each digit is replaced with <code>(base - digit - 1)</code>.
     * </p>
     * <p>
     * In the case of a negative significand, the <code>&lt;significand&gt;</code> field is negated such that each digit is
     * replaced with <code>(base - digit - 1)</code> and appended by 'A' (which is greater than all other digits) to ensure that
     * significands with greater precision are ordered before those that share significand prefixes but have lesser precision.
     * In this case the sign of the exponent is reversed before it is written, the <code>&lt;exponent-length&gt;</code> field is
     * always negated, and the <code>&lt;exponent&gt;</code> field is negated when the reversed exponent is negative.
     * </p>
     * <p>
     * Thus, the format for a negative BigDecimal value becomes:
     * </p>
     *
     * <pre>
     *    -&lt;reversed-exponent-sign&gt;&lt;negated-exponent-length&gt; &lt;exponent&gt;&lt;negated-significand&gt;&lt;sentinel&gt;
     * </pre>
     *
     * where the <code>&lt;sentinel&gt;</code> is always 'A'.
     * <h3>Examples</h3>
     * <p>
     * Here are several examples that show BigDecimal values and their corresponding canonical string representation:
     * </p>
     *
     * <pre>
     *    +5.E-3      =&gt; 1-8 65
     *    +1.E-2      =&gt; 1-8 71
     *    +1.0E-2     =&gt; 1-8 71
     *    +1.0000E-2  =&gt; 1-8 71
     *    +1.1E-2     =&gt; 1-8 711
     *    +1.11E-2    =&gt; 1-8 7111
     *    +1.2E-2     =&gt; 1-8 712
     *    +5.E-2      =&gt; 1-8 75
     *    +7.3E+2     =&gt; 111 273
     *    +7.4E+2     =&gt; 111 274
     *    +7.45E+2    =&gt; 111 2745
     *    +8.7654E+3  =&gt; 111 387654
     * </pre>
     *
     * Here is how a BigDecimal value of {@link BigDecimal#ZERO zero} is represented:
     *
     * <pre>
     *    0.0E0       =&gt; 0
     * </pre>
     *
     * BigDecimal values with an exponent of '0' are represented as follows:
     *
     * <pre>
     *    +1.2E0      =&gt; 1012
     *    -1.2E0      =&gt; -087A
     * </pre>
     *
     * And here are some negative value examples:
     *
     * <pre>
     *    -8.7654E+3  =&gt; --8 612345A
     *    -7.45E+2    =&gt; --8 7254A
     *    -7.4E+2     =&gt; --8 725A
     *    -7.3E+2     =&gt; --8 726A
     *    -5.E-2      =&gt; -18 24A
     *    -1.2E-2     =&gt; -18 287A
     *    -1.11E-2    =&gt; -18 2888A
     *    -1.1E-2     =&gt; -18 288A
     *    -1.0000E-2  =&gt; -18 28A
     *    -1.0E-2     =&gt; -18 28A
     *    -1.E-2      =&gt; -18 28A
     *    -5.E-3      =&gt; -18 34A
     *    -5.E-4      =&gt; -18 44A
     * </pre>
     * <p>
     * This canonical form is intended to cover the full range of {@link BigDecimal} values. Because trailing zeros of the
     * significand are dropped, values that are numerically equal but differ in scale (e.g., 1.0E-2 and 1.0000E-2) share the
     * same canonical form.
     * </p>
     *
     * @param value the value to be converted into its canonical form; may not be null
     * @return the canonical string representation; never null or empty
     * @see #stringToDecimal(String)
     */
    public static String decimalToString( BigDecimal value ) {
        StringBuilder sb = new StringBuilder();
        boolean negate = false;
        // <significand-sign> field
        switch (value.signum()) {
            case -1:
                sb.append('-');
                negate = true;
                break;
            case 1:
                sb.append('1');
                break;
            default:
                // Zero has its own, shortest possible form
                return "0";
        }
        // <exponent-sign>, <exponent-length> and <exponent> fields.
        // Widen to long BEFORE subtracting: precision() and scale() are ints, and e.g. a scale of Integer.MIN_VALUE would
        // otherwise silently overflow and flip the sign of the exponent.
        long exponent = (long)value.precision() - value.scale() - 1L;
        if (exponent == 0L) {
            sb.append('0');
        } else {
            // For negative values the ordering must be reversed, so the exponent's sign is flipped
            if (negate) exponent = -exponent;
            String exponentField = String.valueOf(Math.abs(exponent));
            int length = exponentField.length();
            char sign = exponent > 0 ? '1' : '-';
            if (exponent < 0) exponentField = negate(exponentField);
            // <exponent-length>
            String exponentLength = String.valueOf(length);
            if (negate || exponent < 0) exponentLength = negate(exponentLength);
            sb.append(sign).append(exponentLength).append(' ').append(exponentField);
        }
        // <significand> is written as the absolute value without trailing zeros
        StringBuilder significand = new StringBuilder(value.unscaledValue().abs().toString());
        removeTralingZeros(significand);
        // Append the significand (and the sentinel character)...
        sb.append(negate ? negate(significand).append('A') : significand);
        return sb.toString();
    }

    /**
     * Converts the canonical string representation of a {@link BigDecimal} value into the object form.
     * <p>
     * See {@link #decimalToString(BigDecimal)} for documentation of the canonical form. The supplied string is expected to be
     * well-formed (i.e., produced by that method); it is only checked with assertions.
     * </p>
     *
     * @param value the canonical string representation; may not be null or empty
     * @return the BigDecimal representation; never null
     * @see #decimalToString(BigDecimal)
     */
    public static BigDecimal stringToDecimal( String value ) {
        assert value != null;
        assert value.length() != 0;
        if ("0".equals(value)) return BigDecimal.ZERO;
        boolean negate = false;
        if (value.charAt(0) == '-') {
            // Negative, so remove the trailing sentinel ...
            assert value.charAt(value.length() - 1) == 'A';
            value = value.substring(0, value.length() - 1);
            negate = true;
        }
        // <exponent-sign>, <exponent-length> and <exponent> fields
        long exponent = 0L;
        boolean negateExponent = false;
        switch (value.charAt(1)) {
            case '0':
                // No exponent fields were written; the significand starts right after the two sign characters
                value = value.substring(2);
                break;
            case '-':
                negateExponent = true;
                // $FALL-THROUGH$
            case '1':
            default:
                // Read in the <exponent-length>
                int indexOfSpace = value.indexOf(' ', 2);
                String expLengthField = value.substring(2, indexOfSpace);
                if (negate || negateExponent) expLengthField = negate(expLengthField);
                int lengthOfExponent = Integer.parseInt(expLengthField);
                // Read in the <exponent> (after the space) ...
                int startIndex = indexOfSpace + 1;
                int endIndex = startIndex + lengthOfExponent;
                String exponentField = value.substring(startIndex, endIndex);
                exponent = Long.parseLong(negateExponent ? negate(exponentField) : exponentField);
                // The exponent's sign was reversed when a negative value was written, so reverse it back
                if (negate) negateExponent = !negateExponent;
                if (negateExponent) exponent = -exponent;
                value = value.substring(endIndex);
        }
        // <significand>
        if (negate) {
            value = negate(value);
        }
        BigInteger significand = new BigInteger(value);
        // Inverse of 'exponent = precision - scale - 1', where the precision is the number of significand digits
        int scale = (int)(value.length() - exponent - 1);
        // Now create the result ...
        return new BigDecimal(negate ? significand.negate() : significand, scale);
    }

    /**
     * Compute the "negated" string, which replaces the digits (0 becomes 9, 1 becomes 8, ... and 9 becomes 0). Space and '-'
     * characters are left as they are.
     *
     * @param value the input string; may not be null
     * @return the negated string; never null
     * @see #negate(StringBuilder)
     */
    protected static String negate( String value ) {
        return negate(new StringBuilder(value)).toString();
    }

    /**
     * Compute the "negated" string, which replaces the digits (0 becomes 9, 1 becomes 8, ... and 9 becomes 0). Space and '-'
     * characters are left as they are. The supplied builder is modified in place and returned.
     *
     * @param value the input string builder, expected to contain only digits, spaces and '-' characters; may not be null
     * @return the supplied builder, now containing the negated string; never null
     * @see #negate(String)
     */
    protected static StringBuilder negate( StringBuilder value ) {
        for (int i = 0, len = value.length(); i != len; ++i) {
            char c = value.charAt(i);
            if (c == ' ' || c == '-') continue;
            // Nine's complement of the digit: '0' <-> '9', '1' <-> '8', ...
            value.setCharAt(i, (char)('9' - c + '0'));
        }
        return value;
    }

    /**
     * Utility to remove the trailing 0's. The first character is never removed, so a builder that consists solely of '0'
     * characters is reduced to a single "0" and an empty builder is left untouched.
     *
     * @param sb the input string builder, which is modified in place; may not be null
     */
    protected static void removeTralingZeros( StringBuilder sb ) {
        int lastIndex = sb.length() - 1;
        int index = lastIndex;
        // Stop at index 0 so that an all-zero value cannot run past the start of the builder
        while (index > 0 && sb.charAt(index) == '0') {
            --index;
        }
        if (index < lastIndex) sb.setLength(index + 1);
    }

    /* Prevent instantiation */
    private FieldUtil() {
    }
}