/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.stats;
import java.lang.invoke.MethodHandles;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.util.Base64;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Various utilities for de/serialization of term stats and collection stats.
*/
public class StatsUtil {

  private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  /** Separates the individual values inside one serialized stats entry. */
  private static final String VALUE_SEPARATOR = ",";

  /** Separates serialized stats entries inside a serialized map. */
  private static final String ENTRY_SEPARATOR = "!";

  /**
   * Make a String representation of {@link CollectionStats}.
   * <p>
   * The format is {@code field,maxDoc,docCount,sumTotalTermFreq,sumDocFreq}.
   *
   * @param colStats the stats to serialize, must not be null
   * @return the comma-separated representation
   */
  public static String colStatsToString(CollectionStats colStats) {
    StringBuilder sb = new StringBuilder();
    sb.append(colStats.field).append(VALUE_SEPARATOR);
    sb.append(colStats.maxDoc).append(VALUE_SEPARATOR);
    sb.append(colStats.docCount).append(VALUE_SEPARATOR);
    sb.append(colStats.sumTotalTermFreq).append(VALUE_SEPARATOR);
    sb.append(colStats.sumDocFreq);
    return sb.toString();
  }

  /**
   * Parse a single entry produced by {@link #colStatsToString(CollectionStats)}.
   *
   * @param data the serialized entry
   * @return the parsed stats, or null (after logging a warning) when the
   *         entry is empty, does not have exactly 5 values, or cannot be parsed
   */
  private static CollectionStats colStatsFromString(String data) {
    if (isBlank(data)) {
      LOG.warn("Invalid empty collection stats string");
      return null;
    }
    String[] vals = data.split(VALUE_SEPARATOR);
    if (vals.length != 5) {
      LOG.warn("Invalid collection stats string, num fields {} != 5, '{}'", vals.length, data);
      return null;
    }
    String field = vals[0];
    try {
      long maxDoc = Long.parseLong(vals[1]);
      long docCount = Long.parseLong(vals[2]);
      long sumTotalTermFreq = Long.parseLong(vals[3]);
      long sumDocFreq = Long.parseLong(vals[4]);
      return new CollectionStats(field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq);
    } catch (Exception e) {
      // Best effort: a malformed entry is reported and skipped by the caller.
      LOG.warn("Invalid collection stats string '{}': {}", data, e.toString());
      return null;
    }
  }

  /**
   * Make a String representation of a {@link Term} in the form
   * {@code field:base64(termBytes)}.
   *
   * @param t the term to serialize, must not be null
   * @return the field name, a colon, and the Base64-encoded term bytes
   */
  public static String termToString(Term t) {
    StringBuilder sb = new StringBuilder();
    sb.append(t.field()).append(':');
    BytesRef bytes = t.bytes();
    // Encode exactly the valid slice of the backing array: 'length' bytes
    // starting at 'offset'. (Passing 'offset' as the length encoded nothing
    // for the common offset == 0 case.)
    sb.append(Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.length));
    return sb.toString();
  }

  /**
   * Parse a term from a {@code field:value} string, splitting on the first
   * colon and using the remainder verbatim as the term text.
   * <p>
   * NOTE: this is not the inverse of {@link #termToString(Term)}: the value is
   * not Base64-decoded. Within this class the method is only fed the
   * {@code term} string of a {@link TermStats}, which
   * {@link #termStatsFromString(String, Term)} builds with
   * {@link Term#toString()}, so plain text is what it receives here.
   *
   * @param data the serialized term
   * @return the parsed term, or null (after logging a warning) when the data
   *         is empty, has no colon, or the term cannot be created
   */
  private static Term termFromString(String data) {
    if (isBlank(data)) {
      LOG.warn("Invalid empty term value");
      return null;
    }
    int idx = data.indexOf(':');
    if (idx == -1) {
      LOG.warn("Invalid term data without ':': '{}'", data);
      return null;
    }
    String field = data.substring(0, idx);
    String value = data.substring(idx + 1);
    try {
      return new Term(field, value);
    } catch (Exception e) {
      LOG.warn("Invalid term value '{}'", value);
      return null;
    }
  }

  /**
   * Make a String representation of {@link TermStats}.
   * <p>
   * The format is {@code [term,]docFreq,totalTermFreq}.
   *
   * @param termStats the stats to serialize, must not be null
   * @param includeTerm whether to prefix the output with the term
   * @return the comma-separated representation
   */
  public static String termStatsToString(TermStats termStats,
      boolean includeTerm) {
    StringBuilder sb = new StringBuilder();
    if (includeTerm) {
      sb.append(termStats.term).append(VALUE_SEPARATOR);
    }
    sb.append(termStats.docFreq).append(VALUE_SEPARATOR);
    sb.append(termStats.totalTermFreq);
    return sb.toString();
  }

  /**
   * Parse a single entry produced by
   * {@link #termStatsToString(TermStats, boolean)}.
   *
   * @param data the serialized entry, with or without a leading term
   * @param t the term to use when the entry carries no term or its term
   *        cannot be decoded; may be null
   * @return the parsed stats, or null (after logging a warning) when the
   *         entry is empty, has fewer than 2 values, no term is available,
   *         or the numeric values cannot be parsed
   */
  private static TermStats termStatsFromString(String data, Term t) {
    if (isBlank(data)) {
      LOG.warn("Invalid empty term stats string");
      return null;
    }
    String[] vals = data.split(VALUE_SEPARATOR);
    if (vals.length < 2) {
      LOG.warn("Invalid term stats string, num fields {} < 2, '{}'", vals.length, data);
      return null;
    }
    Term termToUse = t;
    int idx = 0;
    if (vals.length == 3) {
      // Entry carries its own term in the first slot; numbers start at index 1.
      idx++;
      Term term = termFromString(vals[0]);
      if (term != null) {
        termToUse = term;
        if (t != null) {
          assert term.equals(t);
        }
      }
      // else: term decoding failed, keep the caller-supplied fallback
    }
    if (termToUse == null) {
      LOG.warn("Missing term in termStats '{}'", data);
      return null;
    }
    try {
      long docFreq = Long.parseLong(vals[idx++]);
      long totalTermFreq = Long.parseLong(vals[idx]);
      return new TermStats(termToUse.toString(), docFreq, totalTermFreq);
    } catch (Exception e) {
      // Best effort: a malformed entry is reported and skipped by the caller.
      LOG.warn("Invalid termStats string '{}'", data);
      return null;
    }
  }

  /**
   * Parse a map of collection stats from the format produced by
   * {@link #colStatsMapToString(Map)}: entries separated by {@code '!'}.
   * Entries that cannot be parsed are logged and skipped.
   *
   * @param data the serialized map
   * @return a map keyed by field name, or null when data is null or blank
   */
  public static Map<String,CollectionStats> colStatsMapFromString(String data) {
    if (isBlank(data)) {
      return null;
    }
    Map<String,CollectionStats> map = new HashMap<>();
    for (String es : data.split(ENTRY_SEPARATOR)) {
      CollectionStats stats = colStatsFromString(es);
      if (stats != null) {
        map.put(stats.field, stats);
      }
    }
    return map;
  }

  /**
   * Serialize a map of collection stats as {@code '!'}-separated entries,
   * each formatted by {@link #colStatsToString(CollectionStats)}. Only the
   * map values are written; the keys are not part of the output.
   *
   * @param stats the stats to serialize; may be null
   * @return the serialized map, or an empty string when stats is null or empty
   */
  public static String colStatsMapToString(Map<String,CollectionStats> stats) {
    if (stats == null || stats.isEmpty()) {
      return "";
    }
    StringBuilder sb = new StringBuilder();
    for (Entry<String,CollectionStats> e : stats.entrySet()) {
      if (sb.length() > 0) {
        sb.append(ENTRY_SEPARATOR);
      }
      sb.append(colStatsToString(e.getValue()));
    }
    return sb.toString();
  }

  /**
   * Parse a map of term stats from the format produced by
   * {@link #termStatsMapToString(Map)}: entries separated by {@code '!'}.
   * No fallback term is supplied, so every entry must carry its own term;
   * entries that cannot be parsed are logged and skipped.
   *
   * @param data the serialized map
   * @return a map keyed by the term string, or null when data is null or blank
   */
  public static Map<String,TermStats> termStatsMapFromString(String data) {
    if (isBlank(data)) {
      return null;
    }
    Map<String,TermStats> map = new HashMap<>();
    for (String es : data.split(ENTRY_SEPARATOR)) {
      TermStats termStats = termStatsFromString(es, null);
      if (termStats != null) {
        map.put(termStats.term, termStats);
      }
    }
    return map;
  }

  /**
   * Serialize a map of term stats as {@code '!'}-separated entries, each
   * formatted by {@link #termStatsToString(TermStats, boolean)} with the term
   * included. Only the map values are written; the keys are not part of the
   * output.
   *
   * @param stats the stats to serialize; may be null
   * @return the serialized map, or an empty string when stats is null or empty
   */
  public static String termStatsMapToString(Map<String,TermStats> stats) {
    if (stats == null || stats.isEmpty()) {
      return "";
    }
    StringBuilder sb = new StringBuilder();
    for (Entry<String,TermStats> e : stats.entrySet()) {
      if (sb.length() > 0) {
        sb.append(ENTRY_SEPARATOR);
      }
      sb.append(termStatsToString(e.getValue(), true));
    }
    return sb.toString();
  }

  /** Returns true when the string is null or contains only whitespace. */
  private static boolean isBlank(String data) {
    return data == null || data.trim().isEmpty();
  }
}