package org.apache.hadoop.hive.mastiff; import java.io.BufferedInputStream; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Hashtable; import java.util.List; import java.util.Properties; import java.util.Set; import java.util.Stack; import java.util.StringTokenizer; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import cn.ac.ncic.mastiff.MConstants; import cn.ac.ncic.mastiff.io.coding.Compression.Algorithm; import cn.ac.ncic.mastiff.io.coding.Encoder.CodingType; import cn.ac.ncic.mastiff.io.segmentfile.PageMeta; import cn.ac.ncic.mastiff.io.segmentfile.PageMeta.ScanMode; import cn.ac.ncic.mastiff.io.segmentfile.PageMetaList; import cn.ac.ncic.mastiff.io.segmentfile.PageMetaSection; public class MastiffHandlerUtil { public static final String CF_COLUMN_MAPPING = "mastiff.table.columns.map"; public static final String CF_ALGO = "mastiff.table.algorithms"; public static final String CF_CODING = "mastiff.table.codingtypes"; public static final String CF_TABLE_NAME = "mastiff.table.name"; private static final String udfOp = "UDFOP(.*)$"; private static final Pattern udfOpPattern = Pattern.compile(udfOp); public static final Log LOG = LogFactory.getLog(MastiffHandlerUtil.class); private static ObjectInspector cachedMastiffRowOI = null; private static SingleColEvaluator cachedColEvaluator = null; public static class MTableDesc { public String[] columnNames = null; public TypeInfo[] columnTypes = null; public Algorithm[] clusterAlgos = null; public CodingType[] clusterCodingTypes = null; public int[][] columnsMapping = null; public TypeInfo[][] clusterTypes = null; public Boolean equals(MTableDesc other) { if (this.columnNames.length == other.columnNames.length) { for (int i = 0; i < columnNames.length; i++) { if (!columnNames[i].equals(other.columnNames[i])) { return false; } if (!columnTypes[i].equals(other.columnTypes[i])) { return false; } } } else { return false; } if (this.clusterAlgos.length == other.clusterAlgos.length) { for (int i = 0; i < clusterAlgos.length; i++) { if (!clusterAlgos[i].equals(other.clusterAlgos[i])) { return false; } if (!clusterCodingTypes[i].equals(other.clusterCodingTypes[i])) { return false; } if (columnsMapping[i].length != other.columnsMapping[i].length || clusterTypes[i].length != other.clusterTypes[i].length) { return false; } for (int j = 0; j < columnsMapping[i].length; j++) { if (columnsMapping[i][j] != other.columnsMapping[i][j]) { return false; } if (!clusterTypes[i][j].equals(other.clusterTypes[i][j])) { return false; } } } } else { return false; } return true; } } public static class ColumnDesc { // column family idx public int cf; // column idx in the current cluster public int idxInCf; // column idx in table public int idxInTbl; } public static class ValidColumnsInCF { public int cf; public List<Integer> validColumns = new ArrayList<Integer>(); } /** * Set cf metadata into configuration from Mastiff MetaStore * * @throws Exception */ public static void setCFMeta(Configuration conf, String tableName) throws Exception { String mmsip = conf.get("mastiff.metastore.ip"); if (mmsip == null) { throw new Exception("Mastiff Metastore ip not set in hive-site.xml"); } MastiffMetastoreClient mmc = new MastiffMetastoreClient(mmsip); Hashtable meta = null; meta = mmc.getMetadata(tableName); if (meta != null) { conf.set(CF_COLUMN_MAPPING, (String) meta.get("columnsmap")); conf.set(CF_ALGO, (String) meta.get("algorithm")); conf.set(CF_CODING, (String) meta.get("codingtype")); } else { throw new Exception("No such table exist"); } } /** * Get Column Family definitions from configuration * including each column's compression Algo, coding type and column Mapping */ public static MTableDesc getMTableDesc(Object obj) { MTableDesc desc = new MTableDesc(); StringTokenizer st = null; String str_columnmap, str_algorithms, str_codingtypes; getColumnInfos(desc, obj); if (obj instanceof Configuration) { Configuration conf = (Configuration) obj; str_columnmap = conf.get(MastiffHandlerUtil.CF_COLUMN_MAPPING); str_algorithms = conf.get(MastiffHandlerUtil.CF_ALGO); str_codingtypes = conf.get(MastiffHandlerUtil.CF_CODING); } else if (obj instanceof Properties) { Properties props = (Properties) obj; str_columnmap = props.getProperty(MastiffHandlerUtil.CF_COLUMN_MAPPING); str_algorithms = props.getProperty(MastiffHandlerUtil.CF_ALGO); str_codingtypes = props.getProperty(MastiffHandlerUtil.CF_CODING); } else { return null; } if (str_columnmap != null) { st = new StringTokenizer(str_columnmap, MConstants.CLUSTER_SEP); desc.columnsMapping = new int[st.countTokens()][]; int i = 0; while (st.hasMoreTokens()) { StringTokenizer st1 = new StringTokenizer(st.nextToken(), MConstants.COLUMN_SEP); desc.columnsMapping[i] = new int[st1.countTokens()]; int j = 0; while (st1.hasMoreTokens()) { desc.columnsMapping[i][j] = Integer.valueOf(st1.nextToken()); j++; } i++; } } desc.clusterAlgos = new Algorithm[desc.columnsMapping.length]; desc.clusterCodingTypes = new CodingType[desc.columnsMapping.length]; if (str_algorithms != null) { st = new StringTokenizer(str_algorithms, MConstants.CLUSTER_SEP); int i = 0; while (st.hasMoreTokens()) { desc.clusterAlgos[i] = Algorithm.valueOf(st.nextToken()); i++; } } st = new StringTokenizer(str_codingtypes, MConstants.CLUSTER_SEP); int i = 0; while (st.hasMoreTokens()) { desc.clusterCodingTypes[i] = CodingType.valueOf(st.nextToken()); i++; } return desc; } /** * Get each CF's fields' data types * * @param desc */ public static void getCFTypes(MTableDesc desc) { TypeInfo[] clmTypes = desc.columnTypes; desc.clusterTypes = new TypeInfo[desc.columnsMapping.length][]; for (int i = 0; i < desc.columnsMapping.length; i++) { desc.clusterTypes[i] = new TypeInfo[desc.columnsMapping[i].length]; for (int j = 0; j < desc.columnsMapping[i].length; j++) { desc.clusterTypes[i][j] = clmTypes[desc.columnsMapping[i][j]]; } } } public static void getColumnInfos(MTableDesc tbl, Object obj) { String str_columnNames = null; String str_columnTypes = null; if (obj instanceof Configuration) { Configuration conf = (Configuration) obj; str_columnNames = conf.get(serdeConstants.LIST_COLUMNS); str_columnTypes = conf.get(serdeConstants.LIST_COLUMN_TYPES); } else if (obj instanceof Properties) { Properties props = (Properties) obj; str_columnNames = props.getProperty(serdeConstants.LIST_COLUMNS); str_columnTypes = props.getProperty(serdeConstants.LIST_COLUMN_TYPES); } List<String> columnNames = null; List<TypeInfo> columnTypes = null; if (str_columnNames != null && str_columnNames.length() > 0) { columnNames = Arrays.asList(str_columnNames.split(",")); } else { columnNames = new ArrayList<String>(); } if (str_columnTypes == null) { // Default type: all string StringBuilder sb = new StringBuilder(); for (int i = 0; i < columnNames.size(); i++) { if (i > 0) { sb.append(":"); } sb.append(serdeConstants.STRING_TYPE_NAME); } str_columnTypes = sb.toString(); } columnTypes = TypeInfoUtils .getTypeInfosFromTypeString(str_columnTypes); int vcnum = 0; if (columnNames.contains(VirtualColumn.FILENAME.getName())) { vcnum++; } if (columnNames.contains(VirtualColumn.GROUPINGID.getName())) { vcnum++; } if (columnNames.contains(VirtualColumn.RAWDATASIZE.getName())) { vcnum++; } if (columnNames.contains(VirtualColumn.ROWOFFSET.getName())) { vcnum++; } if (columnNames.contains(VirtualColumn.BLOCKOFFSET.getName())) { vcnum++; } columnNames = columnNames.subList(0, columnNames.size() - vcnum); columnTypes = columnTypes.subList(0, columnTypes.size() - vcnum); tbl.columnNames = columnNames.toArray(new String[columnNames.size()]); tbl.columnTypes = columnTypes.toArray(new TypeInfo[columnTypes.size()]); } /* * Found which column family the column belongs to * and the column's position in that column family */ public static ColumnDesc getCF(MTableDesc tbl, int colIdx) { int[][] cm = tbl.columnsMapping; ColumnDesc cd = new ColumnDesc(); cd.idxInTbl = colIdx; for (int cfIdx = 0; cfIdx < cm.length; cfIdx++) { for (int colInCfIdx = 0; colInCfIdx < cm[cfIdx].length; colInCfIdx++) { if (colIdx == cm[cfIdx][colInCfIdx]) { cd.cf = cfIdx; cd.idxInCf = colInCfIdx; return cd; } } } return null; } /** * Find each column's column family */ public static List<Integer> getCFs(MTableDesc tbl, List<Integer> cols) { List<Integer> CFs = new ArrayList<Integer>(); for (Integer col : cols) { ColumnDesc cd = getCF(tbl, col); if (cd != null) { CFs.add(cd.cf); } else { CFs.add(-1); } } return CFs; } public static HashMap<Integer, ValidColumnsInCF> getCfValidColumns(MTableDesc tbl, List<Integer> validCols) { HashMap<Integer, ValidColumnsInCF> result = new HashMap<Integer, ValidColumnsInCF>(); for (Integer col : validCols) { ColumnDesc cd = getCF(tbl, col); if (cd != null) { ValidColumnsInCF vcic = result.get(cd.cf); if (vcic == null) { vcic = new ValidColumnsInCF(); vcic.cf = cd.cf; vcic.validColumns.add(cd.idxInCf); result.put(cd.cf, vcic); } else { vcic.validColumns.add(cd.idxInCf); } } } return result; } public static void loadPropertiesFromFile(Properties props, String filePath) throws FileNotFoundException, IOException { InputStream in = new BufferedInputStream(new FileInputStream(filePath)); props.load(in); } public static String getTableNameFromFilter(ExprNodeDesc filter) { String tblAlias = null; if (filter instanceof ExprNodeColumnDesc) { return ((ExprNodeColumnDesc) filter).getTabAlias(); } else if (filter instanceof ExprNodeGenericFuncDesc) { List<ExprNodeDesc> children = ((ExprNodeGenericFuncDesc) filter).getChildExprs(); for (ExprNodeDesc child : children) { tblAlias = getTableNameFromFilter(child); if (tblAlias != null) { return tblAlias; } } } return null; } public static Set<String> getColumnNamesFromFilter(ExprNodeDesc filter) { Set<String> colNames = new HashSet<String>(); if (filter instanceof ExprNodeColumnDesc) { colNames.add(((ExprNodeColumnDesc) filter).getColumn()); } else if (filter instanceof ExprNodeGenericFuncDesc) { List<ExprNodeDesc> children = ((ExprNodeGenericFuncDesc) filter).getChildExprs(); for (ExprNodeDesc child : children) { colNames.addAll(getColumnNamesFromFilter(child)); } } return colNames; } public static List<Integer> getIdxFromName(MTableDesc tblDesc, List<String> filterColumnsLst) { List<Integer> cols = new ArrayList<Integer>(); HashMap<String, Integer> nameToIdx = new HashMap<String, Integer>(); for (int i = 0; i < tblDesc.columnNames.length; i++) { nameToIdx.put(tblDesc.columnNames[i], i); } for (String colName : filterColumnsLst) { cols.add(nameToIdx.get(colName)); } return cols; } public static List<Integer> getColIdFromFilter(MTableDesc tblDesc, ExprNodeDesc filter) { List<String> filterColumnsLst = new ArrayList<String>( MastiffHandlerUtil.getColumnNamesFromFilter(filter)); List<Integer> filterColumns = MastiffHandlerUtil.getIdxFromName(tblDesc, filterColumnsLst); Collections.sort(filterColumns); return filterColumns; } public static void getFilterInfo(MTableDesc tblDesc, ExprNodeDesc filter, List<Integer> filterCfs, List<Integer> filterColumns) { filterColumns.addAll(MastiffHandlerUtil.getColIdFromFilter(tblDesc, filter)); HashMap<Integer, ValidColumnsInCF> cfWithCols = MastiffHandlerUtil.getCfValidColumns(tblDesc, filterColumns); for (Integer key : cfWithCols.keySet()) { filterCfs.add(key); } } public static boolean isColInTable(String tblName, ExprNodeDesc filter) { String tblAlias = null; String[] parts = tblName.split("\\."); if (tblName.contains(".")) { tblAlias = parts[1]; } else { tblAlias = parts[0]; } if (tblAlias.equalsIgnoreCase(getTableNameFromFilter(filter))) { return true; } else { return false; } } public static List<Integer> getFittestSplit(List<Integer> filterCfs, PageMetaSection pms) { PageMetaList[] filterPML = new PageMetaList[filterCfs.size()]; PageMetaList[] allPML = pms.getPageMetaLists(); List<Integer> curPageIdxInEachCf = new ArrayList<Integer>(); List<Integer> pageNumInEachCf = new ArrayList<Integer>(); int curRGEnd = 0; List<Integer> rgLst = new ArrayList<Integer>(); int i = 0; for (Integer cf : filterCfs) { filterPML[i++] = allPML[cf]; curPageIdxInEachCf.add(0); pageNumInEachCf.add(allPML[cf].getMetaList().size()); } // LOG.error("Page num in each cf "+pageNumInEachCf); // while (checkPageIdx(pageNumInEachCf, curPageIdxInEachCf)) { // rgLst.add(findSmallestEnd(filterPML, curPageIdxInEachCf)); // }// end while return getEndsAsOrdersList(filterPML); // return rgLst; } public static List<Integer> getEndsAsOrdersList(PageMetaList[] filterPMLs) { Set<Integer> endset = new TreeSet<Integer>(); for (int i = 0; i < filterPMLs.length; i++) { List<PageMeta> curPML = filterPMLs[i].getMetaList(); for (PageMeta curPM : curPML) { endset.add(curPM.startPos + curPM.numPairs - 1); } } return new ArrayList<Integer>(endset); } // public static int findSmallestEnd(PageMetaList[] filterPML, List<Integer> curPageIdxInEachCf) { // List<Integer> curPageEnd = new ArrayList<Integer>(); // for (int i = 0; i < filterPML.length; i++) { // PageMeta curPM = filterPML[i].getMetaList().get(curPageIdxInEachCf.get(i)); // curPageEnd.add(curPM.startPos + curPM.numPairs - 1); // } // int smallestEnd = Collections.min(curPageEnd); // for (int i = 0; i < filterPML.length; i++) { // if (curPageEnd.get(i) == smallestEnd) { // curPageIdxInEachCf.set(i, curPageIdxInEachCf.get(i) + 1); // } // } // // LOG.error("Cur page Idx in each cf "+curPageIdxInEachCf); // return smallestEnd; // } public static boolean checkPageIdx(List<Integer> pageNumInEachCf, List<Integer> curPageIdxInEachCf) { for (int i = 0; i < pageNumInEachCf.size(); i++) { // LOG.error("Cur Idx "+curPageIdxInEachCf+"_"+pageNumInEachCf); // LOG.error("If equals "+(curPageIdxInEachCf.get(i) == pageNumInEachCf.get(i))); if (curPageIdxInEachCf.get(i).equals(pageNumInEachCf.get(i))) { // LOG.error("will return false"); return false; } } return true; } public static List<Integer> getRowsInRG(List<Integer> rgs) { List<Integer> results = new ArrayList<Integer>(); for (int i = 0; i < rgs.size(); i++) { if (i == 0) { results.add(rgs.get(i) + 1); continue; } results.add(rgs.get(i) - rgs.get(i - 1)); } return results; } public static ScanMode[] getScanModesForRG(MTableDesc tblDesc, ExprNodeDesc filter, PageMetaSection pms, List<Integer> rgs) { Stack<ScanMode[]> tmpResult = new Stack<ScanMode[]>(); getScanModesForRG(tblDesc, tmpResult, filter, pms, rgs); return tmpResult.pop(); } public static void getScanModesForRG(MTableDesc tblDesc, Stack<ScanMode[]> tmpResult, ExprNodeDesc filter, PageMetaSection pms, List<Integer> rgs) { boolean leaf = false; if (cachedColEvaluator == null) { cachedColEvaluator = new SingleColEvaluator(tblDesc); } if (filter instanceof ExprNodeGenericFuncDesc) { GenericUDF udf = ((ExprNodeGenericFuncDesc) filter).getGenericUDF(); UDFType type = null; String udfname = udf.getClass().getSimpleName(); Matcher m = udfOpPattern.matcher(udfname); if (m.find()) { type = UDFType.fromString(m.group(1).toLowerCase()); } else if (udf instanceof GenericUDFBridge) { type = UDFType.fromString("bridge"); } else { throw new UnsupportedOperationException("Do not support this expression: " + filter.getExprString()); } List<ExprNodeDesc> children = ((ExprNodeGenericFuncDesc) filter).getChildExprs(); for (ExprNodeDesc child : children) { if (child instanceof ExprNodeColumnDesc || child instanceof ExprNodeConstantDesc) { leaf = true; } } if (leaf) { // generate scanmap for rg List<Integer> col = getColIdFromFilter(tblDesc, filter); assert (col.size() == 1); ColumnDesc cd = getCF(tblDesc, col.get(0)); PageMetaList pml = pms.getPageMetaLists()[cd.cf]; List<PageMeta> mts = pml.getMetaList(); ScanMode[] sms = new ScanMode[mts.size()]; int i = 0; for (PageMeta pm : mts) { Boolean maxret = cachedColEvaluator.evaluate(tblDesc, filter, Arrays.asList(pm.max)); Boolean minret = cachedColEvaluator.evaluate(tblDesc, filter, Arrays.asList(pm.min)); if (Boolean.TRUE.equals(maxret) && Boolean.TRUE.equals(minret)) { if (type == UDFType.BRIDGE) { sms[i++] = ScanMode.Rough; } else { sms[i++] = ScanMode.Positive; } } else if (Boolean.FALSE.equals(maxret) && Boolean.FALSE.equals(minret)) { switch (type) { case EQ: sms[i++] = intoEqual(tblDesc, filter, cd, pm); break; case BRIDGE: sms[i++] = ScanMode.Rough; break; default: sms[i++] = ScanMode.Negative; } } else { sms[i++] = ScanMode.Rough; } } ScanMode[] rgsms = extendScanModesToRG(sms, mts, rgs); tmpResult.push(rgsms); } else { for (ExprNodeDesc child : children) { getScanModesForRG(tblDesc, tmpResult, child, pms, rgs); } ScanMode[] result = null; switch (type) { case AND: result = and(tmpResult.pop(), tmpResult.pop()); break; case OR: result = or(tmpResult.pop(), tmpResult.pop()); break; case NOT: result = not(tmpResult.pop()); break; default: throw new UnsupportedOperationException("Do not support this expression: " + filter.getExprString()); } tmpResult.push(result); } } }// end method public static ScanMode getScanModeForSegment(MTableDesc tblDesc, ExprNodeDesc filter, PageMeta[] cfMts) { Stack<ScanMode[]> tmpResult = new Stack<ScanMode[]>(); getScanModeForSegment(tblDesc, tmpResult, filter, cfMts); return tmpResult.pop()[0]; } public static void getScanModeForSegment(MTableDesc tblDesc, Stack<ScanMode[]> tmpResult, ExprNodeDesc filter, PageMeta[] cfMts) { boolean leaf = false; if (cachedColEvaluator == null) { cachedColEvaluator = new SingleColEvaluator(tblDesc); } if (filter instanceof ExprNodeGenericFuncDesc) { GenericUDF udf = ((ExprNodeGenericFuncDesc) filter).getGenericUDF(); UDFType type = null; String udfname = udf.getClass().getSimpleName(); Matcher m = udfOpPattern.matcher(udfname); if (m.find()) { type = UDFType.fromString(m.group(1).toLowerCase()); } else if (udf instanceof GenericUDFBridge) { type = UDFType.fromString("bridge"); } else { throw new UnsupportedOperationException("Do not support this expression: " + filter.getExprString()); } List<ExprNodeDesc> children = ((ExprNodeGenericFuncDesc) filter).getChildExprs(); for (ExprNodeDesc child : children) { if (child instanceof ExprNodeColumnDesc || child instanceof ExprNodeConstantDesc) { leaf = true; } } if (leaf) { // generate scanmap for rg List<Integer> col = getColIdFromFilter(tblDesc, filter); assert (col.size() == 1); ColumnDesc cd = getCF(tblDesc, col.get(0)); ScanMode sm = null; int i = 0; PageMeta pm = cfMts[cd.cf]; Boolean maxret = cachedColEvaluator.evaluate(tblDesc, filter, Arrays.asList(pm.max)); Boolean minret = cachedColEvaluator.evaluate(tblDesc, filter, Arrays.asList(pm.min)); if (Boolean.TRUE.equals(maxret) && Boolean.TRUE.equals(minret)) { if (type == UDFType.BRIDGE) { sm = ScanMode.Rough; } else { sm = ScanMode.Positive; } } else if (Boolean.FALSE.equals(maxret) && Boolean.FALSE.equals(minret)) { switch (type) { case EQ: sm = intoEqual(tblDesc, filter, cd, pm); break; case BRIDGE: sm = ScanMode.Rough; break; default: sm = ScanMode.Negative; } } else { sm = ScanMode.Rough; } tmpResult.push(new ScanMode[] {sm}); } else { for (ExprNodeDesc child : children) { getScanModeForSegment(tblDesc, tmpResult, child, cfMts); } ScanMode[] result = null; switch (type) { case AND: result = and(tmpResult.pop(), tmpResult.pop()); break; case OR: result = or(tmpResult.pop(), tmpResult.pop()); break; case NOT: result = not(tmpResult.pop()); break; default: throw new UnsupportedOperationException("Do not support this expression: " + filter.getExprString()); } tmpResult.push(result); } } } private static ScanMode intoEqual(MTableDesc tblDesc, ExprNodeDesc filter, ColumnDesc cd, PageMeta pm) { if (cachedColEvaluator == null) { cachedColEvaluator = new SingleColEvaluator(tblDesc); } ExprNodeGenericFuncDesc filterClone = (ExprNodeGenericFuncDesc) filter.clone(); filterClone.setGenericUDF(new GenericUDFOPGreaterThan()); ExprNodeGenericFuncDesc filterClone2 = (ExprNodeGenericFuncDesc) filter.clone(); filterClone2.setGenericUDF(new GenericUDFOPLessThan()); Boolean gt = cachedColEvaluator.evaluate(tblDesc, filterClone, Arrays.asList(pm.min)); Boolean lt = cachedColEvaluator.evaluate(tblDesc, filterClone2, Arrays.asList(pm.max)); if (Boolean.TRUE.equals(gt) || Boolean.TRUE.equals(lt)) { return ScanMode.Negative; } else { return ScanMode.Rough; } } public static ScanMode[] extendScanModesToRG(ScanMode[] sms, List<PageMeta> mts, List<Integer> rgs) { ScanMode[] rgsms = new ScanMode[rgs.size()]; int idxInPage = 0; List<Integer> pageEnds = getPageEnds(mts); for (int idx = 0; idx < rgs.size(); idx++) { if (rgs.get(idx) <= pageEnds.get(idxInPage)) { rgsms[idx] = sms[idxInPage]; } else if (rgs.get(idx) > pageEnds.get(idxInPage)) { idxInPage++; rgsms[idx] = sms[idxInPage]; } } return rgsms; } public static List<Integer> getPageEnds(List<PageMeta> mts) { List<Integer> pageEnds = new ArrayList<Integer>(); for (PageMeta pm : mts) { pageEnds.add(pm.startPos + pm.numPairs - 1); } return pageEnds; } public static ScanMode[] and(ScanMode[] left, ScanMode[] right) { ScanMode[] result = new ScanMode[left.length]; for (int i = 0; i < left.length; i++) { if (left[i] == ScanMode.Negative || right[i] == ScanMode.Negative) { result[i] = ScanMode.Negative; } else if (left[i] == ScanMode.Rough || right[i] == ScanMode.Rough) { result[i] = ScanMode.Rough; } else { result[i] = ScanMode.Positive; } } return result; } public static ScanMode[] or(ScanMode[] left, ScanMode[] right) { ScanMode[] result = new ScanMode[left.length]; for (int i = 0; i < left.length; i++) { if (left[i] == ScanMode.Positive || right[i] == ScanMode.Positive) { result[i] = ScanMode.Positive; } else if (left[i] == ScanMode.Rough || right[i] == ScanMode.Rough) { result[i] = ScanMode.Rough; } else { result[i] = ScanMode.Negative; } } return result; } public static ScanMode[] not(ScanMode[] left) { ScanMode[] result = new ScanMode[left.length]; for (int i = 0; i < left.length; i++) { if (left[i] == ScanMode.Positive) { result[i] = ScanMode.Negative; } else if (left[i] == ScanMode.Negative) { result[i] = ScanMode.Positive; } else { result[i] = ScanMode.Rough; } } return result; } public static long getSegmentLineNum(PageMetaSection pms) { long count = 0; PageMetaList fstCF = pms.getPageMetaLists()[0]; List<PageMeta> pml = fstCF.getMetaList(); for (PageMeta pm : pml) { count += pm.numPairs; } return count; } } enum UDFType { LT("LessThan"), LT_EQ("EqualOrLessThan"), EQ("Equal"), GT_EQ("EqualOrGreaterThan"), GT("GreaterThan"), NOT("Not"), AND("And"), OR("Or"), BRIDGE("Bridge"); private final String text; UDFType(String text) { this.text = text; } public String getText() { return this.text; } public static UDFType fromString(String text) { if (text != null) { for (UDFType t : UDFType.values()) { if (text.equalsIgnoreCase(t.text)) { return t; } } } return null; } }