/*
 * Copyright (C) 2014 Indeed Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package com.indeed.flamdex.simple;

import com.google.common.base.Charsets;
import com.indeed.util.io.Files;
import com.indeed.flamdex.utils.FlamdexUtils;
import com.indeed.flamdex.writer.StringFieldWriter;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

/**
 * Writes the terms file and docs file of a single string field.
 *
 * <p>Terms are stored prefix-compressed against the previously written term, using their UTF-8
 * byte representation (see {@link #writeTermDelta()}). Instances are obtained through
 * {@link #open(String, String, long, boolean)}.
 *
 * @author jsgroth
 */
class SimpleStringFieldWriter extends SimpleFieldWriter implements StringFieldWriter {
    /** Buffer size, in bytes, of the streams wrapping the terms and docs files. */
    private static final int OUTPUT_BUFFER_SIZE = 65536;

    /** Directory that holds the field's files; also handed to the b-tree writer. */
    private final String outputDirectory;
    /** Name of the field being written; used to derive every file name. */
    private final String field;
    /** Whether {@link #writeBTreeIndex()} should build the string b-tree index. */
    private final boolean writeBTreesOnClose;

    /** UTF-8 bytes of the term most recently emitted by {@link #writeTermDelta()}; empty before the first term. */
    private byte[] lastWrittenTermBytes = new byte[0];
    /** Term most recently passed to {@link #nextTerm(String)}, or {@code null} if there has been none. */
    private String currentTerm = null;

    /**
     * Creates a writer over already-opened streams; use {@link #open(String, String, long, boolean)}.
     *
     * @param outputDirectory    directory containing the field's files
     * @param field              name of the field
     * @param writeBTreesOnClose whether to build the b-tree index in {@link #writeBTreeIndex()}
     * @param termsOutput        destination for term data, forwarded to the superclass
     * @param docsOutput         destination for doc data, forwarded to the superclass
     * @param numDocs            forwarded to the superclass (presumably the number of documents
     *                           in the index -- confirm against SimpleFieldWriter)
     */
    private SimpleStringFieldWriter(String outputDirectory, String field, boolean writeBTreesOnClose, OutputStream termsOutput, OutputStream docsOutput, long numDocs) {
        super(termsOutput, docsOutput, numDocs);
        this.outputDirectory = outputDirectory;
        this.field = field;
        this.writeBTreesOnClose = writeBTreesOnClose;
    }

    /**
     * Returns the name of the terms file for a string field.
     *
     * @param field name of the field
     * @return {@code "fld-" + field + ".strterms"}
     */
    public static String getTermsFilename(String field) {
        return "fld-"+field+".strterms";
    }

    /**
     * Returns the name of the docs file for a string field.
     *
     * @param field name of the field
     * @return {@code "fld-" + field + ".strdocs"}
     */
    public static String getDocsFilename(String field) {
        return "fld-"+field+".strdocs";
    }

    /**
     * Opens the terms and docs files of {@code field} inside {@code outputDirectory} and returns a
     * writer over them.
     *
     * <p>If anything fails after a file has been opened, every stream opened so far is closed
     * before the failure propagates, so a failed call does not leak file descriptors.
     *
     * @param outputDirectory    directory in which the files are created
     * @param field              name of the field
     * @param numDocs            forwarded to the writer's superclass
     * @param writeBTreesOnClose whether the writer should build the b-tree index
     * @return a new writer backed by buffered streams over the two files
     * @throws FileNotFoundException if either file cannot be opened for writing
     */
    public static SimpleStringFieldWriter open(String outputDirectory, String field, long numDocs, boolean writeBTreesOnClose) throws FileNotFoundException {
        OutputStream termsOutput = null;
        OutputStream docsOutput = null;
        boolean opened = false;
        try {
            termsOutput = new BufferedOutputStream(new FileOutputStream(Files.buildPath(outputDirectory, getTermsFilename(field))), OUTPUT_BUFFER_SIZE);
            docsOutput = new BufferedOutputStream(new FileOutputStream(Files.buildPath(outputDirectory, getDocsFilename(field))), OUTPUT_BUFFER_SIZE);
            final SimpleStringFieldWriter writer = new SimpleStringFieldWriter(outputDirectory, field, writeBTreesOnClose, termsOutput, docsOutput, numDocs);
            opened = true;
            return writer;
        } finally {
            if (!opened) {
                // Ownership was never handed to a writer, so nobody else will close these.
                closeQuietly(docsOutput);
                closeQuietly(termsOutput);
            }
        }
    }

    /**
     * Closes {@code stream} if it is non-null, discarding any failure from the close itself.
     *
     * @param stream the stream to close; may be {@code null}
     */
    private static void closeQuietly(OutputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Only used while another exception is already propagating out of open();
            // that original failure is the one the caller needs to see.
        }
    }

    /**
     * switch terms
     *
     * <p>NOTE(review): ordering is checked with {@link String#compareTo(String)}, which compares
     * UTF-16 code units, while terms are written as UTF-8 bytes; the two orders can disagree for
     * terms containing supplementary characters -- confirm which order readers expect.
     *
     * @param term the next term to write to the index
     * @throws IOException if there is a file write error
     * @throws NullPointerException if term is null
     * @throws IllegalArgumentException if term is not lexicographically greater than the previous term added
     */
    @Override
    public void nextTerm(String term) throws IOException {
        if (term == null) {
            throw new NullPointerException("you just had to try, didn't you?");
        }
        if (currentTerm != null && currentTerm.compareTo(term) >= 0) {
            throw new IllegalArgumentException("terms must be in sorted order: "+term+" is not lexicographically greater than "+currentTerm);
        }
        // Runs before currentTerm is replaced, so the superclass still sees the previous term
        // (presumably to finish writing it -- confirm against SimpleFieldWriter).
        internalNextTerm();
        currentTerm = term;
    }

    /**
     * Writes the current term to the terms output as a delta against the previously written term.
     *
     * <p>The record consists of: the number of trailing bytes of the previous term that are not
     * shared (vlong), the number of new bytes that follow (vlong), and then those new bytes. All
     * lengths are measured on the UTF-8 encoding of the terms.
     *
     * @throws IOException if writing to the terms output fails
     */
    @Override
    protected void writeTermDelta() throws IOException {
        final byte[] currentTermBytes = currentTerm.getBytes(Charsets.UTF_8);
        final int prefixLen = getPrefixLen(lastWrittenTermBytes, currentTermBytes, Math.min(lastWrittenTermBytes.length, currentTermBytes.length));
        final int suffixLen = currentTermBytes.length - prefixLen;
        FlamdexUtils.writeVLong(lastWrittenTermBytes.length - prefixLen, termsOutput);
        FlamdexUtils.writeVLong(suffixLen, termsOutput);
        termsOutput.write(currentTermBytes, prefixLen, suffixLen);
        lastWrittenTermBytes = currentTermBytes;
    }

    /**
     * Builds the string b-tree index file ({@code fld-<field>.strindex}) in the output directory,
     * but only when this writer was opened with {@code writeBTreesOnClose} set.
     *
     * @throws IOException if writing the index fails
     */
    @Override
    protected void writeBTreeIndex() throws IOException {
        if (writeBTreesOnClose) {
            SimpleFlamdexWriter.writeStringBTree(outputDirectory, field, new File(outputDirectory, "fld-" + field + ".strindex"));
        }
    }

    /**
     * Returns the length of the common prefix of {@code a} and {@code b}, examining at most the
     * first {@code n} bytes of each.
     *
     * @param a first array; must hold at least {@code n} bytes
     * @param b second array; must hold at least {@code n} bytes
     * @param n maximum number of leading bytes to compare
     * @return index of the first differing byte, or {@code n} if the first {@code n} bytes match
     */
    private static int getPrefixLen(byte[] a, byte[] b, int n) {
        for (int i = 0; i < n; ++i) {
            if (a[i] != b[i]) {
                return i;
            }
        }
        return n;
    }
}