StringPool.java example

Explorer
platform_build-master
- buck-master
/*
 * Copyright 2017-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may obtain
 * a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */

package com.facebook.buck.android.resources;

import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.nio.ByteBuffer;
import java.util.List;

/**
 * A StringPool consists of a header: ResChunk_header (u16 chunk_type, u16 header_size, u32
 * chunk_size), u32 string_count, u32 style_count, u32 flags (0x1 sorted, 0x100 utf-8 encoded), u32
 * strings_start (byte offset from the beginning of the chunk to the string data), u32 styles_start
 * (byte offset from the beginning of the chunk to the style data, or 0 when there are no styles).
 *
 * <p>The header is followed by an u32[string_count] array of strings offsets (relative to
 * strings_start) where the strings reside. This is then followed by a similar array for styles.
 *
 * <p>In a utf-8 encoded string pool, a string data consists of: utf-16 length, utf-8 length, string
 * bytes, \0.
 *
 * <p>In a utf-16 encoded string pool, a string data consists of: utf-16 length, string bytes, \0\0.
 *
 * <p>A style is an array of tuples (u32 stringref, u32 start, u32 end). A stringref of 0xFFFFFFFF
 * indicates the end of a style array (note that the next array may start at the immediately
 * following word).
 */
public class StringPool extends ResChunk {
  /** Size in bytes of the string pool header; {@link #get} rejects any other header size. */
  private static final short HEADER_SIZE = 28;

  /** Flag bit recorded in the header when the pool is marked as sorted. */
  private static final int SORTED_FLAG = 0x1;

  /** Flag bit recorded in the header when string data is utf-8 (rather than utf-16) encoded. */
  private static final int UTF8_FLAG = 0x100;

  /**
   * Largest value {@link #putEncodedLength} can represent: 7 bits in the first byte (the top bit
   * is the "two byte" marker) plus 8 bits in the second byte.
   */
  private static final int MAX_ENCODED_LENGTH = 0x7FFF;

  private final int stringCount;
  private final int styleCount;
  private final boolean utf8;
  private final boolean sorted;
  private final ByteBuffer stringOffsets;
  private final ByteBuffer styleOffsets;
  private final ByteBuffer stringData;
  private final ByteBuffer styleData;

  /**
   * Builds a pool from its already-encoded regions. The chunk size reported to the superclass is
   * the header size plus the limits of the four buffers.
   */
  private StringPool(
      int stringCount,
      int styleCount,
      boolean utf8,
      boolean sorted,
      ByteBuffer stringOffsets,
      ByteBuffer styleOffsets,
      ByteBuffer stringData,
      ByteBuffer styleData) {
    super(
        CHUNK_STRING_POOL,
        HEADER_SIZE,
        HEADER_SIZE
            + stringOffsets.limit()
            + styleOffsets.limit()
            + stringData.limit()
            + styleData.limit());
    this.stringCount = stringCount;
    this.styleCount = styleCount;
    this.utf8 = utf8;
    this.sorted = sorted;
    this.stringOffsets = stringOffsets;
    this.styleOffsets = styleOffsets;
    this.stringData = stringData;
    this.styleData = styleData;
  }

  /**
   * Creates a utf-8 encoded, non-sorted pool without styles containing {@code strings} in
   * iteration order.
   *
   * @param strings the strings to store; string {@code i} gets id {@code i}
   * @return the new pool
   * @throws IllegalArgumentException if a string's length in chars or in utf-8 bytes exceeds what
   *     the two-byte length prefix can represent (0x7FFF)
   */
  public static StringPool create(Iterable<String> strings) {
    List<String> stringsList = ImmutableList.copyOf(strings);
    int stringCount = stringsList.size();

    ByteBuffer stringOffsets = wrap(new byte[4 * stringCount]);
    ByteArrayOutputStream stringData = new ByteArrayOutputStream();

    // Scratch space for the two length prefixes; each one takes at most two bytes.
    byte[] encodedLength = new byte[4];
    ByteBuffer lengthBuf = wrap(encodedLength);
    for (int i = 0; i < stringCount; i++) {
      String value = stringsList.get(i);
      ByteBuffer encoded = Charsets.UTF_8.encode(value);
      int encodedSize = encoded.remaining();

      lengthBuf.position(0);
      putEncodedLength(lengthBuf, value.length());
      putEncodedLength(lengthBuf, encodedSize);

      // Entry layout: utf-16 length, utf-8 length, string bytes, \0.
      stringOffsets.putInt(i * 4, stringData.size());
      stringData.write(encodedLength, 0, lengthBuf.position());
      stringData.write(encoded.array(), encoded.arrayOffset() + encoded.position(), encodedSize);
      stringData.write(0);
    }

    // Pad to 4-byte boundary.
    int padding = (4 - (stringData.size() % 4)) % 4;
    for (int i = 0; i < padding; i++) {
      stringData.write(0);
    }

    return new StringPool(
        stringCount,
        0,
        true,
        false,
        stringOffsets,
        wrap(new byte[0]),
        wrap(stringData.toByteArray()),
        wrap(new byte[0]));
  }

  /**
   * Appends {@code length} to {@code buf} using the utf-8 pool length encoding: one byte for
   * values below 0x80, otherwise two bytes where the first has its top bit set and carries the
   * high 7 bits, and the second carries the low 8 bits.
   *
   * @throws IllegalArgumentException if {@code length} does not fit in 15 bits; without this
   *     check the high bits would be silently dropped and the resulting pool would be corrupt
   */
  private static void putEncodedLength(ByteBuffer buf, int length) {
    Preconditions.checkArgument(
        length >= 0 && length <= MAX_ENCODED_LENGTH,
        "String length %s cannot be encoded in a utf-8 string pool (max %s).",
        length,
        MAX_ENCODED_LENGTH);
    if (length < (1 << 7)) {
      buf.put((byte) length);
    } else {
      buf.put((byte) ((1 << 7) | (length >> 8)));
      buf.put((byte) (length & 0xFF));
    }
  }

  /**
   * Parses a string pool chunk from {@code buf}. The header fields are consumed with relative
   * reads, so the position of {@code buf} is advanced past the header.
   *
   * @param buf buffer holding the chunk
   * @return the parsed pool, whose regions are slices of {@code buf}
   * @throws IllegalStateException if the chunk type or header size is not that of a string pool,
   *     or if the string data does not start right after the offset arrays
   */
  public static StringPool get(ByteBuffer buf) {
    int type = buf.getShort();
    int headerSize = buf.getShort();
    int chunkSize = buf.getInt();
    int stringCount = buf.getInt();
    int styleCount = buf.getInt();
    int flags = buf.getInt();
    boolean utf8 = (flags & UTF8_FLAG) != 0;
    boolean sorted = (flags & SORTED_FLAG) != 0;
    int stringsStart = buf.getInt();
    int stylesStart = buf.getInt();

    Preconditions.checkState(
        type == CHUNK_STRING_POOL, "Unexpected chunk type %s for a string pool.", type);
    Preconditions.checkState(
        headerSize == HEADER_SIZE, "Unexpected string pool header size %s.", headerSize);
    Preconditions.checkState(
        stringsStart == headerSize + 4 * (stringCount + styleCount),
        "Unexpected start of string data %s.",
        stringsStart);

    buf = slice(buf, 0, chunkSize);
    // Adjust stylesStart to actually point at the end of the string data.
    stylesStart = stylesStart == 0 ? buf.limit() : stylesStart;
    return new StringPool(
        stringCount,
        styleCount,
        utf8,
        sorted,
        slice(buf, headerSize, 4 * stringCount),
        slice(buf, headerSize + 4 * stringCount, 4 * styleCount),
        slice(buf, stringsStart, stylesStart - stringsStart),
        slice(buf, stylesStart, buf.limit() - stylesStart));
  }

  /**
   * Writes this chunk to {@code output}: the chunk header, the counts, the flags, the two start
   * offsets, and then the string offsets, style offsets, string data and style data.
   */
  @Override
  public void put(ByteBuffer output) {
    putChunkHeader(output);
    output.putInt(stringCount);
    output.putInt(styleCount);
    output.putInt((utf8 ? UTF8_FLAG : 0) | (sorted ? SORTED_FLAG : 0));
    int stringsStart = HEADER_SIZE + 4 * (stringCount + styleCount);
    output.putInt(stringsStart);
    // A styles_start of 0 is written when the pool has no styles.
    output.putInt(styleCount == 0 ? 0 : stringsStart + stringData.limit());
    output.put(slice(stringOffsets, 0));
    output.put(slice(styleOffsets, 0));
    output.put(slice(stringData, 0));
    output.put(slice(styleData, 0));
  }

  /**
   * Reads a length prefix of a utf-8 pool entry at {@code offset} in the string data: a single
   * byte, or two bytes when the top bit of the first byte is set.
   */
  private int getUtf8Length(int offset) {
    int hi = stringData.get(offset);
    // A negative byte means the top bit is set, i.e. the length continues in the next byte.
    if (hi < 0) {
      hi = ((hi & 0x7F) << 8) + (stringData.get(offset + 1) & 0xFF);
    }
    return hi;
  }

  /**
   * Reads the length prefix of a utf-16 pool entry at {@code offset} in the string data: a single
   * short, or two shorts when the top bit of the first short is set.
   */
  private int getUtf16Length(int offset) {
    int hi = stringData.getShort(offset);
    // A negative short means the top bit is set, i.e. the length continues in the next short.
    if (hi < 0) {
      hi = ((hi & 0x7FFF) << 16) + (stringData.getShort(offset + 2) & 0xFFFF);
    }
    return hi;
  }

  /** Returns the offset (within the string data) of the entry for string {@code id}. */
  private int getEncodedStringOffset(int id) {
    return stringOffsets.getInt(id * 4);
  }

  /**
   * Returns the decoded string with the given id.
   *
   * @param id index of the string in the pool
   */
  public String getString(int id) {
    return getStringAtOffset(getEncodedStringOffset(id), false);
  }

  /**
   * Decodes the entry starting at {@code offset} in the string data.
   *
   * @param offset offset of the entry's length prefix
   * @param forDump whether to reproduce the output of {@code aapt dump strings}, which for utf-8
   *     pools extracts as many bytes as the decoded length instead of the encoded length
   */
  private String getStringAtOffset(int offset, boolean forDump) {
    int length;
    if (utf8) {
      // For utf8 strings, both the length in code points and the length in bytes is encoded.
      int utf16Length = getUtf8Length(offset);
      offset += (utf16Length < (1 << 7)) ? 1 : 2;
      int utf8length = getUtf8Length(offset);
      offset += (utf8length < (1 << 7)) ? 1 : 2;
      // For `aapt dump strings`, aapt has a bug where they use the decoded length rather than the
      // encoded length when extracting string data...
      length = forDump ? utf16Length : utf8length;
    } else {
      length = getUtf16Length(offset);
      offset += (length < (1 << 15)) ? 2 : 4;
      // The prefix counts 16-bit units; decodeString needs a byte count.
      length *= 2;
    }
    return decodeString(offset, length);
  }

  /** Returns string {@code id} decoded the way {@code aapt dump strings} would print it. */
  private String getStringForDump(int id) {
    return getStringAtOffset(getEncodedStringOffset(id), true);
  }

  /**
   * Decodes {@code byteLength} bytes of string data starting at {@code start} using the pool's
   * charset.
   */
  private String decodeString(int start, int byteLength) {
    byte[] data = new byte[byteLength];
    // Read through a duplicate so that the position of the shared buffer is never disturbed.
    ByteBuffer view = stringData.duplicate();
    view.position(start);
    view.get(data);
    return new String(data, utf8 ? Charsets.UTF_8 : Charsets.UTF_16LE);
  }

  /**
   * Prints a summary line followed by one line per string to {@code out}, decoding strings the
   * way {@code aapt dump strings} does.
   */
  public void dump(PrintStream out) {
    out.format(
        "String pool of %d unique %s %s strings, %d entries and %d styles using %d bytes:\n",
        stringCount,
        utf8 ? "UTF-8" : "UTF-16",
        sorted ? "sorted" : "non-sorted",
        stringCount,
        styleCount,
        getChunkSize());
    for (int i = 0; i < stringCount; i++) {
      out.format("String #%d: %s\n", i, getStringForDump(i));
    }
  }

  /** Returns the number of strings in this pool. */
  public int getStringCount() {
    return stringCount;
  }

  /** Returns whether the string data is utf-8 encoded (as opposed to utf-16). */
  public boolean isUtf8() {
    return utf8;
  }

  /**
   * Returns string {@code data} with backslashes, newlines and double quotes backslash-escaped.
   *
   * @param data index of the string in the pool
   */
  public String getOutputNormalizedString(int data) {
    return getString(data).replace("\\", "\\\\").replace("\n", "\\n").replace("\"", "\\\"");
  }

  /** Returns a new pool with the same counts and flags, built from copies of the four buffers. */
  public StringPool copy() {
    return new StringPool(
        stringCount,
        styleCount,
        utf8,
        sorted,
        copy(stringOffsets),
        copy(styleOffsets),
        copy(stringData),
        copy(styleData));
  }
}