package com.hubspot.singularity.data;

import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

import org.junit.Test;

import com.google.inject.Inject;
import com.hubspot.mesos.json.MesosFileChunkObject;
import com.hubspot.singularity.SingularityTestBaseNoDb;
import com.ning.http.client.Response;

/**
 * Tests for {@code SandboxManager.parseResponseBody} on file chunks whose {@code data} field
 * starts or ends in the middle of a multi-byte UTF-8 sequence.
 *
 * <p>Each test builds a raw JSON payload of the form {@code {"data":"<bytes>","offset":123}},
 * serves it through a mocked {@link Response}, and asserts on the parsed data and offset.
 */
public class SandboxManagerTest extends SingularityTestBaseNoDb {
  private static final int DEFAULT_OFFSET = 123;

  // Raw JSON framing placed around the (possibly malformed) data bytes.
  private static final String JSON_START = "{\"data\":\"";
  private static final String JSON_END = "\",\"offset\":" + DEFAULT_OFFSET + "}";

  // SNOWMAN (U+2603) is encoded as three bytes in UTF-8.
  private static final String SNOWMAN = "☃";
  private static final byte[] SNOWMAN_UTF8_BYTES = SNOWMAN.getBytes(StandardCharsets.UTF_8);
  private static final byte FIRST_SNOWMAN_BYTE = SNOWMAN_UTF8_BYTES[0];
  private static final byte SECOND_SNOWMAN_BYTE = SNOWMAN_UTF8_BYTES[1];

  // BALLOON (U+1F388) is encoded as four bytes in UTF-8; the bytes at index 1 and 2 are both
  // continuation bytes, so on their own they are never valid at the start of a character.
  private static final String BALLOON = "\uD83C\uDF88";
  private static final byte[] BALLOON_BYTES = BALLOON.getBytes(StandardCharsets.UTF_8);
  private static final byte SECOND_BALLOON_BYTE = BALLOON_BYTES[1];
  private static final byte THIRD_BALLOON_BYTE = BALLOON_BYTES[2];

  @Inject
  private SandboxManager sandboxManager;

  /** A trailing lead byte of a three-byte character is dropped; the offset is unchanged. */
  @Test
  public void testInvalidUtf8WithOneByteOfThreeByteCharacter() throws IOException {
    // data contains a ☃ character and the first byte of another ☃ character
    byte[] bytes = toBytes(JSON_START, SNOWMAN_UTF8_BYTES, FIRST_SNOWMAN_BYTE, JSON_END);

    MesosFileChunkObject chunk = sandboxManager.parseResponseBody(response(bytes));

    // the partial ☃ should be dropped
    assertThat(chunk.getData()).isEqualTo(SNOWMAN);
    assertThat(chunk.getOffset()).isEqualTo(DEFAULT_OFFSET);
  }

  /** Two trailing bytes of a three-byte character are dropped; the offset is unchanged. */
  @Test
  public void testInvalidUtf8WithTwoBytesOfThreeByteCharacter() throws IOException {
    // data contains a ☃ character and the first two bytes of another ☃ character
    byte[] bytes =
        toBytes(JSON_START, SNOWMAN_UTF8_BYTES, FIRST_SNOWMAN_BYTE, SECOND_SNOWMAN_BYTE, JSON_END);

    MesosFileChunkObject chunk = sandboxManager.parseResponseBody(response(bytes));

    // the partial ☃ should be dropped
    assertThat(chunk.getData()).isEqualTo(SNOWMAN);
    assertThat(chunk.getOffset()).isEqualTo(DEFAULT_OFFSET);
  }

  /** Well-formed multi-byte data passes through untouched. */
  @Test
  public void testValidUtf8WithThreeByteCharacters() throws IOException {
    // data contains two ☃ characters
    byte[] bytes = toBytes(JSON_START, SNOWMAN_UTF8_BYTES, SNOWMAN_UTF8_BYTES, JSON_END);

    MesosFileChunkObject chunk = sandboxManager.parseResponseBody(response(bytes));

    // nothing should be dropped
    assertThat(chunk.getData()).isEqualTo(SNOWMAN + SNOWMAN);
    assertThat(chunk.getOffset()).isEqualTo(DEFAULT_OFFSET);
  }

  /** One leading continuation byte is dropped and the offset moves forward by one. */
  @Test
  public void testInvalidUtf8WithLastByte() throws IOException {
    // data contains one stray continuation byte (from a balloon character) followed by a ☃
    byte[] bytes = toBytes(JSON_START, THIRD_BALLOON_BYTE, SNOWMAN_UTF8_BYTES, JSON_END);

    MesosFileChunkObject chunk = sandboxManager.parseResponseBody(response(bytes));

    // the partial balloon should be dropped and the offset should be advanced by one byte
    assertThat(chunk.getData()).isEqualTo(SNOWMAN);
    assertThat(chunk.getOffset()).isEqualTo(DEFAULT_OFFSET + 1);
  }

  /** Two leading continuation bytes are dropped and the offset moves forward by two. */
  @Test
  public void testInvalidUtf8WithLastTwoBytes() throws IOException {
    // data contains two stray continuation bytes (from a balloon character) followed by a ☃
    byte[] bytes =
        toBytes(JSON_START, SECOND_BALLOON_BYTE, THIRD_BALLOON_BYTE, SNOWMAN_UTF8_BYTES, JSON_END);

    MesosFileChunkObject chunk = sandboxManager.parseResponseBody(response(bytes));

    // the partial balloon should be dropped and the offset should be advanced by two bytes
    assertThat(chunk.getData()).isEqualTo(SNOWMAN);
    assertThat(chunk.getOffset()).isEqualTo(DEFAULT_OFFSET + 2);
  }

  /** Data made of a single continuation byte becomes empty; the offset moves forward by one. */
  @Test
  public void testInvalidUtf8WithOneByte() throws IOException {
    // data contains only a single stray continuation byte from a balloon character
    byte[] bytes = toBytes(JSON_START, SECOND_BALLOON_BYTE, JSON_END);

    MesosFileChunkObject chunk = sandboxManager.parseResponseBody(response(bytes));

    // the partial balloon should be dropped and the offset should be advanced by one byte
    assertThat(chunk.getData()).isEqualTo("");
    assertThat(chunk.getOffset()).isEqualTo(DEFAULT_OFFSET + 1);
  }

  /** Data made of two continuation bytes becomes empty; the offset moves forward by two. */
  @Test
  public void testInvalidUtf8WithTwoBytes() throws IOException {
    // data contains only two stray continuation bytes from a balloon character
    byte[] bytes = toBytes(JSON_START, SECOND_BALLOON_BYTE, THIRD_BALLOON_BYTE, JSON_END);

    MesosFileChunkObject chunk = sandboxManager.parseResponseBody(response(bytes));

    // the partial balloon should be dropped and the offset should be advanced by two bytes
    assertThat(chunk.getData()).isEqualTo("");
    assertThat(chunk.getOffset()).isEqualTo(DEFAULT_OFFSET + 2);
  }

  /**
   * Concatenates the given parts into one byte array, in order.
   *
   * @param objects parts to append; each must be a {@link String} (written as UTF-8), a
   *     {@code byte[]} (written verbatim) or a {@link Byte} (written as a single byte)
   * @return the concatenated bytes
   * @throws IOException if writing to the in-memory buffer fails
   * @throws IllegalArgumentException if a part is {@code null} or of an unsupported type
   */
  private static byte[] toBytes(Object... objects) throws IOException {
    ByteArrayOutputStream output = new ByteArrayOutputStream();

    for (Object o : objects) {
      if (o instanceof String) {
        output.write(((String) o).getBytes(StandardCharsets.UTF_8));
      } else if (o instanceof byte[]) {
        output.write((byte[]) o);
      } else if (o instanceof Byte) {
        output.write((Byte) o);
      } else {
        // Fail loudly: silently skipping a part would produce a payload that differs from
        // what the test author intended and could let a broken test pass.
        String type = (o == null) ? "null" : o.getClass().getName();
        throw new IllegalArgumentException("Unsupported argument type for toBytes: " + type);
      }
    }

    return output.toByteArray();
  }

  /**
   * Builds a mocked {@link Response} whose {@code getResponseBodyAsByteBuffer()} returns the
   * given bytes wrapped in a {@link ByteBuffer}.
   *
   * @param bytes raw response body
   * @return the stubbed mock
   * @throws IOException declared because the stubbed method declares it
   */
  private static Response response(byte[] bytes) throws IOException {
    Response response = mock(Response.class);
    when(response.getResponseBodyAsByteBuffer()).thenReturn(ByteBuffer.wrap(bytes));
    return response;
  }
}