package org.rakam.postgresql.analysis; import com.google.common.base.Throwables; import com.google.inject.Singleton; import com.google.inject.name.Named; import io.airlift.log.Logger; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.postgresql.util.PGobject; import org.rakam.analysis.JDBCPoolDataSource; import org.rakam.analysis.metadata.Metastore; import org.rakam.collection.Event; import org.rakam.collection.FieldDependencyBuilder; import org.rakam.collection.FieldDependencyBuilder.FieldDependency; import org.rakam.collection.FieldType; import org.rakam.collection.SchemaField; import org.rakam.plugin.EventStore; import org.rakam.plugin.SyncEventStore; import org.rakam.util.JsonHelper; import org.rakam.util.ValidationUtil; import javax.inject.Inject; import java.math.BigDecimal; import java.sql.Connection; import java.sql.Date; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Time; import java.sql.Timestamp; import java.time.LocalDate; import java.time.LocalTime; import java.time.ZoneId; import java.util.ArrayList; import java.util.Calendar; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TimeZone; import java.util.stream.Collectors; import java.util.stream.IntStream; import static org.rakam.util.ValidationUtil.checkTableColumn; @Singleton public class PostgresqlEventStore implements SyncEventStore { private final static Logger LOGGER = Logger.get(PostgresqlEventStore.class); private final Set<String> sourceFields; private final JDBCPoolDataSource connectionPool; public static final Calendar UTC_CALENDAR = Calendar.getInstance(TimeZone.getTimeZone(ZoneId.of("UTC"))); @Inject public PostgresqlEventStore(@Named("store.adapter.postgresql") JDBCPoolDataSource connectionPool, FieldDependency fieldDependency) { this.connectionPool = connectionPool; this.sourceFields = fieldDependency.dependentFields.keySet(); } @Override public void store(Event event) { GenericRecord record = event.properties(); try (Connection connection = connectionPool.getConnection()) { Schema schema = event.properties().getSchema(); PreparedStatement ps = connection.prepareStatement(getQuery(event.project(), event.collection(), schema)); bindParam(connection, ps, event.schema(), record); ps.executeUpdate(); } catch (SQLException e) { Throwables.propagate(e); } } @Override public int[] storeBatch(List<Event> events) { Map<String, List<Event>> groupedByCollection = events.stream() .collect(Collectors.groupingBy(Event::collection)); Map<String, Integer> successfulCollections = new HashMap<>(groupedByCollection.size()); try (Connection connection = connectionPool.getConnection()) { for (Map.Entry<String, List<Event>> entry : groupedByCollection.entrySet()) { connection.setAutoCommit(false); // last event must have the last schema List<Event> eventsForCollection = entry.getValue(); Event lastEvent = getLastEvent(eventsForCollection); PreparedStatement ps = connection.prepareStatement(getQuery(lastEvent.project(), entry.getKey(), lastEvent.properties().getSchema())); for (int i = 0; i < eventsForCollection.size(); i++) { Event event = eventsForCollection.get(i); GenericRecord properties = event.properties(); bindParam(connection, ps, lastEvent.schema(), properties); ps.addBatch(); if (i > 0 && i % 5000 == 0) { ps.executeBatch(); Integer value = successfulCollections.get(entry.getKey()); if(value == null) { successfulCollections.put(entry.getKey(), i); } else { successfulCollections.put(entry.getKey(), i + value); } } } ps.executeBatch(); connection.commit(); successfulCollections.compute(entry.getKey(), (k, v) -> eventsForCollection.size()); } connection.setAutoCommit(true); return EventStore.SUCCESSFUL_BATCH; } catch (SQLException e) { List<Event> sample = events.size() > 5 ? events.subList(0, 5) : events; LOGGER.error(e.getNextException() != null ? e.getNextException() : e, "Error while storing events in Postgresql batch query: " + sample); return IntStream.range(0, events.size()).filter(idx -> { Event event = events.get(idx); Integer checkpointPosition = successfulCollections.get(event.collection()); return checkpointPosition == null || groupedByCollection.get(event.collection()).indexOf(event) > checkpointPosition; }).toArray(); } } // get the event with the last schema private Event getLastEvent(List<Event> eventsForCollection) { Event event = eventsForCollection.get(0); for (int i = 1; i < eventsForCollection.size(); i++) { Event newEvent = eventsForCollection.get(i); if (newEvent.schema().size() > event.schema().size()) { event = newEvent; } } return event; } private void bindParam(Connection connection, PreparedStatement ps, List<SchemaField> fields, GenericRecord record) throws SQLException { Object value; for (int i = 0; i < fields.size(); i++) { SchemaField field = fields.get(i); value = record.get(field.getName()); if (value == null) { ps.setNull(i + 1, 0); continue; } FieldType type = field.getType(); switch (type) { case STRING: ps.setString(i + 1, (String) value); break; case LONG: ps.setLong(i + 1, ((Number) value).longValue()); break; case INTEGER: ps.setInt(i + 1, ((Number) value).intValue()); break; case DECIMAL: ps.setBigDecimal(i + 1, new BigDecimal(((Number) value).doubleValue())); break; case DOUBLE: ps.setDouble(i + 1, ((Number) value).doubleValue()); break; case TIMESTAMP: Timestamp x = new Timestamp(((Number) value).longValue()); if (x.getTime() < 0) { ps.setTimestamp(i + 1, null); } else { ps.setTimestamp(i + 1, x, UTC_CALENDAR); } break; case TIME: ps.setTime(i + 1, Time.valueOf(LocalTime.ofSecondOfDay(((Number) value).intValue())), UTC_CALENDAR); break; case DATE: ps.setDate(i + 1, Date.valueOf(LocalDate.ofEpochDay(((Number) value).intValue()))); break; case BOOLEAN: ps.setBoolean(i + 1, (Boolean) value); break; case BINARY: ps.setBytes(i + 1, (byte[]) value); break; default: if (type.isArray()) { String typeName = toPostgresqlPrimitiveTypeName(type.getArrayElementType()); ps.setArray(i + 1, connection.createArrayOf(typeName, ((List) value).toArray())); } else if (type.isMap()) { PGobject jsonObject = new PGobject(); jsonObject.setType("jsonb"); jsonObject.setValue(JsonHelper.encode(value)); ps.setObject(i + 1, jsonObject); } else { throw new UnsupportedOperationException(); } } } } private String getQuery(String project, String collection, Schema schema) { StringBuilder query = new StringBuilder("INSERT INTO ") .append(project) .append(".") .append(ValidationUtil.checkCollection(collection)); StringBuilder params = new StringBuilder(); List<Schema.Field> fields = schema.getFields(); Schema.Field firstField = fields.get(0); if (!sourceFields.contains(firstField.name())) { query.append(" (").append(checkTableColumn(firstField.name())); params.append('?'); } for (int i = 1; i < fields.size(); i++) { Schema.Field field = fields.get(i); if (!sourceFields.contains(field.name())) { query.append(", ").append(checkTableColumn(field.name())); params.append(", ?"); } } return query.append(") VALUES (").append(params.toString()).append(")").toString(); } public static String toPostgresqlPrimitiveTypeName(FieldType type) { switch (type) { case LONG: return "int8"; case INTEGER: return "int4"; case DECIMAL: return "decimal"; case STRING: return "text"; case BOOLEAN: return "bool"; case DATE: return "date"; case TIME: return "time"; case TIMESTAMP: return "timestamp"; case DOUBLE: return "float8"; default: throw new IllegalStateException("sql type couldn't converted to fieldtype"); } } }