/* Copyright (c) 2012 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ /** * $id$ */ package com.linkedin.data.transform.patch; import static com.linkedin.data.transform.patch.PatchConstants.COMMAND_PREFIX; import static com.linkedin.data.transform.patch.PatchConstants.DELETE_COMMAND; import static com.linkedin.data.transform.patch.PatchConstants.SET_COMMAND; import java.util.Arrays; import java.util.HashMap; import java.util.IdentityHashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import com.linkedin.data.DataList; import com.linkedin.data.DataMap; import com.linkedin.data.transform.Instruction; import com.linkedin.data.transform.Interpreter; import com.linkedin.data.transform.InterpreterContext; public class Patch implements Interpreter { //constant used internally by the Patch to mark that field has been used //for further processing private static final String CHILD_PROCESS_PSEUDOCOMMAND = "$child-process"; // list of operations that do data manipulation other than just removing data private static final List<String> NON_DELETE_OPERATIONS = Arrays.asList(new String[] { SET_COMMAND }); // used for memoization of types of operations down the tree, contains true if down the // operations tree are only $delete operations the reason for using IdentityHashMap is // that we know that each node is distinct object, they never repeat in a tree and we // want to avoid expensive hash calculations on maps and lists private IdentityHashMap<DataMap, Boolean> _hasDeletesOnly = new IdentityHashMap<DataMap, Boolean>(); // On $set and $delete operations, log the path as an info message in the interpreter context private final boolean _logOperations; public Patch() { this(false); } public Patch(boolean logOperations) { _logOperations = logOperations; } /** * Interpret and execute the current instruction based on the * interpreter context. * * @param instrCtx the current interpreter context */ public void interpret(final InterpreterContext instrCtx) { Instruction instruction = instrCtx.getCurrentInstruction(); // preconditions: // operation's node is always DataMap assert instruction.getOperation().getClass() == DataMap.class; // data's node is always DataMap assert instruction.getData().getClass() == DataMap.class; //_usedFields variable is used to keep track of fields, which were already used //at this nodes. The reason for it is that if field should not be used in more than //one operation e.g. $set and $delete, because such patch becomes ambiguous. //Each operation, upon being executed updates this variable. final Map<String, String> usedFields = new HashMap<String, String>(); DataMap opNode = (DataMap) instruction.getOperation(); DataMap dataNode = (DataMap) instruction.getData(); /** * Apply all supported operations here. _usedFields is used to keep track of fields * that operations were applied to. */ executeSetCommand(opNode.get(SET_COMMAND), dataNode, usedFields, instrCtx); executeDeleteCommand(opNode.get(DELETE_COMMAND), dataNode, usedFields, instrCtx); // iterate over children for (Entry<String, Object> entry : opNode.entrySet()) processChild(dataNode, entry.getKey(), entry.getValue(), usedFields, instrCtx); } private boolean processChild(DataMap dataNode, String name, Object opChild, Map<String, String> usedFields, final InterpreterContext instrCtx) { instrCtx.setCurrentField(name); // do not process reserved words if (!name.startsWith(COMMAND_PREFIX)) { if (usedFields.containsKey(name)) { instrCtx.addErrorMessage("field %1$s can not be used in both %2$s operation and " + "be a branch in Patch at the same time", name, usedFields.get(name)); return false; } else if (opChild.getClass() == DataMap.class) { usedFields.put(name, CHILD_PROCESS_PSEUDOCOMMAND); DataMap opChildDataMap = (DataMap) opChild; Object dataChild = dataNode.get(name); if (dataChild == null) { // this is an optimization: if respective object does not exist in data // and if patch's branch contains only deletes operations, then it is // not necessary to create nodes on the data object and process that branch, // unless we have to log all delete operations if (!hasDeletesOnly(opChildDataMap) || _logOperations) { // if patch does data manipulations other than deletes, then we need to // create respective branch in data object and continue processing patch // on that branch dataChild = new DataMap(); dataNode.put(name, dataChild); instrCtx.scheduleInstruction(new Instruction(opChild, (DataMap)dataChild, instrCtx.getPath())); } } else { // equivalent object exists in data tree if (dataChild.getClass() == DataMap.class) // if it's of proper type, then create new instruction instrCtx.scheduleInstruction(new Instruction(opChild, (DataMap)dataChild, instrCtx.getPath())); else // incorrect type in data object - it means that patch is // incompatible with data { instrCtx.addErrorMessage("patch incopatible with data object, expected %1$s" + " field to be of type DataMap, but found: %2$s", name, dataChild.getClass().getName()); return false; } } } else { instrCtx.addErrorMessage("incorrect wire format of patch, simple type values are " + "allowed only as children of commands; node name: %1$s, value: %2$s", name, opChild); return false; } } return true; } private boolean executeDeleteCommand(Object deleteCommand, Object data, Map<String, String> usedFields, final InterpreterContext instrCtx) { instrCtx.setCurrentField(DELETE_COMMAND); if (deleteCommand != null) { // preconditions: // deleteCommand value is of type DataList assert deleteCommand.getClass() == DataList.class; // data is of type DataMap assert data.getClass() == DataMap.class; DataList delDataList = (DataList) deleteCommand; DataMap dataDataMap = (DataMap) data; for (Object key : delDataList) { if (usedFields.containsKey(key)) { instrCtx.addErrorMessage("field %1$s can not be used in both %2$s operation and " + DELETE_COMMAND + " operation at the same time", key, usedFields.get(key)); return false; } else { usedFields.put(key.toString(), DELETE_COMMAND); dataDataMap.remove(key); if (_logOperations) { instrCtx.addInfoMessage(key.toString()); } } } } return true; } /** * Executes $set command and returns true is it was successful and false otherwise. */ private boolean executeSetCommand(Object setCommand, Object data, Map<String, String> usedFields, final InterpreterContext instrCtx) { instrCtx.setCurrentField(SET_COMMAND); if (setCommand != null) { // input invariants // deleteCommand value is of DataMap type assert setCommand.getClass() == DataMap.class : setCommand.getClass(); // data is of DataMap type assert data.getClass() == DataMap.class : data.getClass(); DataMap setDataMap = (DataMap) setCommand; DataMap dataDataMap = (DataMap) data; for (Entry<String, Object> entry : setDataMap.entrySet()) { String key = entry.getKey(); if (usedFields.containsKey(key)) { instrCtx.addErrorMessage("field %1$s can not be used in both %2$s operation and " + SET_COMMAND + " operation at the same time", key, usedFields.get(key)); return false; } else { usedFields.put(key.toString(), SET_COMMAND); dataDataMap.put(key, entry.getValue()); if (_logOperations) { instrCtx.addInfoMessage(key); } } } } return true; } /** * Checks whether patch rooted in given node contains data manipulations other * than deletes. It is used for optimization e.g. if patch contains only deletes and * respective branch does not exist in the data object, then creation of that branch in * data object and further processing can be skipped. This method uses memoization to * achieve amortized constant time per operation. * * @param opNode * node, whose contents will be inspected to check if they contain only delete * operations * @return true if node (and it's subnodes) contain only delete operations */ private boolean hasDeletesOnly(DataMap opNode) { Boolean hdo = _hasDeletesOnly.get(opNode); if (hdo == null) { // if value has not been computed yet // patch has deletes only unless: // - current node contains non delete operation Iterator<String> it = NON_DELETE_OPERATIONS.iterator(); while (hdo == null && it.hasNext()) { if (opNode.containsKey(it.next())) hdo = false; } // - one of children contains non delete operation Iterator<Object> nodeIt = opNode.values().iterator(); while (hdo == null && nodeIt.hasNext()) { Object child = nodeIt.next(); if (child.getClass() == DataMap.class) if (!hasDeletesOnly((DataMap) child)) hdo = false; } // if neither of previous conditions holds, then // patch contains only delete operations if (hdo == null) hdo = true; // memorize this value _hasDeletesOnly.put(opNode, hdo); } return hdo; } }