/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nutch.crawl;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;

// Commons Logging imports
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.StringUtils;

import org.apache.nutch.protocol.ProtocolStatus;

/**
 * A writable map, with behavior similar to <code>java.util.HashMap</code>.
 * In addition to each key/value writable tuple, two additional bytes are
 * stored to identify the Writable classes. This means that a maximum of
 * 255 different class types can be used for key and value objects.
 * A binary-id to class mapping is defined in a static block of this class.
 * However, it is also possible to use custom implementations of Writable:
 * for these custom Writables, a byte-id / UTF class-name tuple is written
 * into the header of each MapWritable that uses them.
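 *
 * <p>A minimal usage sketch (illustrative only, not part of the original
 * documentation; key and value choices are arbitrary):</p>
 *
 * <pre>
 *   MapWritable map = new MapWritable();
 *   map.put(new Text("url"), new Text("http://www.example.com/"));
 *   map.put(new Text("score"), new FloatWritable(1.0f));
 *   Writable score = map.get(new Text("score"));  // FloatWritable(1.0f)
 * </pre>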
 *
 * @author Stefan Groschupf
 * @deprecated Use org.apache.hadoop.io.MapWritable instead.
 */
public class MapWritable implements Writable {

  public static final Log LOG = LogFactory.getLog(MapWritable.class);

  private KeyValueEntry fFirst;
  private KeyValueEntry fLast;
  private KeyValueEntry fOld;
  private int fSize = 0;

  private int fIdCount = 0;
  private ClassIdEntry fIdLast;
  private ClassIdEntry fIdFirst;

  private static Map<Class, Byte> CLASS_ID_MAP = new HashMap<Class, Byte>();
  private static Map<Byte, Class> ID_CLASS_MAP = new HashMap<Byte, Class>();

  // Well-known classes are pre-assigned the ids -127 through -114; the id
  // -128 is reserved as the "unknown class" sentinel (see getClassId below).
  static {
    addToMap(NullWritable.class, new Byte((byte) -127));
    addToMap(LongWritable.class, new Byte((byte) -126));
    addToMap(Text.class, new Byte((byte) -125));
    addToMap(MD5Hash.class, new Byte((byte) -124));
    addToMap(org.apache.nutch.fetcher.FetcherOutput.class, new Byte((byte) -123));
    addToMap(org.apache.nutch.protocol.Content.class, new Byte((byte) -122));
    addToMap(org.apache.nutch.parse.ParseText.class, new Byte((byte) -121));
    addToMap(org.apache.nutch.parse.ParseData.class, new Byte((byte) -120));
    addToMap(MapWritable.class, new Byte((byte) -119));
    addToMap(BytesWritable.class, new Byte((byte) -118));
    addToMap(FloatWritable.class, new Byte((byte) -117));
    addToMap(IntWritable.class, new Byte((byte) -116));
    addToMap(ObjectWritable.class, new Byte((byte) -115));
    addToMap(ProtocolStatus.class, new Byte((byte) -114));
  }

  private static void addToMap(Class clazz, Byte byteId) {
    CLASS_ID_MAP.put(clazz, byteId);
    ID_CLASS_MAP.put(byteId, clazz);
  }

  public MapWritable() {
  }

  /**
   * Copy constructor. This constructor makes a deep copy, using serialization /
   * deserialization to break any possible references to contained objects.
   *
   * @param map map to copy from
   */
  public MapWritable(MapWritable map) {
    if (map != null) {
      try {
        DataOutputBuffer dob = new DataOutputBuffer();
        map.write(dob);
        DataInputBuffer dib = new DataInputBuffer();
        dib.reset(dob.getData(), dob.getLength());
        readFields(dib);
      } catch (IOException e) {
        throw new IllegalArgumentException("this map cannot be copied: "
            + StringUtils.stringifyException(e));
      }
    }
  }
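  // Illustrative sketch (not from the original source) of the deep-copy
  // guarantee above -- mutating the copy leaves the source map untouched:
  //
  //   MapWritable original = new MapWritable();
  //   original.put(new Text("k"), new Text("v"));
  //   MapWritable copy = new MapWritable(original);
  //   copy.put(new Text("k"), new Text("changed"));
  //   // original.get(new Text("k")) still returns the Text "v"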
  public void clear() {
    // Keep the cleared entries around for possible reuse in readFields().
    fOld = fFirst;
    fFirst = fLast = null;
    fSize = 0;
  }

  public boolean containsKey(Writable key) {
    return findEntryByKey(key) != null;
  }

  public boolean containsValue(Writable value) {
    KeyValueEntry entry = fFirst;
    while (entry != null) {
      if (entry.fValue.equals(value)) {
        return true;
      }
      entry = entry.fNextEntry;
    }
    return false;
  }

  public Writable get(Writable key) {
    KeyValueEntry entry = findEntryByKey(key);
    if (entry != null) {
      return entry.fValue;
    }
    return null;
  }

  public int hashCode() {
    final int seed = 23;
    int hash = 0;
    KeyValueEntry entry = fFirst;
    while (entry != null) {
      hash += entry.fKey.hashCode() * seed;
      hash += entry.fValue.hashCode() * seed;
      entry = entry.fNextEntry;
    }
    return hash;
  }

  public boolean isEmpty() {
    return fFirst == null;
  }

  public Set<Writable> keySet() {
    HashSet<Writable> set = new HashSet<Writable>();
    if (isEmpty())
      return set;
    set.add(fFirst.fKey);
    KeyValueEntry entry = fFirst;
    while ((entry = entry.fNextEntry) != null) {
      set.add(entry.fKey);
    }
    return set;
  }

  public Writable put(Writable key, Writable value) {
    KeyValueEntry entry = findEntryByKey(key);
    if (entry != null) {
      Writable oldValue = entry.fValue;
      entry.fValue = value;
      return oldValue;
    }
    KeyValueEntry newEntry = new KeyValueEntry(key, value);
    fSize++;
    if (fLast != null) {
      fLast = fLast.fNextEntry = newEntry;
      return null;
    }
    fLast = fFirst = newEntry;
    return null;
  }

  public void putAll(MapWritable map) {
    if (map == null || map.size() == 0) {
      return;
    }
    Iterator<Writable> iterator = map.keySet().iterator();
    while (iterator.hasNext()) {
      Writable key = iterator.next();
      Writable value = map.get(key);
      put(key, value);
    }
  }

  public Writable remove(Writable key) {
    Writable oldValue = null;
    KeyValueEntry entry = fFirst;
    KeyValueEntry predecessor = null;
    while (entry != null) {
      if (entry.fKey.equals(key)) {
        oldValue = entry.fValue;
        if (predecessor == null) {
          fFirst = fFirst.fNextEntry;
        } else {
          predecessor.fNextEntry = entry.fNextEntry;
        }
        // Identity comparison suffices here: keys are unique in the list,
        // so an equals() match could only be this same entry anyway.
        if (entry == fLast) {
          fLast = predecessor;
        }
        fSize--;
        return oldValue;
      }
      predecessor = entry;
      entry = entry.fNextEntry;
    }
    return oldValue;
  }

  public int size() {
    return fSize;
  }

  public Collection<Writable> values() {
    LinkedList<Writable> list = new LinkedList<Writable>();
    KeyValueEntry entry = fFirst;
    while (entry != null) {
      list.add(entry.fValue);
      entry = entry.fNextEntry;
    }
    return list;
  }

  public boolean equals(Object obj) {
    if (obj instanceof MapWritable) {
      MapWritable map = (MapWritable) obj;
      if (fSize != map.fSize)
        return false;
      HashSet<KeyValueEntry> set1 = new HashSet<KeyValueEntry>();
      KeyValueEntry e1 = fFirst;
      while (e1 != null) {
        set1.add(e1);
        e1 = e1.fNextEntry;
      }
      HashSet<KeyValueEntry> set2 = new HashSet<KeyValueEntry>();
      KeyValueEntry e2 = map.fFirst;
      while (e2 != null) {
        set2.add(e2);
        e2 = e2.fNextEntry;
      }
      return set1.equals(set2);
    }
    return false;
  }

  public String toString() {
    if (fFirst != null) {
      StringBuffer buffer = new StringBuffer();
      KeyValueEntry entry = fFirst;
      while (entry != null) {
        buffer.append(entry.toString());
        buffer.append(" ");
        entry = entry.fNextEntry;
      }
      return buffer.toString();
    }
    return null;
  }

  private KeyValueEntry findEntryByKey(final Writable key) {
    KeyValueEntry entry = fFirst;
    while (entry != null && !entry.fKey.equals(key)) {
      entry = entry.fNextEntry;
    }
    return entry;
  }

  // serialization methods

  public void write(DataOutput out) throws IOException {
    out.writeInt(size());
    if (size() > 0) {
      // scan for unknown classes
      createInternalIdClassEntries();
      // write internal map
      out.writeByte(fIdCount);
      if (fIdCount > 0) {
        ClassIdEntry entry = fIdFirst;
        while (entry != null) {
          out.writeByte(entry.fId);
          Text.writeString(out, entry.fclazz.getName());
          entry = entry.fNextIdEntry;
        }
      }
      // write meta data
      KeyValueEntry entry = fFirst;
      while (entry != null) {
        out.writeByte(entry.fKeyClassId);
        out.writeByte(entry.fValueClassId);
        entry.fKey.write(out);
        entry.fValue.write(out);
        entry = entry.fNextEntry;
      }
    }
  }
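  // A sketch of the resulting serialized layout, inferred from write() above
  // (illustrative, not normative):
  //
  //   int    size                       -- number of key/value entries
  //   byte   fIdCount                   -- number of custom class ids
  //   { byte id, UTF className }*       -- one tuple per custom Writable class
  //   { byte keyClassId, byte valueClassId,
  //     <key bytes>, <value bytes> }*   -- one record per entry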
  public void readFields(DataInput in) throws IOException {
    clear();
    fSize = in.readInt();
    if (fSize > 0) {
      // read class-id map
      fIdCount = in.readByte();
      byte id;
      Class clazz;
      for (int i = 0; i < fIdCount; i++) {
        try {
          id = in.readByte();
          clazz = Class.forName(Text.readString(in));
          addIdEntry(id, clazz);
        } catch (Exception e) {
          if (LOG.isWarnEnabled()) {
            LOG.warn("Unable to load internal map entry: " + e.toString());
          }
          fIdCount--;
        }
      }
      KeyValueEntry entry;
      for (int i = 0; i < fSize; i++) {
        try {
          entry = getKeyValueEntry(in.readByte(), in.readByte());
          entry.fKey.readFields(in);
          entry.fValue.readFields(in);
          if (fFirst == null) {
            fFirst = fLast = entry;
          } else {
            fLast = fLast.fNextEntry = entry;
          }
        } catch (IOException e) {
          if (LOG.isWarnEnabled()) {
            LOG.warn("Unable to load meta data entry, ignoring: " + e.toString());
          }
          fSize--;
        }
      }
    }
  }

  private void createInternalIdClassEntries() {
    KeyValueEntry entry = fFirst;
    byte id;
    while (entry != null) {
      id = getClassId(entry.fKey.getClass());
      if (id == -128) {
        // Unknown class: assign the next free id above the pre-defined range.
        id = addIdEntry((byte) (-128 + CLASS_ID_MAP.size() + ++fIdCount),
            entry.fKey.getClass());
      }
      entry.fKeyClassId = id;
      id = getClassId(entry.fValue.getClass());
      if (id == -128) {
        id = addIdEntry((byte) (-128 + CLASS_ID_MAP.size() + ++fIdCount),
            entry.fValue.getClass());
      }
      entry.fValueClassId = id;
      entry = entry.fNextEntry;
    }
  }

  private byte addIdEntry(byte id, Class clazz) {
    ClassIdEntry newEntry = new ClassIdEntry(id, clazz);
    if (fIdFirst == null) {
      fIdFirst = fIdLast = newEntry;
    } else {
      fIdLast.fNextIdEntry = newEntry;
      fIdLast = newEntry;
    }
    return id;
  }

  private byte getClassId(Class clazz) {
    Byte classId = CLASS_ID_MAP.get(clazz);
    if (classId != null) {
      return classId.byteValue();
    }
    ClassIdEntry entry = fIdFirst;
    while (entry != null) {
      if (entry.fclazz.equals(clazz)) {
        return entry.fId;
      }
      entry = entry.fNextIdEntry;
    }
    // -128 is the "unknown class" sentinel.
    return -128;
  }

  private KeyValueEntry getKeyValueEntry(final byte keyId, final byte valueId)
      throws IOException {
    // First try to recycle a matching entry from the list saved by clear().
    KeyValueEntry entry = fOld;
    KeyValueEntry last = null;
    byte entryKeyId;
    byte entryValueId;
    while (entry != null) {
      entryKeyId = getClassId(entry.fKey.getClass());
      entryValueId = getClassId(entry.fValue.getClass());
      if (entryKeyId == keyId && entryValueId == valueId) {
        if (last != null) {
          last.fNextEntry = entry.fNextEntry;
        } else {
          fOld = entry.fNextEntry;
        }
        entry.fNextEntry = null; // reset next entry
        return entry;
      }
      last = entry;
      entry = entry.fNextEntry;
    }
    // No reusable entry: instantiate fresh key and value objects.
    Class keyClass = getClass(keyId);
    Class valueClass = getClass(valueId);
    try {
      return new KeyValueEntry((Writable) keyClass.newInstance(),
          (Writable) valueClass.newInstance());
    } catch (Exception e) {
      throw new IOException("unable to instantiate class: " + e.toString());
    }
  }

  private Class getClass(final byte id) throws IOException {
    Class clazz = ID_CLASS_MAP.get(new Byte(id));
    if (clazz == null) {
      ClassIdEntry entry = fIdFirst;
      while (entry != null) {
        if (entry.fId == id) {
          return entry.fclazz;
        }
        entry = entry.fNextIdEntry;
      }
    } else {
      return clazz;
    }
    throw new IOException("unable to load class for id: " + id);
  }

  /** an entry holds writable key and value */
  private class KeyValueEntry {
    private byte fKeyClassId;
    private byte fValueClassId;
    private Writable fKey;
    private Writable fValue;
    private KeyValueEntry fNextEntry;

    public KeyValueEntry(Writable key, Writable value) {
      this.fKey = key;
      this.fValue = value;
    }

    public String toString() {
      return fKey.toString() + ":" + fValue.toString();
    }

    public boolean equals(Object obj) {
      if (obj instanceof KeyValueEntry) {
        KeyValueEntry entry = (KeyValueEntry) obj;
        return entry.fKey.equals(fKey) && entry.fValue.equals(fValue);
      }
      return false;
    }

    public int hashCode() {
      return toString().hashCode();
    }
  }

  /** container for id / class tuples */
  private class ClassIdEntry {
    public ClassIdEntry(byte id, Class clazz) {
      fId = id;
      fclazz = clazz;
    }

    private byte fId;
    private Class fclazz;
    private ClassIdEntry fNextIdEntry;
  }
}
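// A minimal round-trip driver (illustrative only; the MapWritableDemo class
// below is not part of the original Nutch source). It serializes a map into
// an in-memory buffer and reads it back, exercising write()/readFields().
class MapWritableDemo {
  public static void main(String[] args) throws IOException {
    MapWritable map = new MapWritable();
    map.put(new Text("url"), new Text("http://www.example.com/"));
    map.put(new Text("score"), new FloatWritable(1.0f));

    // Serialize into an in-memory buffer ...
    DataOutputBuffer out = new DataOutputBuffer();
    map.write(out);

    // ... and deserialize into a fresh instance.
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    MapWritable copy = new MapWritable();
    copy.readFields(in);

    System.out.println(copy); // prints: url:http://www.example.com/ score:1.0
  }
}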