/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.builtin; import java.io.BufferedReader; import java.io.InputStreamReader; import java.io.IOException; import java.net.URL; import java.nio.charset.Charset; import java.util.Map; import org.apache.pig.ExecType; import org.apache.pig.LoadFunc; import org.apache.pig.PigException; import org.apache.pig.backend.datastorage.DataStorage; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.data.DataBag; import org.apache.pig.data.DataByteArray; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import org.apache.pig.impl.io.BufferedPositionedInputStream; import org.apache.pig.impl.logicalLayer.schema.Schema; /** * This load function simply creates a tuple for each line of text that has a single field that * contains the line of text. */ public class TextLoader implements LoadFunc{ BufferedPositionedInputStream in; final private static Charset utf8 = Charset.forName("UTF8"); long end; private TupleFactory mTupleFactory = TupleFactory.getInstance(); public void bindTo(String fileName, BufferedPositionedInputStream in, long offset, long end) throws IOException { this.in = in; this.end = end; // Since we are not block aligned we throw away the first // record and count on a different instance to read it if (offset != 0) getNext(); } public Tuple getNext() throws IOException { if (in == null || in.getPosition() > end) return null; String line; if ((line = in.readLine(utf8, (byte)'\n')) != null) { if (line.length()>0 && line.charAt(line.length()-1)=='\r' && System.getProperty("os.name").toUpperCase().startsWith("WINDOWS")) line = line.substring(0, line.length()-1); return mTupleFactory.newTuple(new DataByteArray(line.getBytes())); } return null; } /** * TextLoader does not support conversion to Boolean. * @throws IOException if the value cannot be cast. */ public Boolean bytesToBoolean(byte[] b) throws IOException { int errCode = 2109; String msg = "TextLoader does not support conversion to Boolean."; throw new ExecException(msg, errCode, PigException.BUG); } /** * TextLoader does not support conversion to Integer * @throws IOException if the value cannot be cast. */ public Integer bytesToInteger(byte[] b) throws IOException { int errCode = 2109; String msg = "TextLoader does not support conversion to Integer."; throw new ExecException(msg, errCode, PigException.BUG); } /** * TextLoader does not support conversion to Long * @throws IOException if the value cannot be cast. */ public Long bytesToLong(byte[] b) throws IOException { int errCode = 2109; String msg = "TextLoader does not support conversion to Long."; throw new ExecException(msg, errCode, PigException.BUG); } /** * TextLoader does not support conversion to Float * @throws IOException if the value cannot be cast. */ public Float bytesToFloat(byte[] b) throws IOException { int errCode = 2109; String msg = "TextLoader does not support conversion to Float."; throw new ExecException(msg, errCode, PigException.BUG); } /** * TextLoader does not support conversion to Double * @throws IOException if the value cannot be cast. */ public Double bytesToDouble(byte[] b) throws IOException { int errCode = 2109; String msg = "TextLoader does not support conversion to Double."; throw new ExecException(msg, errCode, PigException.BUG); } /** * Cast data from bytes to chararray value. * @param b byte array to be cast. * @return String value. * @throws IOException if the value cannot be cast. */ public String bytesToCharArray(byte[] b) throws IOException { return new String(b); } /** * TextLoader does not support conversion to Map * @throws IOException if the value cannot be cast. */ public Map<Object, Object> bytesToMap(byte[] b) throws IOException { int errCode = 2109; String msg = "TextLoader does not support conversion to Map."; throw new ExecException(msg, errCode, PigException.BUG); } /** * TextLoader does not support conversion to Tuple * @throws IOException if the value cannot be cast. */ public Tuple bytesToTuple(byte[] b) throws IOException { int errCode = 2109; String msg = "TextLoader does not support conversion to Tuple."; throw new ExecException(msg, errCode, PigException.BUG); } /** * TextLoader does not support conversion to Bag * @throws IOException if the value cannot be cast. */ public DataBag bytesToBag(byte[] b) throws IOException { int errCode = 2109; String msg = "TextLoader does not support conversion to Bag."; throw new ExecException(msg, errCode, PigException.BUG); } /** * TextLoader doesn't make use of this. */ public void fieldsToRead(Schema schema) {} /** * TextLoader does not provide a schema. */ public Schema determineSchema(String fileName, ExecType execType, DataStorage storage) throws IOException { // TODO Auto-generated method stub return null; } public byte[] toBytes(DataBag bag) throws IOException { int errCode = 2109; String msg = "TextLoader does not support conversion from Bag."; throw new ExecException(msg, errCode, PigException.BUG); } public byte[] toBytes(String s) throws IOException { return s.getBytes(); } public byte[] toBytes(Double d) throws IOException { int errCode = 2109; String msg = "TextLoader does not support conversion from Double."; throw new ExecException(msg, errCode, PigException.BUG); } public byte[] toBytes(Float f) throws IOException { int errCode = 2109; String msg = "TextLoader does not support conversion from Float."; throw new ExecException(msg, errCode, PigException.BUG); } public byte[] toBytes(Integer i) throws IOException { int errCode = 2109; String msg = "TextLoader does not support conversion from Integer."; throw new ExecException(msg, errCode, PigException.BUG); } public byte[] toBytes(Long l) throws IOException { int errCode = 2109; String msg = "TextLoader does not support conversion from Long."; throw new ExecException(msg, errCode, PigException.BUG); } public byte[] toBytes(Map<Object, Object> m) throws IOException { int errCode = 2109; String msg = "TextLoader does not support conversion from Map."; throw new ExecException(msg, errCode, PigException.BUG); } public byte[] toBytes(Tuple t) throws IOException { int errCode = 2109; String msg = "TextLoader does not support conversion from Tuple."; throw new ExecException(msg, errCode, PigException.BUG); } }