/*
 * Copyright 2012 castLabs GmbH, Berlin
 *
 * Licensed under the Apache License, Version 2.0 (the License);
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.mp4parser.muxer.tracks;

import org.mp4parser.boxes.iso14496.part1.objectdescriptors.*;
import org.mp4parser.boxes.iso14496.part12.CompositionTimeToSample;
import org.mp4parser.boxes.iso14496.part12.SampleDependencyTypeBox;
import org.mp4parser.boxes.iso14496.part12.SampleDescriptionBox;
import org.mp4parser.boxes.iso14496.part12.SubSampleInformationBox;
import org.mp4parser.boxes.iso14496.part14.ESDescriptorBox;
import org.mp4parser.boxes.sampleentry.AudioSampleEntry;
import org.mp4parser.muxer.AbstractTrack;
import org.mp4parser.muxer.DataSource;
import org.mp4parser.muxer.Sample;
import org.mp4parser.muxer.TrackMetaData;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.WritableByteChannel;
import java.util.*;

/**
 * Audio track backed by a stream of ADTS frames read from a {@link DataSource}.
 *
 * <p>The constructor scans the whole data source once, registering one {@link Sample} per
 * ADTS frame (the payload following each header). The first header found supplies the sample
 * rate and channel configuration used for the sample description and track metadata. Every
 * sample is given a duration of 1024 timescale units, the timescale being the sample rate.
 *
 * <p>Only streams carrying exactly one AAC frame per ADTS frame are accepted.
 */
public class AACTrackImpl extends AbstractTrack {

    /**
     * Bidirectional lookup held in a single map: sampling frequency in Hz maps to its
     * frequency index (e.g. 44100 to 4) and frequency index maps to its frequency in Hz
     * (e.g. 4 to 44100). The two key ranges do not collide.
     */
    public static final Map<Integer, Integer> SAMPLING_FREQUENCY_INDEX_MAP = new HashMap<Integer, Integer>();

    /** Human readable names keyed by audio object type number. */
    static Map<Integer, String> audioObjectTypes = new HashMap<Integer, String>();

    /** Number of timescale units assigned to every sample; also used to derive the packet rate. */
    private static final int SAMPLES_PER_FRAME = 1024;

    /** Number of bytes of the fixed part of an ADTS header. */
    private static final int ADTS_HEADER_SIZE = 7;

    /** Number of extra header bytes present when {@code protectionAbsent == 0}. */
    private static final int ADTS_CRC_SIZE = 2;

    static {
        audioObjectTypes.put(1, "AAC Main");
        audioObjectTypes.put(2, "AAC LC (Low Complexity)");
        audioObjectTypes.put(3, "AAC SSR (Scalable Sample Rate)");
        audioObjectTypes.put(4, "AAC LTP (Long Term Prediction)");
        audioObjectTypes.put(5, "SBR (Spectral Band Replication)");
        audioObjectTypes.put(6, "AAC Scalable");
        audioObjectTypes.put(7, "TwinVQ");
        audioObjectTypes.put(8, "CELP (Code Excited Linear Prediction)");
        audioObjectTypes.put(9, "HXVC (Harmonic Vector eXcitation Coding)");
        audioObjectTypes.put(10, "Reserved");
        audioObjectTypes.put(11, "Reserved");
        audioObjectTypes.put(12, "TTSI (Text-To-Speech Interface)");
        audioObjectTypes.put(13, "Main Synthesis");
        audioObjectTypes.put(14, "Wavetable Synthesis");
        audioObjectTypes.put(15, "General MIDI");
        audioObjectTypes.put(16, "Algorithmic Synthesis and Audio Effects");
        audioObjectTypes.put(17, "ER (Error Resilient) AAC LC");
        audioObjectTypes.put(18, "Reserved");
        audioObjectTypes.put(19, "ER AAC LTP");
        audioObjectTypes.put(20, "ER AAC Scalable");
        audioObjectTypes.put(21, "ER TwinVQ");
        audioObjectTypes.put(22, "ER BSAC (Bit-Sliced Arithmetic Coding)");
        audioObjectTypes.put(23, "ER AAC LD (Low Delay)");
        audioObjectTypes.put(24, "ER CELP");
        audioObjectTypes.put(25, "ER HVXC");
        audioObjectTypes.put(26, "ER HILN (Harmonic and Individual Lines plus Noise)");
        audioObjectTypes.put(27, "ER Parametric");
        audioObjectTypes.put(28, "SSC (SinuSoidal Coding)");
        audioObjectTypes.put(29, "PS (Parametric Stereo)");
        audioObjectTypes.put(30, "MPEG Surround");
        audioObjectTypes.put(31, "(Escape value)");
        audioObjectTypes.put(32, "Layer-1");
        audioObjectTypes.put(33, "Layer-2");
        audioObjectTypes.put(34, "Layer-3");
        audioObjectTypes.put(35, "DST (Direct Stream Transfer)");
        audioObjectTypes.put(36, "ALS (Audio Lossless)");
        audioObjectTypes.put(37, "SLS (Scalable LosslesS)");
        audioObjectTypes.put(38, "SLS non-core");
        audioObjectTypes.put(39, "ER AAC ELD (Enhanced Low Delay)");
        audioObjectTypes.put(40, "SMR (Symbolic Music Representation) Simple");
        audioObjectTypes.put(41, "SMR Main");
        audioObjectTypes.put(42, "USAC (Unified Speech and Audio Coding) (no SBR)");
        audioObjectTypes.put(43, "SAOC (Spatial Audio Object Coding)");
        audioObjectTypes.put(44, "LD MPEG Surround");
        audioObjectTypes.put(45, "USAC");
    }

    static {
        // Frequency in Hz -> frequency index.
        SAMPLING_FREQUENCY_INDEX_MAP.put(96000, 0);
        SAMPLING_FREQUENCY_INDEX_MAP.put(88200, 1);
        SAMPLING_FREQUENCY_INDEX_MAP.put(64000, 2);
        SAMPLING_FREQUENCY_INDEX_MAP.put(48000, 3);
        SAMPLING_FREQUENCY_INDEX_MAP.put(44100, 4);
        SAMPLING_FREQUENCY_INDEX_MAP.put(32000, 5);
        SAMPLING_FREQUENCY_INDEX_MAP.put(24000, 6);
        SAMPLING_FREQUENCY_INDEX_MAP.put(22050, 7);
        SAMPLING_FREQUENCY_INDEX_MAP.put(16000, 8);
        SAMPLING_FREQUENCY_INDEX_MAP.put(12000, 9);
        SAMPLING_FREQUENCY_INDEX_MAP.put(11025, 10);
        SAMPLING_FREQUENCY_INDEX_MAP.put(8000, 11);
        // Frequency index -> frequency in Hz.
        SAMPLING_FREQUENCY_INDEX_MAP.put(0x0, 96000);
        SAMPLING_FREQUENCY_INDEX_MAP.put(0x1, 88200);
        SAMPLING_FREQUENCY_INDEX_MAP.put(0x2, 64000);
        SAMPLING_FREQUENCY_INDEX_MAP.put(0x3, 48000);
        SAMPLING_FREQUENCY_INDEX_MAP.put(0x4, 44100);
        SAMPLING_FREQUENCY_INDEX_MAP.put(0x5, 32000);
        SAMPLING_FREQUENCY_INDEX_MAP.put(0x6, 24000);
        SAMPLING_FREQUENCY_INDEX_MAP.put(0x7, 22050);
        SAMPLING_FREQUENCY_INDEX_MAP.put(0x8, 16000);
        SAMPLING_FREQUENCY_INDEX_MAP.put(0x9, 12000);
        SAMPLING_FREQUENCY_INDEX_MAP.put(0xa, 11025);
        SAMPLING_FREQUENCY_INDEX_MAP.put(0xb, 8000);
    }

    TrackMetaData trackMetaData = new TrackMetaData();
    SampleDescriptionBox sampleDescriptionBox;
    /** Per-sample durations; every entry is 1024. */
    long[] decTimes;
    /** Header of the first ADTS frame; never {@code null} once construction succeeded. */
    AdtsHeader firstHeader;
    int bufferSizeDB;
    /** Highest bit rate (bits per second) seen over a sliding window of about one second. */
    long maxBitRate;
    /** Total payload bits divided by the track duration in seconds. */
    long avgBitRate;

    private DataSource dataSource;
    private List<Sample> samples;

    /**
     * Creates a track from the given data source using the language code {@code "eng"}.
     *
     * @param dataSource source of the ADTS stream
     * @throws IOException if reading fails or the stream is not a usable ADTS stream
     */
    public AACTrackImpl(DataSource dataSource) throws IOException {
        this(dataSource, "eng");
    }

    /**
     * Creates a track from the given data source.
     *
     * @param dataSource source of the ADTS stream; it is read completely during construction
     *                   and later used to serve the sample payloads
     * @param lang       language stored in the track metadata
     * @throws IOException if reading fails, if the data source contains no complete ADTS
     *                     header, or if a header is malformed or unsupported
     */
    public AACTrackImpl(DataSource dataSource, String lang) throws IOException {
        super(dataSource.toString());
        this.dataSource = dataSource;

        samples = new ArrayList<Sample>();
        firstHeader = readSamples(dataSource);
        if (firstHeader == null) {
            // Without a header there is neither a sample rate nor a channel configuration.
            throw new IOException("No ADTS frame found in " + dataSource);
        }

        computeBitRates();
        bufferSizeDB = 1536; /* TODO: Calculate this somehow! */
        sampleDescriptionBox = createSampleDescriptionBox();

        trackMetaData.setCreationTime(new Date());
        trackMetaData.setModificationTime(new Date());
        trackMetaData.setLanguage(lang);
        trackMetaData.setVolume(1);
        trackMetaData.setTimescale(firstHeader.sampleRate); // Audio tracks always use sampleRate as timescale

        decTimes = new long[samples.size()];
        Arrays.fill(decTimes, SAMPLES_PER_FRAME);
    }

    /**
     * Derives {@link #maxBitRate} and {@link #avgBitRate} from the sizes of the collected samples.
     * The maximum is taken over a sliding window holding {@code (int) packetsPerSecond} samples.
     */
    private void computeBitRates() {
        double packetsPerSecond = (double) firstHeader.sampleRate / SAMPLES_PER_FRAME;
        double duration = samples.size() / packetsPerSecond;
        int windowLength = (int) packetsPerSecond;

        long dataSize = 0;
        // Sum of the sizes currently in the window, maintained incrementally so the window
        // does not have to be re-added for every sample.
        long windowBytes = 0;
        Deque<Integer> window = new ArrayDeque<Integer>();
        for (Sample sample : samples) {
            int size = (int) sample.getSize();
            dataSize += size;
            window.addLast(size);
            windowBytes += size;
            while (window.size() > packetsPerSecond) {
                windowBytes -= window.removeFirst();
            }
            if (window.size() == windowLength) {
                double currBitrate = 8.0 * windowBytes / window.size() * packetsPerSecond;
                if (currBitrate > maxBitRate) {
                    maxBitRate = (long) currBitrate;
                }
            }
        }
        avgBitRate = (long) (8 * dataSize / duration);
    }

    /**
     * Builds the sample description ({@code mp4a} entry with an {@code esds} box) from the
     * first ADTS header and the previously computed bit rates and buffer size.
     */
    private SampleDescriptionBox createSampleDescriptionBox() {
        AudioSampleEntry audioSampleEntry = new AudioSampleEntry("mp4a");
        // Channel configuration 7 is written as 8 channels; all other values are used directly.
        if (firstHeader.channelconfig == 7) {
            audioSampleEntry.setChannelCount(8);
        } else {
            audioSampleEntry.setChannelCount(firstHeader.channelconfig);
        }
        audioSampleEntry.setSampleRate(firstHeader.sampleRate);
        audioSampleEntry.setDataReferenceIndex(1);
        audioSampleEntry.setSampleSize(16);

        SLConfigDescriptor slConfigDescriptor = new SLConfigDescriptor();
        slConfigDescriptor.setPredefined(2);

        AudioSpecificConfig audioSpecificConfig = new AudioSpecificConfig();
        // NOTE(review): the object type is fixed to 2 (AAC LC) regardless of the profile read
        // from the ADTS header - confirm whether firstHeader.profile should be used instead.
        audioSpecificConfig.setOriginalAudioObjectType(2); // AAC LC
        audioSpecificConfig.setSamplingFrequencyIndex(firstHeader.sampleFrequencyIndex);
        audioSpecificConfig.setChannelConfiguration(firstHeader.channelconfig);

        DecoderConfigDescriptor decoderConfigDescriptor = new DecoderConfigDescriptor();
        decoderConfigDescriptor.setObjectTypeIndication(0x40);
        decoderConfigDescriptor.setStreamType(5);
        decoderConfigDescriptor.setBufferSizeDB(bufferSizeDB);
        decoderConfigDescriptor.setMaxBitRate(maxBitRate);
        decoderConfigDescriptor.setAvgBitRate(avgBitRate);
        decoderConfigDescriptor.setAudioSpecificInfo(audioSpecificConfig);

        ESDescriptor descriptor = new ESDescriptor();
        descriptor.setEsId(0);
        descriptor.setSlConfigDescriptor(slConfigDescriptor);
        descriptor.setDecoderConfigDescriptor(decoderConfigDescriptor);

        ESDescriptorBox esds = new ESDescriptorBox();
        esds.setEsDescriptor(descriptor);
        audioSampleEntry.addBox(esds);

        SampleDescriptionBox box = new SampleDescriptionBox();
        box.addBox(audioSampleEntry);
        return box;
    }

    /**
     * Closes the underlying data source.
     *
     * @throws IOException if closing the data source fails
     */
    public void close() throws IOException {
        // doing everything to get rid of references to memory mapped things
        dataSource.close();
    }

    /** Returns the sample description built during construction. */
    public SampleDescriptionBox getSampleDescriptionBox() {
        return sampleDescriptionBox;
    }

    /** Returns the per-sample durations; every entry is 1024. */
    public long[] getSampleDurations() {
        return decTimes;
    }

    /** Always returns {@code null}. */
    public List<CompositionTimeToSample.Entry> getCompositionTimeEntries() {
        return null;
    }

    /** Always returns {@code null}. */
    public long[] getSyncSamples() {
        return null;
    }

    /** Always returns {@code null}. */
    public List<SampleDependencyTypeBox.Entry> getSampleDependencies() {
        return null;
    }

    /** Returns the metadata (creation/modification time, language, volume, timescale). */
    public TrackMetaData getTrackMetaData() {
        return trackMetaData;
    }

    /** Returns the handler type {@code "soun"}. */
    public String getHandler() {
        return "soun";
    }

    /** Returns the samples, one per ADTS frame, in stream order. */
    public List<Sample> getSamples() {
        return samples;
    }

    /** Always returns {@code null}. */
    public SubSampleInformationBox getSubsampleInformationBox() {
        return null;
    }

    /**
     * Reads from {@code channel} until {@code target} is full.
     *
     * @return {@code true} if the buffer was filled, {@code false} if the end of the data
     *         source was reached first
     */
    private static boolean readFully(DataSource channel, ByteBuffer target) throws IOException {
        while (target.hasRemaining()) {
            if (channel.read(target) == -1) {
                return false;
            }
        }
        return true;
    }

    /**
     * Reads one ADTS header at the current position of {@code channel}, leaving the position
     * directly behind the header (including the two extra bytes when protection is present).
     *
     * @param channel data source positioned at the start of an ADTS frame
     * @return the parsed header, or {@code null} if the data source ends before a complete
     *         header could be read
     * @throws IOException if the sync word is missing, the sampling frequency index is not in
     *                     {@link #SAMPLING_FREQUENCY_INDEX_MAP}, the frame carries more than
     *                     one AAC frame, or reading fails
     */
    private AdtsHeader readADTSHeader(DataSource channel) throws IOException {
        ByteBuffer bb = ByteBuffer.allocate(ADTS_HEADER_SIZE);
        if (!readFully(channel, bb)) {
            return null;
        }

        BitReaderBuffer brb = new BitReaderBuffer((ByteBuffer) bb.rewind());
        int syncword = brb.readBits(12);
        if (syncword != 0xfff) {
            throw new IOException("Expected Start Word 0xfff");
        }

        AdtsHeader hdr = new AdtsHeader();
        hdr.mpegVersion = brb.readBits(1);
        hdr.layer = brb.readBits(2);
        hdr.protectionAbsent = brb.readBits(1);
        hdr.profile = brb.readBits(2) + 1; // stored value is the 2-bit field plus one

        hdr.sampleFrequencyIndex = brb.readBits(4);
        // A 4-bit index can be 0..15 but the table only knows 0..11; fail with a clear
        // message instead of a NullPointerException on unboxing.
        Integer sampleRate = SAMPLING_FREQUENCY_INDEX_MAP.get(hdr.sampleFrequencyIndex);
        if (sampleRate == null) {
            throw new IOException("Unsupported sampling frequency index " + hdr.sampleFrequencyIndex);
        }
        hdr.sampleRate = sampleRate;

        brb.readBits(1); // one bit skipped, value unused
        hdr.channelconfig = brb.readBits(3);
        hdr.original = brb.readBits(1);
        hdr.home = brb.readBits(1);
        hdr.copyrightedStream = brb.readBits(1);
        hdr.copyrightStart = brb.readBits(1);
        hdr.frameLength = brb.readBits(13);
        hdr.bufferFullness = brb.readBits(11);
        hdr.numAacFramesPerAdtsFrame = brb.readBits(2) + 1; // all 56 header bits consumed

        if (hdr.numAacFramesPerAdtsFrame != 1) {
            throw new IOException("This muxer can only work with 1 AAC frame per ADTS frame");
        }
        if (hdr.protectionAbsent == 0) {
            // Skip the two extra header bytes. Read them fully so the position really ends up
            // behind them; a stream ending here is treated like a truncated header.
            if (!readFully(channel, ByteBuffer.allocate(ADTS_CRC_SIZE))) {
                return null;
            }
        }
        return hdr;
    }

    /**
     * Walks over all ADTS frames of {@code channel}, adding one {@link Sample} per frame to
     * {@link #samples}. Each sample refers to the payload bytes behind its header and fetches
     * them lazily from {@link #dataSource}.
     *
     * @param channel data source positioned at the first ADTS frame
     * @return the header of the first frame, or {@code null} if no complete header was found
     * @throws IOException if a header is invalid, declares a frame shorter than its own
     *                     header, or reading fails
     */
    private AdtsHeader readSamples(DataSource channel) throws IOException {
        AdtsHeader first = null;
        AdtsHeader hdr;

        while ((hdr = readADTSHeader(channel)) != null) {
            if (first == null) {
                first = hdr;
            }

            final long currentPosition = channel.position();
            final long frameSize = hdr.frameLength - hdr.getSize();
            if (frameSize < 0) {
                // Would yield a negative sample size and move the position backwards
                // (an endless loop for frameLength == 0).
                throw new IOException("Invalid ADTS frame length " + hdr.frameLength
                        + " (header alone is " + hdr.getSize() + " bytes)");
            }

            samples.add(new Sample() {
                public void writeTo(WritableByteChannel channel) throws IOException {
                    dataSource.transferTo(currentPosition, frameSize, channel);
                }

                public long getSize() {
                    return frameSize;
                }

                public ByteBuffer asByteBuffer() {
                    try {
                        return dataSource.map(currentPosition, frameSize);
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }
            });

            // Jump over the payload to the next header.
            channel.position(currentPosition + frameSize);
        }
        return first;
    }

    @Override
    public String toString() {
        return "AACTrackImpl{" +
                "sampleRate=" + firstHeader.sampleRate +
                ", channelconfig=" + firstHeader.channelconfig +
                '}';
    }

    /** Values parsed from one ADTS header, in the units they are read from the bit stream. */
    class AdtsHeader {
        /** 4-bit index into {@link AACTrackImpl#SAMPLING_FREQUENCY_INDEX_MAP}. */
        int sampleFrequencyIndex;

        int mpegVersion;
        int layer;
        /** 0 if the header is followed by two extra bytes, otherwise 1. */
        int protectionAbsent;
        /** The 2-bit profile field plus one. */
        int profile;
        /** Sampling frequency in Hz looked up from {@link #sampleFrequencyIndex}. */
        int sampleRate;

        int channelconfig;
        int original;
        int home;
        int copyrightedStream;
        int copyrightStart;
        /** 13-bit frame length; the payload size is this value minus {@link #getSize()}. */
        int frameLength;
        int bufferFullness;
        /** The 2-bit field plus one. */
        int numAacFramesPerAdtsFrame;

        /** Returns the header size in bytes: 7, plus 2 when {@code protectionAbsent == 0}. */
        int getSize() {
            return ADTS_HEADER_SIZE + (protectionAbsent == 0 ? ADTS_CRC_SIZE : 0);
        }
    }
}