/*
 * AAC LATM decoder
 * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
 * Copyright (c) 2010      Janne Grunau <janne-ffmpeg@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
| 22 | |
/**
 * @file
 * AAC LATM decoder
 * @author Paul Kendall <paul@kcbbs.gen.nz>
 * @author Janne Grunau <janne-ffmpeg@jannau.net>
 */
| 29 | |
/*
    Note: This decoder filter is intended to decode LATM streams transferred
    in MPEG transport streams which contain only one program.
    For more complex LATM demuxing, a separate LATM demuxer should be used.
*/
| 35 | |
#include <string.h>

#include "get_bits.h"
#include "dsputil.h"

#include "aac.h"
#include "aacdectab.h"
#include "mpeg4audio.h"

#include "libavutil/avassert.h"
| 44 | |
/** LATM AudioSyncStream sync word (11 bits). */
#define SYNC_LATM   0x2b7

/**
 * Capacity in bytes of the temporary payload buffer used by the frame decoder.
 * Parenthesized so the macro keeps its value inside larger expressions
 * (e.g. "x % MAX_SIZE" or "MAX_SIZE / n").
 */
#define MAX_SIZE    (8*1024)
| 47 | |
| 48 | struct LATMContext |
| 49 | { |
| 50 | AACContext aac_ctx; |
| 51 | AVCodec *aac_codec; |
| 52 | uint8_t initialized; |
| 53 | |
| 54 | // parser data |
| 55 | uint8_t audio_mux_version_A; |
| 56 | uint8_t same_time_framing; |
| 57 | uint8_t frame_length_type; |
| 58 | uint32_t frame_length; |
| 59 | }; |
| 60 | |
| 61 | static inline int64_t latm_get_value(GetBitContext *b) |
| 62 | { |
| 63 | uint8_t bytesForValue = get_bits(b, 2); |
| 64 | int64_t value = 0; |
| 65 | int i; |
| 66 | for (i=0; i<=bytesForValue; i++) { |
| 67 | value <<= 8; |
| 68 | value |= get_bits(b, 8); |
| 69 | } |
| 70 | return value; |
| 71 | } |
| 72 | // copied from libavcodec/mpeg4audio.c |
| 73 | static av_always_inline unsigned int copy_bits(PutBitContext *pb, |
| 74 | GetBitContext *gb, |
| 75 | int bits) |
| 76 | { |
| 77 | unsigned int el = get_bits(gb, bits); |
| 78 | put_bits(pb, bits, el); |
| 79 | return el; |
| 80 | } |
| 81 | |
| 82 | static void latm_read_ga_specific_config(int audio_object_type, |
| 83 | MPEG4AudioConfig *c, |
| 84 | GetBitContext *gb, |
| 85 | PutBitContext *pb) |
| 86 | { |
| 87 | int ext_flag; |
| 88 | |
| 89 | copy_bits(pb, gb, 1); // framelen_flag |
| 90 | if (copy_bits(pb, gb, 1)) // depends_on_coder |
| 91 | copy_bits(pb, gb, 14); // delay |
| 92 | ext_flag = copy_bits(pb, gb, 1); |
| 93 | |
| 94 | if (!c->chan_config) |
| 95 | ff_copy_pce_data(pb, gb); // program_config_element |
| 96 | |
| 97 | if (audio_object_type == AOT_AAC_SCALABLE || |
| 98 | audio_object_type == AOT_ER_AAC_SCALABLE) |
| 99 | copy_bits(pb, gb, 3); // layer number |
| 100 | |
| 101 | if (!ext_flag) |
| 102 | return; |
| 103 | |
| 104 | if (audio_object_type == AOT_ER_BSAC) { |
| 105 | copy_bits(pb, gb, 5); // numOfSubFrame |
| 106 | copy_bits(pb, gb, 11); // layer_length |
| 107 | } else if (audio_object_type == AOT_ER_AAC_LC || |
| 108 | audio_object_type == AOT_ER_AAC_LTP || |
| 109 | audio_object_type == AOT_ER_AAC_SCALABLE || |
| 110 | audio_object_type == AOT_ER_AAC_LD) |
| 111 | copy_bits(pb, gb, 3); // stuff |
| 112 | copy_bits(pb, gb, 1); // extflag3 |
| 113 | } |
| 114 | |
| 115 | static int latm_read_audio_specific_config(GetBitContext *gb, |
| 116 | PutBitContext *pb) |
| 117 | { |
| 118 | int num_bits=0; |
| 119 | int audio_object_type; |
| 120 | |
| 121 | MPEG4AudioConfig b, *c; |
| 122 | c = &b; |
| 123 | |
| 124 | c->sbr = -1; |
| 125 | |
| 126 | audio_object_type = copy_bits(pb, gb, 5); |
| 127 | if (audio_object_type == AOT_ESCAPE) { |
| 128 | audio_object_type = AOT_ESCAPE + copy_bits(pb, gb, 6) + 1; |
| 129 | } |
| 130 | c->object_type = audio_object_type; |
| 131 | |
| 132 | c->sampling_index = copy_bits(pb, gb, 4); |
| 133 | c->sample_rate = ff_mpeg4audio_sample_rates[c->sampling_index]; |
| 134 | if (c->sampling_index == 0x0f) { |
| 135 | c->sample_rate = copy_bits(pb, gb, 24); |
| 136 | } |
| 137 | c->chan_config = copy_bits(pb, gb, 4); |
| 138 | |
| 139 | if (c->chan_config < FF_ARRAY_ELEMS(ff_mpeg4audio_channels)) |
| 140 | c->channels = ff_mpeg4audio_channels[c->chan_config]; |
| 141 | |
| 142 | if (audio_object_type == AOT_AAC_MAIN || |
| 143 | audio_object_type == AOT_AAC_LC || |
| 144 | audio_object_type == AOT_AAC_SSR || |
| 145 | audio_object_type == AOT_AAC_LTP || |
| 146 | audio_object_type == AOT_AAC_SCALABLE || |
| 147 | audio_object_type == AOT_TWINVQ) { |
| 148 | latm_read_ga_specific_config(audio_object_type, c, gb, pb); |
| 149 | } else if (audio_object_type == AOT_SBR) { |
| 150 | c->sbr = 1; |
| 151 | c->ext_sampling_index = copy_bits(pb, gb, 4); |
| 152 | c->ext_sample_rate = ff_mpeg4audio_sample_rates[c->ext_sampling_index]; |
| 153 | if (c->ext_sampling_index == 0x0f) { |
| 154 | c->ext_sample_rate = copy_bits(pb, gb, 24); |
| 155 | } |
| 156 | c->object_type = copy_bits(pb, gb, 5); |
| 157 | } else if (audio_object_type >= AOT_ER_AAC_LC) { |
| 158 | latm_read_ga_specific_config(audio_object_type, c, gb, pb); |
| 159 | copy_bits(pb, gb, 2); // epConfig |
| 160 | } |
| 161 | |
| 162 | if (c->sbr == -1 && c->sample_rate <= 24000) |
| 163 | c->sample_rate *= 2; |
| 164 | |
| 165 | // count the extradata |
| 166 | num_bits = put_bits_count(pb); |
| 167 | |
| 168 | flush_put_bits(pb); |
| 169 | return num_bits; |
| 170 | } |
| 171 | |
| 172 | static int latm_decode_audio_specific_config(struct LATMContext *latmctx, |
| 173 | GetBitContext *gb) |
| 174 | { |
| 175 | PutBitContext pb; |
| 176 | int32_t extra_size, bits_consumed; |
| 177 | uint8_t extradata[32+FF_INPUT_BUFFER_PADDING_SIZE]; |
| 178 | AVCodecContext *avctx = latmctx->aac_ctx.avctx; |
| 179 | |
| 180 | init_put_bits(&pb, extradata, 32 * 8); |
| 181 | |
| 182 | bits_consumed = latm_read_audio_specific_config(gb, &pb); |
| 183 | |
| 184 | if (bits_consumed < 0) |
| 185 | return AVERROR_INVALIDDATA; |
| 186 | |
| 187 | extra_size = (bits_consumed+7) / 8; |
| 188 | |
| 189 | if (avctx->extradata_size != extra_size) { |
| 190 | av_free(avctx->extradata); |
| 191 | avctx->extradata = av_malloc(extra_size + FF_INPUT_BUFFER_PADDING_SIZE); |
| 192 | if (!avctx->extradata) |
| 193 | return AVERROR(ENOMEM); |
| 194 | |
| 195 | avctx->extradata_size = extra_size; |
| 196 | memcpy(avctx->extradata, extradata, extra_size); |
| 197 | memset(avctx->extradata+extra_size, 0, FF_INPUT_BUFFER_PADDING_SIZE); |
| 198 | } |
| 199 | |
| 200 | return bits_consumed; |
| 201 | } |
| 202 | |
| 203 | static int read_stream_mux_config(struct LATMContext *latmctx, |
| 204 | GetBitContext *gb) |
| 205 | { |
| 206 | int ret, audio_mux_version = get_bits(gb, 1); |
| 207 | |
| 208 | latmctx->audio_mux_version_A = 0; |
| 209 | if (audio_mux_version) |
| 210 | latmctx->audio_mux_version_A = get_bits(gb, 1); |
| 211 | |
| 212 | if (!latmctx->audio_mux_version_A) { |
| 213 | |
| 214 | if (audio_mux_version) |
| 215 | latm_get_value(gb); // taraFullness |
| 216 | |
| 217 | latmctx->same_time_framing = get_bits(gb, 1); // allStreamSameTimeFraming |
| 218 | skip_bits(gb, 6); // numSubFrames |
| 219 | av_assert0(get_bits(gb, 4) == 0); // numPrograms |
| 220 | |
| 221 | // for each program (which there is only on in DVB) |
| 222 | |
| 223 | // for each layer (which there is only on in DVB) |
| 224 | if (get_bits(gb, 3)) { // numLayer |
| 225 | av_log_missing_feature(latmctx->aac_ctx.avctx, |
| 226 | "multiple layers are not supported\n", 1); |
| 227 | return AVERROR_PATCHWELCOME; |
| 228 | } |
| 229 | |
| 230 | // for all but first stream: use_same_config = get_bits(gb, 1); |
| 231 | if (!audio_mux_version) { |
| 232 | ret = latm_decode_audio_specific_config(latmctx, gb); |
| 233 | if (ret < 0) |
| 234 | return ret; |
| 235 | } else { |
| 236 | int ascLen = latm_get_value(gb); |
| 237 | ret = latm_decode_audio_specific_config(latmctx, gb); |
| 238 | if (ret < 0) |
| 239 | return ret; |
| 240 | ascLen -= ret; |
| 241 | skip_bits_long(gb, ascLen); |
| 242 | } |
| 243 | |
| 244 | latmctx->frame_length_type = get_bits(gb, 3); |
| 245 | switch (latmctx->frame_length_type) { |
| 246 | case 0: |
| 247 | skip_bits(gb, 8); // latmBufferFullness |
| 248 | break; |
| 249 | case 1: |
| 250 | latmctx->frame_length = get_bits(gb, 9); |
| 251 | break; |
| 252 | case 3: |
| 253 | case 4: |
| 254 | case 5: |
| 255 | skip_bits(gb, 6); // CELP frame length table index |
| 256 | break; |
| 257 | case 6: |
| 258 | case 7: |
| 259 | skip_bits(gb, 1); // HVXC frame length table index |
| 260 | break; |
| 261 | } |
| 262 | |
| 263 | if (get_bits(gb, 1)) { // other data |
| 264 | if (audio_mux_version) { |
| 265 | latm_get_value(gb); // other_data_bits |
| 266 | } else { |
| 267 | int esc; |
| 268 | do { |
| 269 | esc = get_bits(gb, 1); |
| 270 | skip_bits(gb, 8); |
| 271 | } while (esc); |
| 272 | } |
| 273 | } |
| 274 | |
| 275 | if (get_bits(gb, 1)) // crc present |
| 276 | skip_bits(gb, 8); // config_crc |
| 277 | } |
| 278 | |
| 279 | return 0; |
| 280 | } |
| 281 | |
| 282 | static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb) |
| 283 | { |
| 284 | uint8_t tmp; |
| 285 | |
| 286 | /* if (ctx->same_time_framing) { */ |
| 287 | if (ctx->frame_length_type == 0) { |
| 288 | int mux_slot_length = 0; |
| 289 | do { |
| 290 | tmp = get_bits(gb, 8); |
| 291 | mux_slot_length += tmp; |
| 292 | } while (tmp == 255); |
| 293 | return mux_slot_length; |
| 294 | } else if (ctx->frame_length_type == 1) { |
| 295 | return ctx->frame_length; |
| 296 | } else if (ctx->frame_length_type == 3 || |
| 297 | ctx->frame_length_type == 5 || |
| 298 | ctx->frame_length_type == 7) { |
| 299 | skip_bits(gb, 2); // mux_slot_length_coded |
| 300 | } |
| 301 | /* } else { */ |
| 302 | /* uint8_t num_chunk= get_bits(gb, 4); */ |
| 303 | /* av_log(ctx->aac_ctx.avctx, AV_LOG_ERROR, "!allStreamsSameTimeFraming not handled " */ |
| 304 | /* "%d\n", num_chunk); */ |
| 305 | /* } */ |
| 306 | return 0; |
| 307 | } |
| 308 | |
| 309 | static int read_audio_mux_element(struct LATMContext *latmctx, |
| 310 | GetBitContext *b, |
| 311 | uint8_t *payload, int *payloadsize) |
| 312 | { |
| 313 | uint8_t use_same_mux = get_bits(b, 1); |
| 314 | if (!use_same_mux) { |
| 315 | read_stream_mux_config(latmctx, b); |
| 316 | } else if (!latmctx->aac_ctx.avctx->extradata) { |
| 317 | av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG, "no decoder config found\n"); |
| 318 | return AVERROR(EAGAIN); |
| 319 | } |
| 320 | if (latmctx->audio_mux_version_A == 0) { |
| 321 | int j; |
| 322 | int mux_slot_length_bytes = read_payload_length_info(latmctx, b); |
| 323 | mux_slot_length_bytes = FFMIN(mux_slot_length_bytes, *payloadsize); |
| 324 | for (j=0; j<mux_slot_length_bytes; j++) { |
| 325 | *payload++ = get_bits(b, 8); |
| 326 | } |
| 327 | *payloadsize = mux_slot_length_bytes; |
| 328 | } |
| 329 | return 0; |
| 330 | } |
| 331 | |
| 332 | static int readAudioSyncStream(struct LATMContext *latmctx, GetBitContext *b, int size, uint8_t *payload, int *payloadsize) |
| 333 | { |
| 334 | int muxlength; |
| 335 | |
| 336 | if (get_bits(b, 11) != SYNC_LATM) return -1; // not LATM |
| 337 | |
| 338 | muxlength = get_bits(b, 13); |
| 339 | if (muxlength+3 > size) return -1; // not enough data, the parser should have sorted this |
| 340 | |
| 341 | read_audio_mux_element(latmctx, b, payload, payloadsize); |
| 342 | |
| 343 | return 0; |
| 344 | } |
| 345 | |
| 346 | |
| 347 | static int latm_decode_frame(AVCodecContext *avctx, void *out, int *out_size, AVPacket *avpkt) |
| 348 | { |
| 349 | struct LATMContext *latmctx = avctx->priv_data; |
| 350 | uint8_t *tmp, tmpbuf[MAX_SIZE]; |
| 351 | int ret, bufsize = MAX_SIZE; |
| 352 | GetBitContext b; |
| 353 | |
| 354 | if(avpkt->size == 0) |
| 355 | return 0; |
| 356 | |
| 357 | init_get_bits(&b, avpkt->data, avpkt->size * 8); |
| 358 | |
| 359 | if (readAudioSyncStream(latmctx, &b, avpkt->size, tmpbuf, &bufsize)) { |
| 360 | return -1; |
| 361 | } |
| 362 | |
| 363 | if (!latmctx->initialized) { |
| 364 | if (!avctx->extradata) { |
| 365 | *out_size = 0; |
| 366 | return avpkt->size; |
| 367 | } else { |
| 368 | av_assert0(latmctx->aac_codec->init); |
| 369 | ret = latmctx->aac_codec->init(avctx); |
| 370 | if (ret < 0) |
| 371 | return ret; |
| 372 | latmctx->initialized = 1; |
| 373 | } |
| 374 | } |
| 375 | |
| 376 | tmp = avpkt->data; |
| 377 | avpkt->data = tmpbuf; |
| 378 | avpkt->size = bufsize; |
| 379 | |
| 380 | av_assert0(latmctx->aac_codec->decode); |
| 381 | ret = latmctx->aac_codec->decode(avctx, out, out_size, avpkt); |
| 382 | avpkt->data = tmp; |
| 383 | return ret; |
| 384 | } |
| 385 | |
| 386 | static int latm_decode_init(AVCodecContext *avctx) |
| 387 | { |
| 388 | struct LATMContext *latmctx = avctx->priv_data; |
| 389 | int ret; |
| 390 | |
| 391 | latmctx->aac_codec = avcodec_find_decoder_by_name("aac"); |
| 392 | if (!latmctx->aac_codec) { |
| 393 | av_log(avctx, AV_LOG_ERROR, "AAC decoder is required by AAC LATM decoder.\n"); |
| 394 | return AVERROR(ENOSYS); |
| 395 | } |
| 396 | |
| 397 | av_assert0(latmctx->aac_codec->init); |
| 398 | ret = latmctx->aac_codec->init(avctx); |
| 399 | |
| 400 | if (avctx->extradata_size > 0) |
| 401 | latmctx->initialized = !ret; |
| 402 | else |
| 403 | latmctx->initialized = 0; |
| 404 | |
| 405 | return ret; |
| 406 | } |
| 407 | |
| 408 | static int latm_decode_close(AVCodecContext *avctx) |
| 409 | { |
| 410 | struct LATMContext *latmctx = avctx->priv_data; |
| 411 | av_assert0(latmctx->aac_codec->close); |
| 412 | return latmctx->aac_codec->close(avctx); |
| 413 | } |
| 414 | |
| 415 | AVCodec aac_latm_decoder = { |
| 416 | .name = "aac_latm", |
| 417 | .type = CODEC_TYPE_AUDIO, |
| 418 | .id = CODEC_ID_AAC_LATM, |
| 419 | .priv_data_size = sizeof(struct LATMContext), |
| 420 | .init = latm_decode_init, |
| 421 | .close = latm_decode_close, |
| 422 | .decode = latm_decode_frame, |
| 423 | .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Codec LATM syntax)"), |
| 424 | .sample_fmts = (const enum SampleFormat[]) { |
| 425 | SAMPLE_FMT_S16,SAMPLE_FMT_NONE |
| 426 | }, |
| 427 | .channel_layouts = aac_channel_layout, |
| 428 | }; |