nellymoserenc.c
Go to the documentation of this file.
1 /*
2  * Nellymoser encoder
3  * This code is developed as part of Google Summer of Code 2008 Program.
4  *
5  * Copyright (c) 2008 Bartlomiej Wolowiec
6  *
7  * This file is part of Libav.
8  *
9  * Libav is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * Libav is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with Libav; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
38 #include "libavutil/mathematics.h"
39 #include "nellymoser.h"
40 #include "avcodec.h"
41 #include "dsputil.h"
42 #include "fft.h"
43 #include "sinewin.h"
44 
45 #define BITSTREAM_WRITER_LE
46 #include "put_bits.h"
47 
48 #define POW_TABLE_SIZE (1<<11)
49 #define POW_TABLE_OFFSET 3
50 #define OPT_SIZE ((1<<15) + 3000)
51 
52 typedef struct NellyMoserEncodeContext {
55  int bufsel;
61  DECLARE_ALIGNED(32, float, buf)[2][3 * NELLY_BUF_LEN];
62  float (*opt )[NELLY_BANDS];
63  uint8_t (*path)[NELLY_BANDS];
65 
66 static float pow_table[POW_TABLE_SIZE];
67 
68 static const uint8_t sf_lut[96] = {
69  0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
70  5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
71  15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
72  27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
73  41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
74  54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
75 };
76 
77 static const uint8_t sf_delta_lut[78] = {
78  0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
79  4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
80  13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
81  23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
82  28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
83 };
84 
85 static const uint8_t quant_lut[230] = {
86  0,
87 
88  0, 1, 2,
89 
90  0, 1, 2, 3, 4, 5, 6,
91 
92  0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
93  12, 13, 13, 13, 14,
94 
95  0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
96  8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
97  22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
98  30,
99 
100  0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
101  4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
102  10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
103  15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
104  21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
105  33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
106  46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
107  53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
108  58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
109  61, 61, 61, 61, 62,
110 };
111 
112 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
113 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
114 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
115 
117 {
118  s->dsp.vector_fmul(s->in_buff, s->buf[s->bufsel], ff_sine_128, NELLY_BUF_LEN);
119  s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
120  NELLY_BUF_LEN);
121  s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
122 
124  ff_sine_128, NELLY_BUF_LEN);
125  s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
126  NELLY_BUF_LEN);
128 }
129 
131 {
133  int i;
134 
135  if (avctx->channels != 1) {
136  av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
137  return -1;
138  }
139 
140  if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
141  avctx->sample_rate != 11025 &&
142  avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
144  av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
145  return -1;
146  }
147 
148  avctx->frame_size = NELLY_SAMPLES;
149  s->avctx = avctx;
150  ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0);
151  dsputil_init(&s->dsp, avctx);
152 
153  /* Generate overlap window */
154  ff_sine_window_init(ff_sine_128, 128);
155  for (i = 0; i < POW_TABLE_SIZE; i++)
156  pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
157 
158  if (s->avctx->trellis) {
159  s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
160  s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
161  }
162 
163  return 0;
164 }
165 
167 {
169 
170  ff_mdct_end(&s->mdct_ctx);
171 
172  if (s->avctx->trellis) {
173  av_free(s->opt);
174  av_free(s->path);
175  }
176 
177  return 0;
178 }
179 
180 #define find_best(val, table, LUT, LUT_add, LUT_size) \
181  best_idx = \
182  LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
183  if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
184  best_idx++;
185 
186 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
187 {
188  int band, best_idx, power_idx = 0;
189  float power_candidate;
190 
191  //base exponent
192  find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
193  idx_table[0] = best_idx;
194  power_idx = ff_nelly_init_table[best_idx];
195 
196  for (band = 1; band < NELLY_BANDS; band++) {
197  power_candidate = cand[band] - power_idx;
198  find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
199  idx_table[band] = best_idx;
200  power_idx += ff_nelly_delta_table[best_idx];
201  }
202 }
203 
204 static inline float distance(float x, float y, int band)
205 {
206  //return pow(fabs(x-y), 2.0);
207  float tmp = x - y;
208  return tmp * tmp;
209 }
210 
211 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
212 {
213  int i, j, band, best_idx;
214  float power_candidate, best_val;
215 
216  float (*opt )[NELLY_BANDS] = s->opt ;
217  uint8_t(*path)[NELLY_BANDS] = s->path;
218 
219  for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
220  opt[0][i] = INFINITY;
221  }
222 
223  for (i = 0; i < 64; i++) {
224  opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
225  path[0][ff_nelly_init_table[i]] = i;
226  }
227 
228  for (band = 1; band < NELLY_BANDS; band++) {
229  int q, c = 0;
230  float tmp;
231  int idx_min, idx_max, idx;
232  power_candidate = cand[band];
233  for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
234  idx_min = FFMAX(0, cand[band] - q);
235  idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
236  for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
237  if ( isinf(opt[band - 1][i]) )
238  continue;
239  for (j = 0; j < 32; j++) {
240  idx = i + ff_nelly_delta_table[j];
241  if (idx > idx_max)
242  break;
243  if (idx >= idx_min) {
244  tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
245  if (opt[band][idx] > tmp) {
246  opt[band][idx] = tmp;
247  path[band][idx] = j;
248  c = 1;
249  }
250  }
251  }
252  }
253  }
254  assert(c); //FIXME
255  }
256 
257  best_val = INFINITY;
258  best_idx = -1;
259  band = NELLY_BANDS - 1;
260  for (i = 0; i < OPT_SIZE; i++) {
261  if (best_val > opt[band][i]) {
262  best_val = opt[band][i];
263  best_idx = i;
264  }
265  }
266  for (band = NELLY_BANDS - 1; band >= 0; band--) {
267  idx_table[band] = path[band][best_idx];
268  if (band) {
269  best_idx -= ff_nelly_delta_table[path[band][best_idx]];
270  }
271  }
272 }
273 
280 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
281 {
282  PutBitContext pb;
283  int i, j, band, block, best_idx, power_idx = 0;
284  float power_val, coeff, coeff_sum;
285  float pows[NELLY_FILL_LEN];
286  int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
287  float cand[NELLY_BANDS];
288 
289  apply_mdct(s);
290 
291  init_put_bits(&pb, output, output_size * 8);
292 
293  i = 0;
294  for (band = 0; band < NELLY_BANDS; band++) {
295  coeff_sum = 0;
296  for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
297  coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
298  + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
299  }
300  cand[band] =
301  log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
302  }
303 
304  if (s->avctx->trellis) {
305  get_exponent_dynamic(s, cand, idx_table);
306  } else {
307  get_exponent_greedy(s, cand, idx_table);
308  }
309 
310  i = 0;
311  for (band = 0; band < NELLY_BANDS; band++) {
312  if (band) {
313  power_idx += ff_nelly_delta_table[idx_table[band]];
314  put_bits(&pb, 5, idx_table[band]);
315  } else {
316  power_idx = ff_nelly_init_table[idx_table[0]];
317  put_bits(&pb, 6, idx_table[0]);
318  }
319  power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
320  for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
321  s->mdct_out[i] *= power_val;
322  s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
323  pows[i] = power_idx;
324  }
325  }
326 
327  ff_nelly_get_sample_bits(pows, bits);
328 
329  for (block = 0; block < 2; block++) {
330  for (i = 0; i < NELLY_FILL_LEN; i++) {
331  if (bits[i] > 0) {
332  const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
333  coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
334  best_idx =
335  quant_lut[av_clip (
336  coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
337  quant_lut_offset[bits[i]],
338  quant_lut_offset[bits[i]+1] - 1
339  )];
340  if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
341  best_idx++;
342 
343  put_bits(&pb, bits[i], best_idx);
344  }
345  }
346  if (!block)
348  }
349 
350  flush_put_bits(&pb);
351 }
352 
353 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
354 {
356  const float *samples = data;
357  int i;
358 
359  if (s->last_frame)
360  return 0;
361 
362  if (data) {
363  memcpy(s->buf[s->bufsel], samples, avctx->frame_size * sizeof(*samples));
364  for (i = avctx->frame_size; i < NELLY_SAMPLES; i++) {
365  s->buf[s->bufsel][i] = 0;
366  }
367  s->bufsel = 1 - s->bufsel;
368  if (!s->have_saved) {
369  s->have_saved = 1;
370  return 0;
371  }
372  } else {
373  memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
374  s->bufsel = 1 - s->bufsel;
375  s->last_frame = 1;
376  }
377 
378  if (s->have_saved) {
379  encode_block(s, frame, buf_size);
380  return NELLY_BLOCK_LEN;
381  }
382  return 0;
383 }
384 
386  .name = "nellymoser",
387  .type = AVMEDIA_TYPE_AUDIO,
388  .id = CODEC_ID_NELLYMOSER,
389  .priv_data_size = sizeof(NellyMoserEncodeContext),
390  .init = encode_init,
391  .encode = encode_frame,
392  .close = encode_end,
394  .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
395  .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE},
396 };
static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
static short * samples
Definition: ffmpeg.c:233
float buf[2][3 *NELLY_BUF_LEN]
sample buffer
Definition: nellymoserenc.c:61
void(* mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input)
Definition: fft.h:83
static av_cold int encode_end(AVCodecContext *avctx)
static const float quant_lut_add[7]
float mdct_out[NELLY_SAMPLES]
Definition: nellymoserenc.c:59
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:55
static void apply_mdct(NellyMoserEncodeContext *s)
const float ff_nelly_dequantization_table[127]
Definition: nellymoser.c:41
AVCodec.
Definition: avcodec.h:3189
#define NELLY_DETAIL_BITS
Definition: nellymoser.h:42
const uint16_t ff_nelly_init_table[64]
Definition: nellymoser.c:72
uint8_t bits
Definition: crc.c:31
#define av_cold
Definition: attributes.h:71
static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
const char data[16]
Definition: mxf.c:60
float(* opt)[NELLY_BANDS]
Definition: nellymoserenc.c:62
av_cold void dsputil_init(DSPContext *c, AVCodecContext *avctx)
Definition: dsputil.c:2789
static int init(AVCodecParserContext *s)
Definition: h264_parser.c:336
#define CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
Definition: avcodec.h:755
struct NellyMoserEncodeContext NellyMoserEncodeContext
static float pow_table[POW_TABLE_SIZE]
-pow(2, -i / 2048.0 - 3.0);
Definition: nellymoserenc.c:66
void av_free(void *ptr)
Free a memory block which has been allocated with av_malloc(z)() or av_realloc(). ...
Definition: mem.c:137
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:191
static av_cold int encode_init(AVCodecContext *avctx)
void av_log(void *avcl, int level, const char *fmt,...)
Definition: log.c:140
const char * name
Name of the codec implementation.
Definition: avcodec.h:3196
static void put_bits(PutBitContext *s, int n, unsigned int value)
Write up to 31 bits into a bitstream.
Definition: put_bits.h:136
#define ff_mdct_init
Definition: fft.h:146
#define FFMAX(a, b)
Definition: common.h:53
static int put_bits_count(PutBitContext *s)
Definition: put_bits.h:70
static float distance(float x, float y, int band)
float in_buff[NELLY_SAMPLES]
Definition: nellymoserenc.c:60
Definition: fft.h:62
static DCTELEM block[64]
Definition: dct-test.c:189
#define M_LN2
Definition: mathematics.h:33
#define NELLY_BLOCK_LEN
Definition: nellymoser.h:40
const uint8_t ff_nelly_band_sizes_table[NELLY_BANDS]
Definition: nellymoser.c:68
#define FFMIN(a, b)
Definition: common.h:55
#define POW_TABLE_OFFSET
Definition: nellymoserenc.c:49
AVCodecContext * avctx
Definition: nellymoserenc.c:53
#define NELLY_HEADER_BITS
Definition: nellymoser.h:41
AVCodec ff_nellymoser_encoder
void ff_sine_window_init(float *window, int n)
Generate a sine window.
#define OPT_SIZE
Definition: nellymoserenc.c:50
int frame_size
Samples per packet, initialized when calling 'init'.
Definition: avcodec.h:1470
static const uint8_t quant_lut[230]
Definition: nellymoserenc.c:85
external API header
int sample_rate
samples per second
Definition: avcodec.h:1456
static const uint8_t sf_lut[96]
Definition: nellymoserenc.c:68
main external API structure.
Definition: avcodec.h:1329
static void close(AVCodecParserContext *s)
Definition: h264_parser.c:327
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:111
void * av_malloc(size_t size)
Allocate a block of size bytes with alignment suitable for all memory accesses (including vectors if ...
Definition: mem.c:64
#define FF_COMPLIANCE_NORMAL
Definition: avcodec.h:1645
static const uint8_t quant_lut_offset[8]
#define find_best(val, table, LUT, LUT_add, LUT_size)
void(* vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len)
Definition: dsputil.h:413
uint8_t(* path)[NELLY_BANDS]
Definition: nellymoserenc.c:63
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:86
void(* vector_fmul)(float *dst, const float *src0, const float *src1, int len)
Definition: dsputil.h:412
#define ff_mdct_end
Definition: fft.h:147
#define NELLY_BUF_LEN
Definition: nellymoser.h:43
AVSampleFormat
all in native-endian format
Definition: samplefmt.h:27
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:52
static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
Encode NELLY_SAMPLES samples.
int trellis
trellis RD quantization
Definition: avcodec.h:2690
DSP utils.
void * priv_data
Definition: avcodec.h:1531
int channels
number of audio channels
Definition: avcodec.h:1457
const int16_t ff_nelly_delta_table[32]
Definition: nellymoser.c:81
#define NELLY_BANDS
Definition: nellymoser.h:39
#define CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
Definition: avcodec.h:750
#define POW_TABLE_SIZE
Definition: nellymoserenc.c:48
void ff_nelly_get_sample_bits(const float *buf, int *bits)
Definition: nellymoser.c:118
#define INFINITY
Definition: mathematics.h:57
The 3 alphanumeric copyright notices are md5summed they are from the original implementors.
#define NELLY_SAMPLES
Definition: nellymoser.h:48
static const float quant_lut_mul[7]
#define NELLY_FILL_LEN
Definition: nellymoser.h:44
int strict_std_compliance
strictly follow the standard (MPEG4, ...).
Definition: avcodec.h:1642
DSPContext.
Definition: dsputil.h:226
static const uint8_t sf_delta_lut[78]
Definition: nellymoserenc.c:77
bitstream writer API