28 #define UNCHECKED_BITSTREAM_READER 1
48 #define MAX_LSPS_ALIGN16 16
51 #define MAX_FRAMESIZE 160
52 #define MAX_SIGNAL_HISTORY 416
53 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
54 #define SFRAME_CACHE_MAXSIZE 256
145 int history_nsamples;
152 int denoise_strength;
154 int denoise_tilt_corr;
163 int frame_lsp_bitsize;
165 int sframe_lsp_bitsize;
172 int block_pitch_nbits;
174 int block_pitch_range;
177 int block_delta_pitch_hrange;
181 uint16_t block_conv_table[4];
195 int has_residual_lsps;
243 int aw_first_pulse_off[2];
246 int aw_next_pulse_off_cache;
254 float gain_pred_err[6];
273 float sin[511], cos[511];
275 float postfilter_agc;
307 static const uint8_t
bits[] = {
310 10, 10, 10, 12, 12, 12,
313 static const uint16_t codes[] = {
314 0x0000, 0x0001, 0x0002,
315 0x000c, 0x000d, 0x000e,
316 0x003c, 0x003d, 0x003e,
317 0x00fc, 0x00fd, 0x00fe,
318 0x03fc, 0x03fd, 0x03fe,
319 0x0ffc, 0x0ffd, 0x0ffe,
320 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff
324 memset(vbm_tree, 0xff,
sizeof(vbm_tree[0]) * 25);
325 memset(cntr, 0,
sizeof(cntr));
326 for (n = 0; n < 17; n++) {
330 vbm_tree[res * 3 + cntr[res]++] = n;
333 bits, 1, 1, codes, 2, 2, 132);
342 int n,
flags, pitch_range, lsp16_flag;
355 "Invalid extradata size %d (should be 46)\n",
369 memcpy(&s->
sin[255], s->
cos, 256 *
sizeof(s->
cos[0]));
370 for (n = 0; n < 255; n++) {
371 s->
sin[n] = -s->
sin[510 - n];
372 s->
cos[510 - n] = s->
cos[n];
378 "Invalid denoise filter strength %d (max=11)\n",
386 lsp16_flag = flags & 0x1000;
396 for (n = 0; n < s->
lsps; n++)
408 if (pitch_range <= 0) {
418 int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
422 "Unsupported samplerate %d (min=%d, max=%d)\n",
473 const float *speech_synth,
474 int size,
float alpha,
float *gain_mem)
477 float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
478 float mem = *gain_mem;
480 for (i = 0; i <
size; i++) {
481 speech_energy += fabsf(speech_synth[i]);
482 postfilter_energy += fabsf(in[i]);
484 gain_scale_factor = (1.0 - alpha) * speech_energy / postfilter_energy;
486 for (i = 0; i <
size; i++) {
487 mem = alpha * mem + gain_scale_factor;
488 out[i] = in[i] * mem;
513 const float *in,
float *out,
int size)
516 float optimal_gain = 0, dot;
524 if (dot > optimal_gain) {
528 }
while (--ptr >= end);
530 if (optimal_gain <= 0)
536 if (optimal_gain <= dot) {
537 dot = dot / (dot + 0.6 * optimal_gain);
542 for (n = 0; n <
size; n++)
543 out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
572 int fcb_type,
float *
coeffs,
int remainder)
575 float irange, angle_mul, gain_mul, range,
sq;
580 #define log_range(var, assign) do { \
581 float tmp = log10f(assign); var = tmp; \
582 max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
584 log_range(last_coeff, lpcs[1] * lpcs[1]);
585 for (n = 1; n < 64; n++)
586 log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
587 lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
598 irange = 64.0 / range;
602 for (n = 0; n <= 64; n++) {
605 idx =
FFMAX(0,
lrint((max - lpcs[n]) * irange) - 1);
607 lpcs[n] = angle_mul * pwr;
610 idx = (pwr * gain_mul - 0.0295) * 70.570526123;
613 powf(1.0331663, idx - 127);
626 idx = 255 + av_clip(lpcs[64], -255, 255);
627 coeffs[0] = coeffs[0] * s->
cos[idx];
628 idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
629 last_coeff = coeffs[64] * s->
cos[idx];
631 idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
632 coeffs[n * 2 + 1] = coeffs[n] * s->
sin[idx];
633 coeffs[n * 2] = coeffs[n] * s->
cos[idx];
637 idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
638 coeffs[n * 2 + 1] = coeffs[n] * s->
sin[idx];
639 coeffs[n * 2] = coeffs[n] * s->
cos[idx];
647 memset(&coeffs[remainder], 0,
sizeof(coeffs[0]) * (128 - remainder));
651 coeffs[remainder - 1] = 0;
656 sq = (1.0 / 64.0) * sqrtf(1 /
ff_dot_productf(coeffs, coeffs, remainder));
657 for (n = 0; n < remainder; n++)
688 float *synth_pf,
int size,
691 int remainder, lim, n;
697 tilted_lpcs[0] = 1.0;
698 memcpy(&tilted_lpcs[1], lpcs,
sizeof(lpcs[0]) * s->
lsps);
699 memset(&tilted_lpcs[s->
lsps + 1], 0,
700 sizeof(tilted_lpcs[0]) * (128 - s->
lsps - 1));
702 tilted_lpcs, s->
lsps + 2);
708 remainder =
FFMIN(127 - size, size - 1);
713 memset(&synth_pf[size], 0,
sizeof(synth_pf[0]) * (128 - size));
718 for (n = 1; n < 64; n++) {
719 float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
720 synth_pf[n * 2] = v1 *
coeffs[n * 2] - v2 *
coeffs[n * 2 + 1];
721 synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
729 for (n = 0; n < lim; n++)
739 for (n = 0; n < lim; n++)
741 if (lim < remainder) {
771 const float *lpcs,
float *zero_exc_pf,
772 int fcb_type,
int pitch)
776 *synth_filter_in = zero_exc_pf;
785 synth_filter_in = synth_filter_in_buf;
789 synth_filter_in, size, s->
lsps);
790 memcpy(&synth_pf[-s->
lsps], &synth_pf[size - s->
lsps],
791 sizeof(synth_pf[0]) * s->
lsps);
803 (
const float[2]) { -1.99997, 1.0 },
804 (
const float[2]) { -1.9330735188, 0.93589198496 },
824 const uint16_t *values,
825 const uint16_t *
sizes,
826 int n_stages,
const uint8_t *table,
828 const double *base_q)
832 memset(lsps, 0, num *
sizeof(*lsps));
833 for (n = 0; n < n_stages; n++) {
834 const uint8_t *t_off = &table[values[n] * num];
835 double base = base_q[n], mul = mul_q[n];
837 for (m = 0; m < num; m++)
838 lsps[m] += base + mul * t_off[m];
840 table += sizes[n] * num;
857 static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
858 static const double mul_lsf[4] = {
859 5.2187144800e-3, 1.4626986422e-3,
860 9.6179549166e-4, 1.1325736225e-3
862 static const double base_lsf[4] = {
863 M_PI * -2.15522e-1,
M_PI * -6.1646e-2,
864 M_PI * -3.3486e-2,
M_PI * -5.7408e-2
882 double *i_lsps,
const double *old,
883 double *
a1,
double *
a2,
int q_mode)
885 static const uint16_t vec_sizes[3] = { 128, 64, 64 };
886 static const double mul_lsf[3] = {
887 2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
889 static const double base_lsf[3] = {
890 M_PI * -1.07448e-1,
M_PI * -5.2706e-2,
M_PI * -5.1634e-2
892 const float (*ipol_tab)[2][10] = q_mode ?
894 uint16_t interpol,
v[3];
904 for (n = 0; n < 10; n++) {
905 double delta = old[n] - i_lsps[n];
906 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
907 a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
919 static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
920 static const double mul_lsf[5] = {
921 3.3439586280e-3, 6.9908173703e-4,
922 3.3216608306e-3, 1.0334960326e-3,
925 static const double base_lsf[5] = {
926 M_PI * -1.27576e-1,
M_PI * -2.4292e-2,
927 M_PI * -1.28094e-1,
M_PI * -3.2128e-2,
951 double *i_lsps,
const double *old,
952 double *
a1,
double *
a2,
int q_mode)
954 static const uint16_t vec_sizes[3] = { 128, 128, 128 };
955 static const double mul_lsf[3] = {
956 1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
958 static const double base_lsf[3] = {
961 const float (*ipol_tab)[2][16] = q_mode ?
963 uint16_t interpol,
v[3];
973 for (n = 0; n < 16; n++) {
974 double delta = old[n] - i_lsps[n];
975 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
976 a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
1003 static const int16_t start_offset[94] = {
1004 -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
1005 13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
1006 27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
1007 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
1008 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
1009 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
1010 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
1011 141, 143, 145, 147, 149, 151, 153, 155, 157, 159
1017 if ((bits =
get_bits(gb, 6)) >= 54) {
1019 bits += (bits - 54) * 3 +
get_bits(gb, 2);
1025 for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
1038 if (start_offset[bits] < 0)
1055 uint16_t use_mask_mem[9];
1056 uint16_t *use_mask = use_mask_mem + 2;
1065 pulse_start, n, idx, range, aidx, start_off = 0;
1074 if (block_idx == 0) {
1083 pulse_start = s->
aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
1088 memset(&use_mask[-2], 0, 2 *
sizeof(use_mask[0]));
1089 memset( use_mask, -1, 5 *
sizeof(use_mask[0]));
1090 memset(&use_mask[5], 0, 2 *
sizeof(use_mask[0]));
1094 uint16_t *use_mask_ptr = &use_mask[idx >> 4];
1095 int first_sh = 16 - (idx & 15);
1096 *use_mask_ptr++ &= 0xFFFFu << first_sh;
1097 excl_range -= first_sh;
1098 if (excl_range >= 16) {
1099 *use_mask_ptr++ = 0;
1100 *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
1102 *use_mask_ptr &= 0xFFFF >> excl_range;
1107 for (n = 0; n <= aidx; pulse_start++) {
1108 for (idx = pulse_start; idx < 0; idx += fcb->
pitch_lag) ;
1110 if (use_mask[0]) idx = 0x0F;
1111 else if (use_mask[1]) idx = 0x1F;
1112 else if (use_mask[2]) idx = 0x2F;
1113 else if (use_mask[3]) idx = 0x3F;
1114 else if (use_mask[4]) idx = 0x4F;
1116 idx -= av_log2_16bit(use_mask[idx >> 4]);
1118 if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
1119 use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
1125 fcb->
x[fcb->
n] = start_off;
1149 int n, v_mask, i_mask, sh, n_pulses;
1163 for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
1164 fcb->
y[fcb->
n] = (val & v_mask) ? -1.0 : 1.0;
1165 fcb->
x[fcb->
n] = (val & i_mask) * n_pulses + n +
1167 while (fcb->
x[fcb->
n] < 0)
1173 int num2 = (val & 0x1FF) >> 1,
delta, idx;
1175 if (num2 < 1 * 79) {
delta = 1; idx = num2 + 1; }
1176 else if (num2 < 2 * 78) {
delta = 3; idx = num2 + 1 - 1 * 77; }
1177 else if (num2 < 3 * 77) {
delta = 5; idx = num2 + 1 - 2 * 76; }
1178 else {
delta = 7; idx = num2 + 1 - 3 * 75; }
1179 v = (val & 0x200) ? -1.0 : 1.0;
1184 fcb->
x[fcb->
n + 1] = idx;
1185 fcb->
y[fcb->
n + 1] = (val & 1) ? -v : v;
1203 static int pRNG(
int frame_cntr,
int block_num,
int block_size)
1215 static const unsigned int div_tbl[9][2] = {
1216 { 8332, 3 * 715827883
U },
1217 { 4545, 0 * 390451573
U },
1218 { 3124, 11 * 268435456
U },
1219 { 2380, 15 * 204522253
U },
1220 { 1922, 23 * 165191050
U },
1221 { 1612, 23 * 138547333
U },
1222 { 1388, 27 * 119304648
U },
1223 { 1219, 16 * 104755300
U },
1224 { 1086, 39 * 93368855
U }
1226 unsigned int z, y, x =
MUL16(block_num, 1877) + frame_cntr;
1227 if (x >= 0xFFFF) x -= 0xFFFF;
1229 y = x - 9 *
MULH(477218589, x);
1230 z = (uint16_t) (x * div_tbl[y][0] +
UMULH(x, div_tbl[y][1]));
1232 return z % (1000 - block_size);
1240 int block_idx,
int size,
1262 for (n = 0; n <
size; n++)
1271 int block_idx,
int size,
1272 int block_pitch_sh2,
1276 static const float gain_coeff[6] = {
1277 0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
1279 float pulses[
MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
1280 int n, idx, gain_weight;
1284 memset(pulses, 0,
sizeof(*pulses) * size);
1301 for (n = 0; n <
size; n++)
1313 for (n = 0; n < 5; n++) {
1319 fcb.
x[fcb.
n] = n + 5 * pos1;
1320 fcb.
y[fcb.
n++] = sign;
1321 if (n < frame_desc->dbl_pulses) {
1323 fcb.
x[fcb.
n] = n + 5 * pos2;
1324 fcb.
y[fcb.
n++] = (pos1 < pos2) ? -sign : sign;
1343 for (n = 0; n < gain_weight; n++)
1349 for (n = 0; n <
size; n +=
len) {
1351 int abs_idx = block_idx * size + n;
1354 int pitch = (pitch_sh16 + 0x6FFF) >> 16;
1355 int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
1356 idx = idx_sh16 >> 16;
1359 next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
1361 next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
1372 int block_pitch = block_pitch_sh2 >> 2;
1373 idx = block_pitch_sh2 & 3;
1380 sizeof(
float) * size);
1385 acb_gain, fcb_gain, size);
1405 int block_idx,
int size,
1406 int block_pitch_sh2,
1407 const double *lsps,
const double *prev_lsps,
1409 float *excitation,
float *synth)
1420 frame_desc, excitation);
1423 fac = (block_idx + 0.5) / frame_desc->
n_blocks;
1424 for (n = 0; n < s->
lsps; n++)
1425 i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
1449 const double *lsps,
const double *prev_lsps,
1450 float *excitation,
float *synth)
1453 int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
1461 "Invalid frame type VLC code, skipping\n");
1484 int fac = n * 2 + 1;
1486 pitch[n] = (
MUL16(fac, cur_pitch_val) +
1528 last_block_pitch = av_clip(block_pitch,
1534 if (block_pitch < t1) {
1538 if (block_pitch <
t2) {
1543 if (block_pitch <
t3) {
1550 pitch[n] = bl_pitch_sh2 >> 2;
1555 bl_pitch_sh2 = pitch[n] << 2;
1564 synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
1566 &excitation[n * block_nsamples],
1567 &synth[n * block_nsamples]);
1576 for (n = 0; n < s->
lsps; n++)
1577 i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
1583 for (n = 0; n < s->
lsps; n++)
1584 i_lsps[n] = cos(lsps[n]);
1586 postfilter(s, &synth[80], &samples[80], 80, lpcs,
1590 memcpy(samples, synth, 160 *
sizeof(synth[0]));
1630 lsps[0] =
FFMAX(lsps[0], 0.0015 *
M_PI);
1631 for (n = 1; n < num; n++)
1632 lsps[n] =
FFMAX(lsps[n], lsps[n - 1] + 0.0125 *
M_PI);
1633 lsps[num - 1] =
FFMIN(lsps[num - 1], 0.9985 *
M_PI);
1637 for (n = 1; n < num; n++) {
1638 if (lsps[n] < lsps[n - 1]) {
1639 for (m = 1; m < num; m++) {
1640 double tmp = lsps[m];
1641 for (l = m - 1; l >= 0; l--) {
1642 if (lsps[l] <= tmp)
break;
1643 lsps[l + 1] = lsps[l];
1665 int n, need_bits, bd_idx;
1687 int aw_idx_is_ext = 0;
1717 need_bits = 2 * !aw_idx_is_ext;
1753 int n, res, n_samples = 480;
1762 s->
lsps *
sizeof(*synth));
1788 if ((n_samples =
get_bits(gb, 12)) > 480) {
1790 "Superframe encodes >480 samples (%d), not allowed\n",
1799 for (n = 0; n < s->
lsps; n++)
1800 prev_lsps[n] = s->
prev_lsps[n] - mean_lsf[n];
1802 if (s->
lsps == 10) {
1807 for (n = 0; n < s->
lsps; n++) {
1808 lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
1809 lsps[1][n] = mean_lsf[n] + (a1[s->
lsps + n] - a2[n * 2 + 1]);
1810 lsps[2][n] += mean_lsf[n];
1812 for (n = 0; n < 3; n++)
1826 for (n = 0; n < 3; n++) {
1830 if (s->
lsps == 10) {
1835 for (m = 0; m < s->
lsps; m++)
1836 lsps[n][m] += mean_lsf[m];
1842 lsps[n], n == 0 ? s->
prev_lsps : lsps[n - 1],
1844 &synth[s->
lsps + n * MAX_FRAMESIZE]))) {
1864 s->
lsps *
sizeof(*synth));
1895 }
while (res == 0x3F);
1920 int rmn_bytes, rmn_bits;
1923 if (rmn_bits < nbits)
1927 rmn_bits &= 7; rmn_bytes >>= 3;
1928 if ((rmn_bits =
FFMIN(rmn_bits, nbits)) > 0)
1931 FFMIN(nbits - rmn_bits, rmn_bytes << 3));
1946 int *got_frame_ptr,
AVPacket *avpkt)
2001 }
else if (*got_frame_ptr) {
2044 for (n = 0; n < s->
lsps; n++)
Description of frame types.
static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply first set of pitch-adaptive window pulses.
av_cold void ff_rdft_end(RDFTContext *s)
static const uint8_t wmavoice_dq_lsp16r2[0x500]
int do_apf
whether to apply the averaged projection filter (APF)
static const int16_t coeffs[28]
static int pRNG(int frame_cntr, int block_num, int block_size)
Generate a random number from frame_cntr and block_idx, which will lie in the range [0...
static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
Set up the variable bit mode (VBM) tree from container extradata.
void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP synthesis filter.
float gain_pred_err[6]
cache for gain prediction
float ff_dot_productf(const float *a, const float *b, int length)
Return the dot product.
void(* dct_calc)(struct DCTContext *s, FFTSample *data)
int aw_next_pulse_off_cache
the position (relative to start of the second block) at which pulses should start to be positioned...
int frame_lsp_bitsize
size (in bits) of LSPs, when encoded per-frame (independent coding)
uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE+FF_INPUT_BUFFER_PADDING_SIZE]
cache for superframe data split over multiple packets
float postfilter_agc
gain control memory, used in adaptive_gain_control()
void ff_acelp_apply_order_2_transfer_function(float *out, const float *in, const float zero_coeffs[2], const float pole_coeffs[2], float gain, float mem[2], int n)
Apply an order 2 rational transfer function in-place.
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
static void postfilter(WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)
Averaging projection filter, the postfilter used in WMAVoice.
void ff_weighted_vector_sumf(float *out, const float *in_a, const float *in_b, float weight_coeff_a, float weight_coeff_b, int length)
float implementation of weighted sum of two vectors.
static void skip_bits_long(GetBitContext *s, int n)
AVFrame * coded_frame
the picture in the bitstream
float synth_filter_out_buf[0x80+MAX_LSPS_ALIGN16]
aligned buffer for postfilter speech synthesis
static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb, const int *pitch)
Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between th...
int aw_n_pulses[2]
number of AW-pulses in each block; note that this number can be negative (in which case it basically ...
void avpriv_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
Copy the content of src to the bitstream.
static void stabilize_lsps(double *lsps, int num)
Ensure minimum value for first item, maximum value for last value, proper spacing between each value ...
#define DECLARE_ALIGNED(n, t, v)
static const float wmavoice_gain_codebook_fcb[128]
static const uint8_t wmavoice_dq_lsp16i1[0x640]
static const uint8_t wmavoice_dq_lsp16r1[0x500]
int spillover_nbits
number of bits of the previous packet's last superframe preceding this packet's first full superframe...
void ff_set_fixed_vector(float *out, const AMRFixed *in, float scale, int size)
Add fixed vector to an array from a sparse representation.
int block_pitch_nbits
number of bits used to specify the first block's pitch value
static const uint8_t wmavoice_dq_lsp16i3[0x300]
static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)
Synthesize output samples for a single frame.
static void calc_input_response(WMAVoiceContext *s, float *lpcs, int fcb_type, float *coeffs, int remainder)
Derive denoise filter coefficients (in real domain) from the LPCs.
static void dequant_lsp10i(GetBitContext *gb, double *lsps)
Parse 10 independently-coded LSPs.
#define MAX_LSPS_ALIGN16
same as MAX_LSPS; needs to be a multiple of 16
int block_align
number of bytes per packet if constant and known, or 0. Used by some WAV based audio codecs...
static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply second set of pitch-adaptive window pulses.
static const float wmavoice_ipol1_coeffs[17 *9]
no adaptive codebook (only hardcoded fixed)
static const uint8_t wmavoice_dq_lsp16i2[0x3c0]
hardcoded (fixed) codebook with per-block gain values
int spillover_bitsize
number of bits used to specify spillover_nbits in the packet header = ceil(log2(ctx->block_align << 3...
static int decode(MimicContext *ctx, int quality, int num_coeffs, int is_iframe)
int block_delta_pitch_nbits
number of bits used to specify the delta pitch between this and the last block's pitch value...
enum AVSampleFormat sample_fmt
audio sample format
Sparse representation for the algebraic codebook (fixed) vector.
static const uint8_t wmavoice_dq_lsp16r3[0x600]
#define INIT_VLC_STATIC(vlc, bits, a, b, c, d, e, f, g, static_size)
static const float wmavoice_gain_codebook_acb[128]
uint8_t log_n_blocks
log2(n_blocks)
int aw_first_pulse_off[2]
index of first sample to which to apply AW-pulses, or -0xff if unset
void av_memcpy_backptr(uint8_t *dst, int back, int cnt)
deliberately overlapping memcpy implementation
int has_residual_lsps
if set, superframes contain one set of LSPs that cover all frames, encoded as independent and residua...
float tilted_lpcs_pf[0x80]
aligned buffer for LPC tilting
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
static float tilt_factor(const float *lpcs, int n_lpcs)
Get the tilt factor of a formant filter from its transfer function.
static const uint8_t wmavoice_dq_lsp10r[0x1400]
static void dequant_lsps(double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)
Dequantize LSPs.
static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr)
Synthesize output samples for a single superframe.
static int check_bits_for_superframe(GetBitContext *orig_gb, WMAVoiceContext *s)
Test if there's enough bits to read 1 superframe.
static const float wmavoice_ipol2_coeffs[32]
Hamming-window sinc function (num = 32, x = [ 0, 31 ]): (0.54 + 0.46 * cos(2 * M_PI * x / (num - 1)))...
static int get_bits_count(const GetBitContext *s)
float dcf_mem[2]
DC filter history.
bitstream reader API header.
static av_cold void wmavoice_flush(AVCodecContext *ctx)
float synth_history[MAX_LSPS]
see excitation_history
double prev_lsps[MAX_LSPS]
LSPs of the last frame of the previous superframe.
static void copy_bits(PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)
Copy (unaligned) bits from gb/data/size to pb.
static int init(AVCodecParserContext *s)
#define CODEC_CAP_DR1
Codec uses get_buffer() for allocating buffers and supports custom allocators.
static int get_bits_left(GetBitContext *gb)
static const double wmavoice_mean_lsf16[2][16]
Per-block pitch with signal generation using a Hamming sinc window function.
int sframe_cache_size
set to >0 if we have data from an (incomplete) superframe from a previous packet that spilled over in...
static const float wmavoice_lsp10_intercoeff_b[32][2][10]
int block_pitch_range
range of the block pitch
static const float wmavoice_std_codebook[1000]
int last_acb_type
frame type [0-2] of the previous frame
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
static const float wmavoice_gain_silence[256]
int denoise_filter_cache_size
samples in denoise_filter_cache
int history_nsamples
number of samples in history for signal prediction (through ACB)
static const uint8_t wmavoice_dq_lsp10i[0xf00]
static const float wmavoice_lsp10_intercoeff_a[32][2][10]
static const float wmavoice_energy_table[128]
LUT for 1.071575641632 * pow(1.0331663, n - 127)
void av_log(void *avcl, int level, const char *fmt,...)
Windows Media Voice (WMAVoice) tables.
const char * name
Name of the codec implementation.
int denoise_tilt_corr
Whether to apply tilt correction to the Wiener filter coefficients (postfilter)
static void put_bits(PutBitContext *s, int n, unsigned int value)
Write up to 31 bits into a bitstream.
int aw_idx_is_ext
whether the AW index was encoded in 8 bits (instead of 6)
uint16_t block_conv_table[4]
boundaries for block pitch unit/scale conversion
DCTContext dst
contexts for phase shift (in Hilbert transform, part of postfilter)
int lsp_def_mode
defines different sets of LSP defaults [0, 1]
adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.
void(* rdft_calc)(struct RDFTContext *s, FFTSample *z)
static int put_bits_count(PutBitContext *s)
int skip_bits_next
number of bits to skip at the next call to wmavoice_decode_packet() (since they're part of the previo...
static void dequant_lsp16r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
int min_pitch_val
base value for pitch parsing code
WMA Voice decoding context.
static void wiener_denoise(WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)
This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it...
int denoise_strength
strength of denoising in Wiener filter [0-11]
#define log_range(var, assign)
#define MAX_LSPS
maximum filter order
static VLC frame_type_vlc
Frame type VLC coding.
int pitch_nbits
number of bits used to specify the pitch value in the frame header
#define MAX_BLOCKS
maximum number of blocks per frame
float denoise_coeffs_pf[0x80]
aligned buffer for denoise coefficients
static void dequant_lsp10r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame)
static av_always_inline unsigned UMULH(unsigned a, unsigned b)
static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE(*table)[2], int bits, int max_depth)
Parse a vlc code.
static int kalman_smoothen(WMAVoiceContext *s, int pitch, const float *in, float *out, int size)
Kalman smoothing function.
void ff_tilt_compensation(float *mem, float tilt, float *samples, int size)
Apply tilt compensation filter, 1 - tilt * z-1.
#define sq(reg, off, base)
void ff_sine_window_init(float *window, int n)
Generate a sine window.
static const float wmavoice_gain_universal[64]
void ff_acelp_lspd2lpc(const double *lsp, float *lpc, int lp_half_order)
Reconstruct LPC coefficients from the line spectral pair frequencies.
static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
Set up decoder with parameters from demuxer (extradata etc.).
int sframe_lsp_bitsize
size (in bits) of LSPs, when encoded per superframe (residual coding)
static const uint8_t last_coeff[3]
static const struct frame_type_desc frame_descs[17]
float denoise_filter_cache[MAX_FRAMESIZE]
int sample_rate
samples per second
static int wmavoice_decode_packet(AVCodecContext *ctx, void *data, int *got_frame_ptr, AVPacket *avpkt)
Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer...
main external API structure.
static int parse_packet_header(WMAVoiceContext *s)
Parse the packet header at the start of each packet (input data to this decoder). ...
static void close(AVCodecParserContext *s)
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
AVCodec ff_wmavoice_decoder
int8_t vbm_tree[25]
converts VLC codes to frame type
static unsigned int get_bits1(GetBitContext *s)
static void synth_block(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)
Parse data in a single block.
static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
static void skip_bits(GetBitContext *s, int n)
av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
Set up DCT.
int pitch_diff_sh16
((cur_pitch_val - last_pitch_val) << 16) / MAX_FRAMESIZE
static int init_get_bits(GetBitContext *s, const uint8_t *buffer, int bit_size)
Initialize GetBitContext.
#define MAX_SFRAMESIZE
maximum number of samples per superframe
int lsp_q_mode
defines quantizer defaults [0, 1]
int frame_cntr
current frame index [0 - 0xFFFE]; is only used for comfort noise in pRNG()
void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP zero synthesis filter.
static av_always_inline av_const long int lrint(double x)
Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.
#define FF_INPUT_BUFFER_PADDING_SIZE
Required number of additionally allocated bytes at the end of the input bitstream for decoding...
static void adaptive_gain_control(float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)
Adaptive gain control (as used in postfilter).
static const float mean_lsf[10]
#define SFRAME_CACHE_MAXSIZE
maximum cache size for frame data that
comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain va...
uint8_t fcb_type
Fixed codebook type (FCB_TYPE_*)
static void dequant_lsp16i(GetBitContext *gb, double *lsps)
Parse 16 independently-coded LSPs.
RDFTContext irdft
contexts for FFT-calculation in the postfilter (for denoise filter)
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)
Parse hardcoded signal for a single block.
uint8_t n_blocks
number of blocks per frame (each block contains 160/n_blocks samples)
common internal api header.
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
static av_cold void flush(AVCodecContext *avctx)
Flush (reset) the frame ID after seeking.
float excitation_history[MAX_SIGNAL_HISTORY]
cache of the signal of previous superframes, used as a history for signal generation ...
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs...
int last_pitch_val
pitch value of the previous frame
#define MAX_FRAMESIZE
maximum number of samples per frame
float silence_gain
set for use in blocks if ACB_TYPE_NONE
static const double wmavoice_mean_lsf10[2][10]
#define CODEC_CAP_SUBFRAMES
Codec can output multiple frames per AVPacket. Normally demuxers return one frame at a time...
VLC_TYPE(* table)[2]
code, bits
av_cold void ff_dct_end(DCTContext *s)
void ff_acelp_interpolatef(float *out, const float *in, const float *filter_coeffs, int precision, int frac_pos, int filter_length, int length)
Floating point version of ff_acelp_interpolate()
int block_delta_pitch_hrange
1/2 range of the delta (full range is from -this to +this-1)
int max_pitch_val
max value + 1 for pitch parsing
int lsps
number of LSPs per frame [10 or 16]
#define MAX_FRAMES
maximum number of frames per superframe
static const float wmavoice_lsp16_intercoeff_b[32][2][16]
void avcodec_get_frame_defaults(AVFrame *pic)
Set the fields of the given AVFrame to default values.
PutBitContext pb
bitstream writer for sframe_cache
uint8_t acb_type
Adaptive codebook type (ACB_TYPE_*)
static const float wmavoice_denoise_power_table[12][64]
LUT for f(x,y) = pow((y + 6.9) / 64, 0.025 * (x + 1)).
int dc_level
Predicted amount of DC noise, based on which a DC removal filter is used.
#define VLC_NBITS
number of bits to read per VLC iteration
static const int sizes[][2]
static const float wmavoice_lsp16_intercoeff_a[32][2][16]
void av_log_missing_feature(void *avc, const char *feature, int want_sample)
Log a generic warning message about a missing feature.
float cos[511]
8-bit cosine/sine windows over [-pi,pi] range
av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
Set up a real FFT.
int aw_pulse_range
the range over which aw_pulse_set1() can apply the pulse, relative to the value in aw_first_pulse_off...
int nb_samples
number of audio samples (per channel) described by this frame
float zero_exc_pf[MAX_SIGNAL_HISTORY+MAX_SFRAMESIZE]
zero filter output (i.e.
static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)
Parse FCB/ACB signal for a single block.
uint8_t dbl_pulses
how many pulse vectors have pulse pairs (rather than just one single pulse) only if fcb_type == FCB_T...
#define MAX_SIGNAL_HISTORY
maximum excitation signal history
uint16_t frame_size
the amount of bits that make up the block data (per frame)
GetBitContext gb
packet bitreader.