linux/drivers/staging/echo/echo.c
<<
>>
Prefs
   1/*
   2 * SpanDSP - a series of DSP components for telephony
   3 *
   4 * echo.c - A line echo canceller.  This code is being developed
   5 *          against and partially complies with G168.
   6 *
   7 * Written by Steve Underwood <steveu@coppice.org>
   8 *         and David Rowe <david_at_rowetel_dot_com>
   9 *
  10 * Copyright (C) 2001, 2003 Steve Underwood, 2007 David Rowe
  11 *
  12 * Based on a bit from here, a bit from there, eye of toad, ear of
  13 * bat, 15 years of failed attempts by David and a few fried brain
  14 * cells.
  15 *
  16 * All rights reserved.
  17 *
  18 * This program is free software; you can redistribute it and/or modify
  19 * it under the terms of the GNU General Public License version 2, as
  20 * published by the Free Software Foundation.
  21 *
  22 * This program is distributed in the hope that it will be useful,
  23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  25 * GNU General Public License for more details.
  26 *
  27 * You should have received a copy of the GNU General Public License
  28 * along with this program; if not, write to the Free Software
  29 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  30 */
  31
  32/*! \file */
  33
  34/* Implementation Notes
  35   David Rowe
  36   April 2007
  37
  38   This code started life as Steve's NLMS algorithm with a tap
  39   rotation algorithm to handle divergence during double talk.  I
  40   added a Geigel Double Talk Detector (DTD) [2] and performed some
  41   G168 tests.  However I had trouble meeting the G168 requirements,
  42   especially for double talk - there were always cases where my DTD
  43   failed, for example where near end speech was under the 6dB
  44   threshold required for declaring double talk.
  45
  46   So I tried a two path algorithm [1], which has so far given better
  47   results.  The original tap rotation/Geigel algorithm is available
  48   in SVN http://svn.rowetel.com/software/oslec/tags/before_16bit.
  49   It's probably possible to make it work if some one wants to put some
  50   serious work into it.
  51
  52   At present no special treatment is provided for tones, which
  53   generally cause NLMS algorithms to diverge.  Initial runs of a
  54   subset of the G168 tests for tones (e.g ./echo_test 6) show the
  55   current algorithm is passing OK, which is kind of surprising.  The
  56   full set of tests needs to be performed to confirm this result.
  57
  58   One other interesting change is that I have managed to get the NLMS
  59   code to work with 16 bit coefficients, rather than the original 32
  60   bit coefficents.  This reduces the MIPs and storage required.
  61   I evaulated the 16 bit port using g168_tests.sh and listening tests
  62   on 4 real-world samples.
  63
  64   I also attempted the implementation of a block based NLMS update
  65   [2] but although this passes g168_tests.sh it didn't converge well
  66   on the real-world samples.  I have no idea why, perhaps a scaling
  67   problem.  The block based code is also available in SVN
  68   http://svn.rowetel.com/software/oslec/tags/before_16bit.  If this
  69   code can be debugged, it will lead to further reduction in MIPS, as
  70   the block update code maps nicely onto DSP instruction sets (it's a
  71   dot product) compared to the current sample-by-sample update.
  72
  73   Steve also has some nice notes on echo cancellers in echo.h
  74
  75   References:
  76
  77   [1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo
  78       Path Models", IEEE Transactions on communications, COM-25,
  79       No. 6, June
  80       1977.
  81       http://www.rowetel.com/images/echo/dual_path_paper.pdf
  82
  83   [2] The classic, very useful paper that tells you how to
  84       actually build a real world echo canceller:
  85         Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice
  86         Echo Canceller with a TMS320020,
  87         http://www.rowetel.com/images/echo/spra129.pdf
  88
  89   [3] I have written a series of blog posts on this work, here is
  90       Part 1: http://www.rowetel.com/blog/?p=18
  91
  92   [4] The source code http://svn.rowetel.com/software/oslec/
  93
  94   [5] A nice reference on LMS filters:
  95         http://en.wikipedia.org/wiki/Least_mean_squares_filter
  96
  97   Credits:
  98
  99   Thanks to Steve Underwood, Jean-Marc Valin, and Ramakrishnan
 100   Muthukrishnan for their suggestions and email discussions.  Thanks
 101   also to those people who collected echo samples for me such as
 102   Mark, Pawel, and Pavel.
 103*/
 104
 105#include <linux/kernel.h>
 106#include <linux/module.h>
 107#include <linux/slab.h>
 108
 109#include "echo.h"
 110
 111#define MIN_TX_POWER_FOR_ADAPTION       64
 112#define MIN_RX_POWER_FOR_ADAPTION       64
 113#define DTD_HANGOVER                    600     /* 600 samples, or 75ms     */
 114#define DC_LOG2BETA                     3       /* log2() of DC filter Beta */
 115
 116
 117/* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */
 118
 119#ifdef __bfin__
 120static inline void lms_adapt_bg(struct oslec_state *ec, int clean,
 121                                    int shift)
 122{
 123        int i, j;
 124        int offset1;
 125        int offset2;
 126        int factor;
 127        int exp;
 128        int16_t *phist;
 129        int n;
 130
 131        if (shift > 0)
 132                factor = clean << shift;
 133        else
 134                factor = clean >> -shift;
 135
 136        /* Update the FIR taps */
 137
 138        offset2 = ec->curr_pos;
 139        offset1 = ec->taps - offset2;
 140        phist = &ec->fir_state_bg.history[offset2];
 141
 142        /* st: and en: help us locate the assembler in echo.s */
 143
 144        /* asm("st:"); */
 145        n = ec->taps;
 146        for (i = 0, j = offset2; i < n; i++, j++) {
 147                exp = *phist++ * factor;
 148                ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
 149        }
 150        /* asm("en:"); */
 151
 152        /* Note the asm for the inner loop above generated by Blackfin gcc
 153           4.1.1 is pretty good (note even parallel instructions used):
 154
 155           R0 = W [P0++] (X);
 156           R0 *= R2;
 157           R0 = R0 + R3 (NS) ||
 158           R1 = W [P1] (X) ||
 159           nop;
 160           R0 >>>= 15;
 161           R0 = R0 + R1;
 162           W [P1++] = R0;
 163
 164           A block based update algorithm would be much faster but the
 165           above can't be improved on much.  Every instruction saved in
 166           the loop above is 2 MIPs/ch!  The for loop above is where the
 167           Blackfin spends most of it's time - about 17 MIPs/ch measured
 168           with speedtest.c with 256 taps (32ms).  Write-back and
 169           Write-through cache gave about the same performance.
 170         */
 171}
 172
 173/*
 174   IDEAS for further optimisation of lms_adapt_bg():
 175
 176   1/ The rounding is quite costly.  Could we keep as 32 bit coeffs
 177   then make filter pluck the MS 16-bits of the coeffs when filtering?
 178   However this would lower potential optimisation of filter, as I
 179   think the dual-MAC architecture requires packed 16 bit coeffs.
 180
 181   2/ Block based update would be more efficient, as per comments above,
 182   could use dual MAC architecture.
 183
 184   3/ Look for same sample Blackfin LMS code, see if we can get dual-MAC
 185   packing.
 186
 187   4/ Execute the whole e/c in a block of say 20ms rather than sample
 188   by sample.  Processing a few samples every ms is inefficient.
 189*/
 190
 191#else
 192static inline void lms_adapt_bg(struct oslec_state *ec, int clean,
 193                                    int shift)
 194{
 195        int i;
 196
 197        int offset1;
 198        int offset2;
 199        int factor;
 200        int exp;
 201
 202        if (shift > 0)
 203                factor = clean << shift;
 204        else
 205                factor = clean >> -shift;
 206
 207        /* Update the FIR taps */
 208
 209        offset2 = ec->curr_pos;
 210        offset1 = ec->taps - offset2;
 211
 212        for (i = ec->taps - 1; i >= offset1; i--) {
 213                exp = (ec->fir_state_bg.history[i - offset1] * factor);
 214                ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
 215        }
 216        for (; i >= 0; i--) {
 217                exp = (ec->fir_state_bg.history[i + offset2] * factor);
 218                ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
 219        }
 220}
 221#endif
 222
 223static inline int top_bit(unsigned int bits)
 224{
 225        if (bits == 0)
 226                return -1;
 227        else
 228                return (int)fls((int32_t)bits)-1;
 229}
 230
 231struct oslec_state *oslec_create(int len, int adaption_mode)
 232{
 233        struct oslec_state *ec;
 234        int i;
 235
 236        ec = kzalloc(sizeof(*ec), GFP_KERNEL);
 237        if (!ec)
 238                return NULL;
 239
 240        ec->taps = len;
 241        ec->log2taps = top_bit(len);
 242        ec->curr_pos = ec->taps - 1;
 243
 244        for (i = 0; i < 2; i++) {
 245                ec->fir_taps16[i] =
 246                    kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
 247                if (!ec->fir_taps16[i])
 248                        goto error_oom;
 249        }
 250
 251        fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps);
 252        fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps);
 253
 254        for (i = 0; i < 5; i++)
 255                ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0;
 256
 257        ec->cng_level = 1000;
 258        oslec_adaption_mode(ec, adaption_mode);
 259
 260        ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
 261        if (!ec->snapshot)
 262                goto error_oom;
 263
 264        ec->cond_met = 0;
 265        ec->Pstates = 0;
 266        ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
 267        ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
 268        ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
 269        ec->Lbgn = ec->Lbgn_acc = 0;
 270        ec->Lbgn_upper = 200;
 271        ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
 272
 273        return ec;
 274
 275error_oom:
 276        for (i = 0; i < 2; i++)
 277                kfree(ec->fir_taps16[i]);
 278
 279        kfree(ec);
 280        return NULL;
 281}
 282EXPORT_SYMBOL_GPL(oslec_create);
 283
 284void oslec_free(struct oslec_state *ec)
 285{
 286        int i;
 287
 288        fir16_free(&ec->fir_state);
 289        fir16_free(&ec->fir_state_bg);
 290        for (i = 0; i < 2; i++)
 291                kfree(ec->fir_taps16[i]);
 292        kfree(ec->snapshot);
 293        kfree(ec);
 294}
 295EXPORT_SYMBOL_GPL(oslec_free);
 296
 297void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode)
 298{
 299        ec->adaption_mode = adaption_mode;
 300}
 301EXPORT_SYMBOL_GPL(oslec_adaption_mode);
 302
 303void oslec_flush(struct oslec_state *ec)
 304{
 305        int i;
 306
 307        ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
 308        ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
 309        ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
 310
 311        ec->Lbgn = ec->Lbgn_acc = 0;
 312        ec->Lbgn_upper = 200;
 313        ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
 314
 315        ec->nonupdate_dwell = 0;
 316
 317        fir16_flush(&ec->fir_state);
 318        fir16_flush(&ec->fir_state_bg);
 319        ec->fir_state.curr_pos = ec->taps - 1;
 320        ec->fir_state_bg.curr_pos = ec->taps - 1;
 321        for (i = 0; i < 2; i++)
 322                memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t));
 323
 324        ec->curr_pos = ec->taps - 1;
 325        ec->Pstates = 0;
 326}
 327EXPORT_SYMBOL_GPL(oslec_flush);
 328
 329void oslec_snapshot(struct oslec_state *ec)
 330{
 331        memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps * sizeof(int16_t));
 332}
 333EXPORT_SYMBOL_GPL(oslec_snapshot);
 334
 335/* Dual Path Echo Canceller */
 336
 337int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
 338{
 339        int32_t echo_value;
 340        int clean_bg;
 341        int tmp, tmp1;
 342
 343        /*
 344         * Input scaling was found be required to prevent problems when tx
 345         * starts clipping.  Another possible way to handle this would be the
 346         * filter coefficent scaling.
 347         */
 348
 349        ec->tx = tx;
 350        ec->rx = rx;
 351        tx >>= 1;
 352        rx >>= 1;
 353
 354        /*
 355         * Filter DC, 3dB point is 160Hz (I think), note 32 bit precision
 356         * required otherwise values do not track down to 0. Zero at DC, Pole
 357         * at (1-Beta) on real axis.  Some chip sets (like Si labs) don't
 358         * need this, but something like a $10 X100P card does.  Any DC really
 359         * slows down convergence.
 360         *
 361         * Note: removes some low frequency from the signal, this reduces the
 362         * speech quality when listening to samples through headphones but may
 363         * not be obvious through a telephone handset.
 364         *
 365         * Note that the 3dB frequency in radians is approx Beta, e.g. for Beta
 366         * = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
 367         */
 368
 369        if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) {
 370                tmp = rx << 15;
 371
 372                /*
 373                 * Make sure the gain of the HPF is 1.0. This can still
 374                 * saturate a little under impulse conditions, and it might
 375                 * roll to 32768 and need clipping on sustained peak level
 376                 * signals. However, the scale of such clipping is small, and
 377                 * the error due to any saturation should not markedly affect
 378                 * the downstream processing.
 379                 */
 380                tmp -= (tmp >> 4);
 381
 382                ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2;
 383
 384                /*
 385                 * hard limit filter to prevent clipping.  Note that at this
 386                 * stage rx should be limited to +/- 16383 due to right shift
 387                 * above
 388                 */
 389                tmp1 = ec->rx_1 >> 15;
 390                if (tmp1 > 16383)
 391                        tmp1 = 16383;
 392                if (tmp1 < -16383)
 393                        tmp1 = -16383;
 394                rx = tmp1;
 395                ec->rx_2 = tmp;
 396        }
 397
 398        /* Block average of power in the filter states.  Used for
 399           adaption power calculation. */
 400
 401        {
 402                int new, old;
 403
 404                /* efficient "out with the old and in with the new" algorithm so
 405                   we don't have to recalculate over the whole block of
 406                   samples. */
 407                new = (int)tx * (int)tx;
 408                old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
 409                    (int)ec->fir_state.history[ec->fir_state.curr_pos];
 410                ec->Pstates +=
 411                    ((new - old) + (1 << (ec->log2taps-1))) >> ec->log2taps;
 412                if (ec->Pstates < 0)
 413                        ec->Pstates = 0;
 414        }
 415
 416        /* Calculate short term average levels using simple single pole IIRs */
 417
 418        ec->Ltxacc += abs(tx) - ec->Ltx;
 419        ec->Ltx = (ec->Ltxacc + (1 << 4)) >> 5;
 420        ec->Lrxacc += abs(rx) - ec->Lrx;
 421        ec->Lrx = (ec->Lrxacc + (1 << 4)) >> 5;
 422
 423        /* Foreground filter */
 424
 425        ec->fir_state.coeffs = ec->fir_taps16[0];
 426        echo_value = fir16(&ec->fir_state, tx);
 427        ec->clean = rx - echo_value;
 428        ec->Lcleanacc += abs(ec->clean) - ec->Lclean;
 429        ec->Lclean = (ec->Lcleanacc + (1 << 4)) >> 5;
 430
 431        /* Background filter */
 432
 433        echo_value = fir16(&ec->fir_state_bg, tx);
 434        clean_bg = rx - echo_value;
 435        ec->Lclean_bgacc += abs(clean_bg) - ec->Lclean_bg;
 436        ec->Lclean_bg = (ec->Lclean_bgacc + (1 << 4)) >> 5;
 437
 438        /* Background Filter adaption */
 439
 440        /* Almost always adap bg filter, just simple DT and energy
 441           detection to minimise adaption in cases of strong double talk.
 442           However this is not critical for the dual path algorithm.
 443         */
 444        ec->factor = 0;
 445        ec->shift = 0;
 446        if ((ec->nonupdate_dwell == 0)) {
 447                int P, logP, shift;
 448
 449                /* Determine:
 450
 451                   f = Beta * clean_bg_rx/P ------ (1)
 452
 453                   where P is the total power in the filter states.
 454
 455                   The Boffins have shown that if we obey (1) we converge
 456                   quickly and avoid instability.
 457
 458                   The correct factor f must be in Q30, as this is the fixed
 459                   point format required by the lms_adapt_bg() function,
 460                   therefore the scaled version of (1) is:
 461
 462                   (2^30) * f  = (2^30) * Beta * clean_bg_rx/P
 463                   factor      = (2^30) * Beta * clean_bg_rx/P     ----- (2)
 464
 465                   We have chosen Beta = 0.25 by experiment, so:
 466
 467                   factor      = (2^30) * (2^-2) * clean_bg_rx/P
 468
 469                                                (30 - 2 - log2(P))
 470                   factor      = clean_bg_rx 2                     ----- (3)
 471
 472                   To avoid a divide we approximate log2(P) as top_bit(P),
 473                   which returns the position of the highest non-zero bit in
 474                   P.  This approximation introduces an error as large as a
 475                   factor of 2, but the algorithm seems to handle it OK.
 476
 477                   Come to think of it a divide may not be a big deal on a
 478                   modern DSP, so its probably worth checking out the cycles
 479                   for a divide versus a top_bit() implementation.
 480                 */
 481
 482                P = MIN_TX_POWER_FOR_ADAPTION + ec->Pstates;
 483                logP = top_bit(P) + ec->log2taps;
 484                shift = 30 - 2 - logP;
 485                ec->shift = shift;
 486
 487                lms_adapt_bg(ec, clean_bg, shift);
 488        }
 489
 490        /* very simple DTD to make sure we dont try and adapt with strong
 491           near end speech */
 492
 493        ec->adapt = 0;
 494        if ((ec->Lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->Lrx > ec->Ltx))
 495                ec->nonupdate_dwell = DTD_HANGOVER;
 496        if (ec->nonupdate_dwell)
 497                ec->nonupdate_dwell--;
 498
 499        /* Transfer logic */
 500
 501        /* These conditions are from the dual path paper [1], I messed with
 502           them a bit to improve performance. */
 503
 504        if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
 505            (ec->nonupdate_dwell == 0) &&
 506            /* (ec->Lclean_bg < 0.875*ec->Lclean) */
 507            (8 * ec->Lclean_bg < 7 * ec->Lclean) &&
 508            /* (ec->Lclean_bg < 0.125*ec->Ltx) */
 509            (8 * ec->Lclean_bg < ec->Ltx)) {
 510                if (ec->cond_met == 6) {
 511                        /*
 512                         * BG filter has had better results for 6 consecutive
 513                         * samples
 514                         */
 515                        ec->adapt = 1;
 516                        memcpy(ec->fir_taps16[0], ec->fir_taps16[1],
 517                                ec->taps * sizeof(int16_t));
 518                } else
 519                        ec->cond_met++;
 520        } else
 521                ec->cond_met = 0;
 522
 523        /* Non-Linear Processing */
 524
 525        ec->clean_nlp = ec->clean;
 526        if (ec->adaption_mode & ECHO_CAN_USE_NLP) {
 527                /*
 528                 * Non-linear processor - a fancy way to say "zap small
 529                 * signals, to avoid residual echo due to (uLaw/ALaw)
 530                 * non-linearity in the channel.".
 531                 */
 532
 533                if ((16 * ec->Lclean < ec->Ltx)) {
 534                        /*
 535                         * Our e/c has improved echo by at least 24 dB (each
 536                         * factor of 2 is 6dB, so 2*2*2*2=16 is the same as
 537                         * 6+6+6+6=24dB)
 538                         */
 539                        if (ec->adaption_mode & ECHO_CAN_USE_CNG) {
 540                                ec->cng_level = ec->Lbgn;
 541
 542                                /*
 543                                 * Very elementary comfort noise generation.
 544                                 * Just random numbers rolled off very vaguely
 545                                 * Hoth-like.  DR: This noise doesn't sound
 546                                 * quite right to me - I suspect there are some
 547                                 * overlfow issues in the filtering as it's too
 548                                 * "crackly".
 549                                 * TODO: debug this, maybe just play noise at
 550                                 * high level or look at spectrum.
 551                                 */
 552
 553                                ec->cng_rndnum =
 554                                    1664525U * ec->cng_rndnum + 1013904223U;
 555                                ec->cng_filter =
 556                                    ((ec->cng_rndnum & 0xFFFF) - 32768 +
 557                                     5 * ec->cng_filter) >> 3;
 558                                ec->clean_nlp =
 559                                    (ec->cng_filter * ec->cng_level * 8) >> 14;
 560
 561                        } else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) {
 562                                /* This sounds much better than CNG */
 563                                if (ec->clean_nlp > ec->Lbgn)
 564                                        ec->clean_nlp = ec->Lbgn;
 565                                if (ec->clean_nlp < -ec->Lbgn)
 566                                        ec->clean_nlp = -ec->Lbgn;
 567                        } else {
 568                                /*
 569                                 * just mute the residual, doesn't sound very
 570                                 * good, used mainly in G168 tests
 571                                 */
 572                                ec->clean_nlp = 0;
 573                        }
 574                } else {
 575                        /*
 576                         * Background noise estimator.  I tried a few
 577                         * algorithms here without much luck.  This very simple
 578                         * one seems to work best, we just average the level
 579                         * using a slow (1 sec time const) filter if the
 580                         * current level is less than a (experimentally
 581                         * derived) constant.  This means we dont include high
 582                         * level signals like near end speech.  When combined
 583                         * with CNG or especially CLIP seems to work OK.
 584                         */
 585                        if (ec->Lclean < 40) {
 586                                ec->Lbgn_acc += abs(ec->clean) - ec->Lbgn;
 587                                ec->Lbgn = (ec->Lbgn_acc + (1 << 11)) >> 12;
 588                        }
 589                }
 590        }
 591
 592        /* Roll around the taps buffer */
 593        if (ec->curr_pos <= 0)
 594                ec->curr_pos = ec->taps;
 595        ec->curr_pos--;
 596
 597        if (ec->adaption_mode & ECHO_CAN_DISABLE)
 598                ec->clean_nlp = rx;
 599
 600        /* Output scaled back up again to match input scaling */
 601
 602        return (int16_t) ec->clean_nlp << 1;
 603}
 604EXPORT_SYMBOL_GPL(oslec_update);
 605
 606/* This function is seperated from the echo canceller is it is usually called
 607   as part of the tx process.  See rx HP (DC blocking) filter above, it's
 608   the same design.
 609
 610   Some soft phones send speech signals with a lot of low frequency
 611   energy, e.g. down to 20Hz.  This can make the hybrid non-linear
 612   which causes the echo canceller to fall over.  This filter can help
 613   by removing any low frequency before it gets to the tx port of the
 614   hybrid.
 615
 616   It can also help by removing and DC in the tx signal.  DC is bad
 617   for LMS algorithms.
 618
 619   This is one of the classic DC removal filters, adjusted to provide
 620   sufficient bass rolloff to meet the above requirement to protect hybrids
 621   from things that upset them. The difference between successive samples
 622   produces a lousy HPF, and then a suitably placed pole flattens things out.
 623   The final result is a nicely rolled off bass end. The filtering is
 624   implemented with extended fractional precision, which noise shapes things,
 625   giving very clean DC removal.
 626*/
 627
 628int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx)
 629{
 630        int tmp, tmp1;
 631
 632        if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
 633                tmp = tx << 15;
 634
 635                /*
 636                 * Make sure the gain of the HPF is 1.0. The first can still
 637                 * saturate a little under impulse conditions, and it might
 638                 * roll to 32768 and need clipping on sustained peak level
 639                 * signals. However, the scale of such clipping is small, and
 640                 * the error due to any saturation should not markedly affect
 641                 * the downstream processing.
 642                 */
 643                tmp -= (tmp >> 4);
 644
 645                ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2;
 646                tmp1 = ec->tx_1 >> 15;
 647                if (tmp1 > 32767)
 648                        tmp1 = 32767;
 649                if (tmp1 < -32767)
 650                        tmp1 = -32767;
 651                tx = tmp1;
 652                ec->tx_2 = tmp;
 653        }
 654
 655        return tx;
 656}
 657EXPORT_SYMBOL_GPL(oslec_hpf_tx);
 658
 659MODULE_LICENSE("GPL");
 660MODULE_AUTHOR("David Rowe");
 661MODULE_DESCRIPTION("Open Source Line Echo Canceller");
 662MODULE_VERSION("0.3.0");
 663