%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy.signal as sp
import IPython
from scipy.io import wavfile

# default size (width, height) for every figure created after this line
plt.rcParams["figure.figsize"] = (14,4)

def add_echo(x, M, lmb=0.6, alpha=-0.8):
    """Apply the recursive echo model to a signal, or return its impulse response.

    The output follows the recursion

        y[n] = x[n] - lmb * x[n-1] + lmb * y[n-1] + alpha * (1 - lmb) * y[n-M]

    in which every term with a negative index is treated as zero.

    Parameters
    ----------
    x : sequence or scalar
        Input samples. If a scalar is passed, the input is taken to be a
        delta sequence of length ``int(x)``, so the function returns the
        impulse response truncated to that length.
    M : int
        Delay, in samples, of the fed-back output term. Must be >= 1.
    lmb : float
        Coefficient applied to ``x[n-1]`` (negated) and to ``y[n-1]``.
    alpha : float
        Scales the delayed feedback term together with ``(1 - lmb)``.

    Returns
    -------
    numpy.ndarray
        Output signal with the same number of samples as the input.

    Raises
    ------
    ValueError
        If ``M`` is smaller than 1.
    """
    # With M <= 0 the delayed-feedback branch would already run at n == 0 and
    # read x[-1] / y[-1], which Python resolves to the *last* samples instead
    # of zero; reject such delays rather than return a silently wrong signal.
    if M < 1:
        raise ValueError("M must be a delay of at least 1 sample, got %r" % (M,))

    # if the first argument is a scalar, assume the input is a delta sequence of length x
    #  in this case, the function returns the truncated impulse response of the room.
    if np.isscalar(x):
        x = np.zeros(int(x))
        x[0] = 1

    # gain of the delayed feedback path; constant over the whole loop
    feedback_gain = alpha * (1 - lmb)

    y = np.zeros(len(x))
    for n in range(len(x)):
        if n >= M:
            # all past samples used by the recursion are available
            y[n] = x[n] - lmb * x[n-1] + lmb * y[n-1] + feedback_gain * y[n - M]
        elif n > 0:
            # the delayed output y[n-M] lies before the start of the signal
            y[n] = x[n] - lmb * x[n-1] + lmb * y[n-1]
        else:
            # first sample: no history at all
            y[n] = x[n]
    return y

# a scalar first argument makes add_echo return the truncated impulse response:
# here its first 1000 samples, for a feedback delay of 100 samples
plt.plot(add_echo(1000, 100));

# read the sampling rate and the samples of the speech clip from disk
Fs, speech = wavfile.read('speech2.wav')
# NOTE(review): the 32767 divisor presumably assumes 16-bit PCM samples; a sample
#  equal to -32768 would then land marginally below -1 -- confirm against the file
speech = speech / 32767.0 # scale the signal to floats in (approximately) [-1, 1]
print('sampling rate:', Fs, 'Hz, data length:', len(speech), 'samples')
# last expression of the cell: renders an audio player for the clean speech
IPython.display.Audio(speech, rate=Fs)

sampling rate: 8000 Hz, data length: 19063 samples

# add an echo to the speech, using a delay of 0.020 s converted to samples
speech_echo = add_echo(speech, int(0.020 * Fs))
IPython.display.Audio(speech_echo, rate=Fs)

# play the echoed speech followed by the echo alone (echoed minus clean signal)
IPython.display.Audio(np.r_[speech_echo, speech_echo - speech], rate=Fs)

def lms(x, d, N, step_size=0.001):
    """Estimate an N-tap FIR filter with the LMS adaptation rule.

    Parameters
    ----------
    x : array-like
        Input signal fed to the adaptive filter.
    d : array-like
        Desired output signal the filter output is compared against.
    N : int
        Number of taps of the FIR filter to estimate.
    step_size : float
        Adaptation step applied to every coefficient update.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The N estimated filter taps, and the error recorded at each
        adaptation step (one value for every n from N to the end of the
        shorter of the two signals).
    """
    # start from a unit impulse as the initial guess for the filter
    taps = np.zeros(N)
    taps[0] = 1

    # only the overlapping part of the two signals can be used
    n_samples = min(len(x), len(d))

    # error at every time index, kept so that the MSE can be plotted later
    errors = np.zeros(n_samples)

    for n in range(N, n_samples):
        # the N most recent input samples, newest first: x[n], x[n-1], ..., x[n-N+1]
        recent = x[n:n-N:-1]
        # mismatch between the desired sample and the current filter output
        errors[n] = d[n] - np.dot(taps, recent)
        # move the taps along the input window, proportionally to the error
        taps = taps + step_size * errors[n] * recent

    # indices below N were never adapted, so they are not reported
    return taps, errors[N:]

# create the signals
# input signal: 10000 samples drawn from a standard normal distribution
wgn = np.random.randn(10000)

# feedback delay of the echo model, in samples
DELAY = 160
# desired signal: the noise passed through the echo model
wgn_echo = add_echo(wgn, DELAY)

# number of filter taps to estimate, and the echo model's impulse response
#  truncated to that same length (used as the reference in the plots)
TAPS = 500
h_orig = add_echo(TAPS, DELAY)

def plot_lms_test(N, step_size=0.0008):
    """Run LMS on the first N samples of the noise test signals and plot the outcome.

    The module-level signals ``wgn`` (input) and ``wgn_echo`` (desired output)
    are truncated to N samples and passed to ``lms`` to estimate ``TAPS``
    coefficients. The estimate is drawn in red over the reference impulse
    response ``h_orig`` in green.

    Parameters
    ----------
    N : int
        Number of leading samples of the test signals handed to ``lms``.
    step_size : float
        Adaptation step forwarded to ``lms``.

    Returns
    -------
    numpy.ndarray
        The estimated filter taps.
    """
    # only the taps are needed here; the error trace returned by lms is dropped
    estimate = lms(wgn[:N], wgn_echo[:N], TAPS, step_size)[0]

    plt.plot(h_orig, 'g', label='original impulse response')
    plt.plot(estimate, 'r', label=f'estimated impulse response ({N} iterations)')
    plt.legend()
    return estimate

# estimate using the first 1000 samples of the test signals
plot_lms_test(1000);

# estimate using the first 2000 samples
plot_lms_test(2000);

# estimate using the first 5000 samples; keep the taps for the error plot below
h_est = plot_lms_test(5000);

# difference between the reference and the estimated impulse response, tap by tap
plt.plot(h_orig - h_est);

# same number of samples, larger step size (0.002 instead of the default 0.0008)
h_est = plot_lms_test(5000, step_size=0.002);

# tap-by-tap difference for the larger step size
plt.plot(h_orig - h_est);

# same number of samples, step size increased further to 0.005
plot_lms_test(5000, step_size=0.005);

def wgn_mse(L, step_size=0.0008):
    """Run one LMS experiment on fresh white noise and return the squared errors.

    A new noise realization of L samples is generated, passed through the
    echo model with delay ``DELAY``, and used to adapt a ``TAPS``-tap filter.

    Parameters
    ----------
    L : int
        Number of noise samples to generate.
    step_size : float
        Adaptation step forwarded to ``lms``.

    Returns
    -------
    numpy.ndarray
        Element-wise square of the error sequence returned by ``lms``.
    """
    noise = np.random.randn(L)
    # the estimated taps are not needed, only the error at each adaptation step
    errors = lms(noise, add_echo(noise, DELAY), TAPS, step_size)[1]
    return np.square(errors)
    
    
TRIALS = 200 # number of independent experiments
L = 6000 # number of noise samples generated for each experiment

# accumulate the squared-error curves of all the experiments (the first call
#  initializes the sum, the loop adds the remaining TRIALS - 1 runs) ...
mse = wgn_mse(L)
for n in range(1, TRIALS):
    mse = mse + wgn_mse(L)
# ... and divide by the number of experiments to obtain the average
mse = mse / TRIALS   
plt.plot(mse);

# let's build the echo signal with a 20ms delay
speech_echo_delay = int(0.020 * Fs)
# repeat the speech clip 5 times back to back (presumably to give the
#  adaptation more samples to work with)
audio = np.tile(speech, 5) 

# now let's estimate the first 1500 taps of the echo impulse response using the speech signal
speech_echo_taps = 1500
h_est, err = lms(audio, add_echo(audio, speech_echo_delay), speech_echo_taps, step_size=0.021)

# reference impulse response of the echo model, truncated to the length of the estimate
h_orig = add_echo(len(h_est), speech_echo_delay)

plt.plot(h_orig, 'g', label='original impulse response'); 
plt.plot(h_est, 'r', label=f'estimated impulse response ({len(audio)} iterations)');
plt.legend();

# difference between the reference and the estimated impulse response, tap by tap
plt.plot(h_orig - h_est);

# echoed speech produced by the echo model ...
speech_echo = add_echo(speech, speech_echo_delay)
# ... and its approximation obtained by filtering the clean speech with the estimated FIR taps
speech_echo_estimated = sp.lfilter(h_est, 1, speech)

# concatenate three segments: the echoed speech, the echo alone (echoed minus
#  clean speech), and the residual after subtracting the estimated echoed speech
audio_sequence = np.r_[speech_echo, speech_echo - speech, speech_echo - speech_echo_estimated]

IPython.display.Audio(audio_sequence, rate=Fs)

# waveform of the three concatenated segments
plt.plot(audio_sequence);

Adaptive Echo Cancellation

1. The echo model

How does it sound?

2. The LMS filter

Convergence tests with WGN

Error decay

3. The LMS echo canceler