# first our usual bookkeeping
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import IPython

# make every figure wide and short by default; the tuple is (width, height)
plt.rcParams["figure.figsize"] = (14,4)

def dtmf_dial(number, rate=24000, mark=0.1, space=0.1):
    """Synthesize the DTMF audio signal for a sequence of keys.

    Each key is rendered as the sum of its two unit-amplitude sinusoids
    (one row frequency, one column frequency) lasting ``mark`` seconds,
    followed by ``space`` seconds of silence.

    Parameters
    ----------
    number : iterable of str
        Keys to dial; each element must be one of ``0``-``9``, ``*`` or ``#``.
    rate : int
        Sampling rate in Hz.
    mark : float
        Duration of each tone in seconds.
    space : float
        Duration of the silence after each tone in seconds.

    Returns
    -------
    numpy.ndarray
        1-D float array with the concatenated tones and gaps; an empty
        array when ``number`` is empty.

    Raises
    ------
    KeyError
        If ``number`` contains a character that is not a DTMF key.
    """
    # (row frequency, column frequency) in Hz for every key on the keypad
    DTMF = {
        '1': (697, 1209), '2': (697, 1336), '3': (697, 1477),
        '4': (770, 1209), '5': (770, 1336), '6': (770, 1477),
        '7': (852, 1209), '8': (852, 1336), '9': (852, 1477),
        '*': (941, 1209), '0': (941, 1336), '#': (941, 1477),
    }

    # sample indices of one tone and the (shared) silent gap; both are
    # loop-invariant so they are built once
    n = np.arange(0, int(mark * rate))
    gap = np.zeros(int(space * rate))

    # collect the pieces and join them once at the end: concatenating inside
    # the loop would recopy the whole signal for every key
    chunks = []
    for d in number:
        lo, hi = DTMF[d]
        tone = np.sin(2*np.pi * lo / rate * n) + np.sin(2*np.pi * hi / rate * n)
        chunks.append(tone)
        chunks.append(gap)

    if not chunks:
        # np.concatenate rejects an empty list; keep returning an empty array
        return np.array([])
    return np.concatenate(chunks)

# sampling rate in Hz, reused for synthesis, playback and analysis
Fs = 24000
# synthesize a test sequence (it includes the same key twice in a row)
x=dtmf_dial('123##45', Fs)

# build an audio player for the synthesized signal
IPython.display.Audio(x, rate=Fs)

# time-domain view of the signal
# (the trailing ';' presumably silences the cell's text output in the notebook)
plt.plot(x);

# split the signal into 10 ms chunks and arrange them as the rows of a matrix;
# each chunk contains (Fs * 10 ms) samples
# (the data vector is truncated to a multiple of the window length so that the
#  reshape below does not fail)
win_len = int(0.01 * Fs)
w = np.reshape(x[:int(len(x) / win_len) * win_len], (-1, win_len))

# energy of each chunk: sum of the squared samples along each row
we = np.sum(w * w, axis=1)

# per-chunk energy, one point per 10 ms window
plt.plot(we);

def dtmf_split(x, rate=24000, threshold=200, win_ms=10):
    """Locate the tone segments of a signal by thresholding windowed energy.

    The signal is cut into consecutive windows of ``win_ms`` milliseconds
    (trailing samples that do not fill a window are ignored) and the energy
    of each window, i.e. the sum of its squared samples, is compared with
    ``threshold``. Every maximal run of windows whose energy is at least
    ``threshold`` is reported as one segment.

    Parameters
    ----------
    x : array_like
        1-D signal samples.
    rate : int
        Sampling rate in Hz.
    threshold : float
        Minimum window energy for a window to count as part of a tone.
    win_ms : float
        Window length in milliseconds.

    Returns
    -------
    list of (int, int)
        ``(start, stop)`` sample indices of each segment, with ``stop``
        exclusive; both are multiples of the window length.
    """
    win_len = int(win_ms * rate / 1000.0)

    # drop the incomplete last window, then put one window per row
    n_win = len(x) // win_len
    w = np.reshape(x[:n_win * win_len], (-1, win_len))
    we = np.sum(w * w, axis=1)

    # A single mask decides tone vs. silence. Using `<` for silence and `>`
    # for tone left windows with energy == threshold (or NaN) in neither
    # class, which produced an empty segment and a loop that never advanced.
    active = we >= threshold
    L = len(active)

    edges = []
    ix = 0
    while ix < L:
        if not active[ix]:
            ix += 1      # still in silence
            continue
        # ix is the first window of a tone: extend to the end of the run
        iy = ix
        while iy < L and active[iy]:
            iy += 1
        edges.append((ix * win_len, iy * win_len))
        ix = iy

    return edges

# run the segmentation on the test signal with its default parameters
print(dtmf_split(x))

# (recorded output of the call above: one (start, stop) sample range per tone)
[(0, 2400), (4800, 7200), (9600, 12000), (14400, 16800), (19200, 21600), (24000, 26400), (28800, 31200)]

# magnitude of the DFT of the first tone segment (samples 0..2399)
X = abs(np.fft.fft(x[0:2400]))
# plot only the first 500 bins; with Fs = 24000 and 2400 samples each bin
# is 10 Hz wide, so this covers 0 to 5 kHz
plt.plot(X[0:500]);

def dtmf_decode(x, rate=24000, threshold=200):
    """Decode a DTMF audio signal into the list of keys it encodes.

    The signal is first segmented with ``dtmf_split``; for each segment the
    strongest DFT component inside the low-tone band and inside the
    high-tone band is located, and each peak is snapped to the nearest
    nominal DTMF frequency to select the keypad row and column.

    Parameters
    ----------
    x : numpy.ndarray
        1-D signal samples.
    rate : int
        Sampling rate in Hz.
    threshold : float
        Energy threshold forwarded to ``dtmf_split``.

    Returns
    -------
    list of str
        One key per detected segment, in order of appearance.
    """
    # nominal keypad frequencies in Hz: rows use the low group, columns the high
    row_tones = np.array([697.0, 770.0, 852.0, 941.0])
    col_tones = np.array([1209.0, 1336.0, 1477.0])
    keypad = [['1', '2', '3'], ['4', '5', '6'], ['7', '8', '9'], ['*', '0', '#']]

    # (lowest, highest) frequency in Hz searched for the low and the high tone
    search_bands = ((680.0, 960.0), (1180.0, 1500.0))

    digits = []
    for start, stop in dtmf_split(x, rate, threshold):
        spectrum = abs(np.fft.fft(x[start:stop]))
        # width of one DFT bin in Hz; it depends on the segment length
        hz_per_bin = float(rate) / len(spectrum)

        # frequency of the strongest bin inside each search band
        peak_hz = []
        for f_min, f_max in search_bands:
            first_bin = int(f_min / hz_per_bin)
            last_bin = int(f_max / hz_per_bin)
            peak_bin = first_bin + np.argmax(spectrum[first_bin:last_bin])
            peak_hz.append(peak_bin * hz_per_bin)

        # snap each peak to the closest nominal tone to get row and column
        row = np.argmin(abs(row_tones - peak_hz[0]))
        col = np.argmin(abs(col_tones - peak_hz[1]))
        digits.append(keypad[row][col])

    return digits

# decode the clean test signal with the default threshold
dtmf_decode(x)

# (recorded output of the call above)
['1', '2', '3', '#', '#', '4', '5']

# add uniformly distributed noise between -2 and 2 to every sample
noisy = x + np.random.uniform(-2, 2, len(x))

# build an audio player for the noisy signal
IPython.display.Audio(noisy, rate=Fs)

# decode the noisy signal with the same default threshold
dtmf_decode(noisy)

# (recorded output of one run; the noise is random, so other runs may differ)
['3']

# clean signal again, with a slightly higher energy threshold
dtmf_decode(x, threshold=220)

# (recorded output: still the dialed sequence)
['1', '2', '3', '#', '#', '4', '5']

# clean signal with a higher threshold still
dtmf_decode(x, threshold=250)

# (recorded output: 12 keys instead of the 7 dialed, so the tones were
#  apparently broken into several fragments at this threshold)
['2', '2', '5', '5', '5', '5', '5', '5', '4', '4', '5', '5']

	1209 Hz	1336 Hz	1477 Hz
697 Hz	1	2	3
770 Hz	4	5	6
852 Hz	7	8	9
941 Hz	*	0	#

Dual-tone multi-frequency (DTMF) signaling

1. Introduction

2. The encoder

3. The decoder

3.1. Signal segmentation

3.2. Frequency identification

4. Practical considerations