Files
deepxi-flask-server/lib/dev/add_noise.py
2020-04-20 22:05:35 +07:00

79 lines
2.4 KiB
Python

## FILE: add_noise.py
## DATE: 2019
## AUTHOR: Aaron Nicolson
## AFFILIATION: Signal Processing Laboratory, Griffith University
## BRIEF: Add noise to clean speech at set SNR level.
##
## This Source Code Form is subject to the terms of the Mozilla Public
## License, v. 2.0. If a copy of the MPL was not distributed with this
## file, You can obtain one at http://mozilla.org/MPL/2.0/.
import tensorflow as tf
def add_noise_batch(s, d, s_len, d_len, SNR):
    '''
    Creates noisy speech batch from clean speech, noise, and SNR batches.

    Input/s:
        s - clean waveforms (dtype=tf.int32).
        d - noise waveforms (dtype=tf.int32).
        s_len - clean waveform lengths without padding (samples).
        d_len - noise waveform lengths without padding (samples).
        SNR - SNR levels.

    Output/s:
        tuple consisting of noisy speech, clean speech, and noise (x, s, d).
        Each element is tf.float32 and every waveform is zero-padded to the
        longest clean waveform length in the batch.
    '''
    # The padded length is the same for every example, so compute it once
    # here rather than rebuilding the reduction inside the mapped function.
    P = tf.reduce_max(s_len)
    return tf.map_fn(
        lambda z: add_noise_pad(z[0], z[1], z[2], z[3], z[4], P),
        (s, d, s_len, d_len, SNR),
        dtype=(tf.float32, tf.float32, tf.float32))
def add_noise_pad(s, d, s_len, d_len, SNR, P):
    '''
    Calls add_noise() and pads the waveforms to the length given by P.
    Also normalises the waveforms by dividing the samples by 32768.

    Inputs:
        s - clean speech waveform.
        d - noise waveform.
        s_len - length of s.
        d_len - length of d.
        SNR - SNR level.
        P - padded length.

    Outputs (in return order):
        x - padded noisy speech waveform.
        s - padded clean speech waveform.
        d - truncated, scaled, and padded noise waveform.
    '''
    # Divisor used to normalise the samples (presumably 16-bit PCM
    # full-scale -- confirm against the caller).
    full_scale = 32768.0

    # Drop the batch padding from each waveform, then convert to float.
    clean = tf.truediv(
        tf.cast(tf.slice(s, [0], [s_len]), tf.float32), full_scale)
    noise = tf.truediv(
        tf.cast(tf.slice(d, [0], [d_len]), tf.float32), full_scale)

    noisy, noise = add_noise(clean, noise, SNR)

    # All three waveforms are padded by the same number of trailing zeros,
    # derived from the clean waveform length.
    paddings = [[0, tf.subtract(P, tf.shape(clean)[0])]]
    return (
        tf.pad(noisy, paddings, "CONSTANT"),
        tf.pad(clean, paddings, "CONSTANT"),
        tf.pad(noise, paddings, "CONSTANT"),
    )
def add_noise(s, d, SNR):
    '''
    Adds noise to the clean waveform at a specific SNR value. A random section
    of the noise waveform, with the same length as the clean waveform, is used.

    Inputs:
        s - clean waveform.
        d - noise waveform. Must be at least as long as s; otherwise the
            random offset range is empty and the slice below is invalid.
        SNR - SNR level (dB; the noise is scaled by 10^(-SNR/20)).

    Outputs:
        x - noisy speech waveform.
        d - truncated and scaled noise waveform.
    '''
    s_len = tf.shape(s)[0]
    d_len = tf.shape(d)[0]

    # Random start offset in [0, d_len - s_len] so the section fits inside d.
    i = tf.random_uniform([1], 0, tf.add(1, tf.subtract(d_len, s_len)), tf.int32)
    d = tf.slice(d, [i[0]], [s_len])

    # A silent (all-zero) noise section has zero norm; dividing by it would
    # turn both d and x into NaN. Flooring the norm leaves non-silent sections
    # unchanged and makes a silent section contribute zero noise.
    d_norm = tf.maximum(tf.norm(d), 1e-12)

    # Scale the noise so that 20*log10(||s|| / ||d||) equals SNR.
    d = tf.multiply(tf.truediv(d, d_norm), tf.truediv(tf.norm(s),
        tf.pow(10.0, tf.multiply(0.05, SNR))))
    x = tf.add(s, d)
    return (x, d)