-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcheck_VAD.m
112 lines (92 loc) · 2.67 KB
/
check_VAD.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
function flag_VAD = check_VAD(x,sr)
% CHECK_VAD Frame-level voice activity detection similar to the ETSI
% feature extraction front-end (details in ETSI ES 202 050 Document).
%
%   flag_VAD = CHECK_VAD(x,sr) resamples X to 8 kHz when SR is not 8000,
%   cuts it into 25 ms Hanning-windowed frames with a 10 ms shift, and
%   returns a 1-by-N_fr row vector that is 1 for frames flagged as speech
%   (hangover frames included) and 0 otherwise. The first four frames are
%   never flagged.
%
%   x  - one-dimensional vector of samples. Samples should be read from a
%        raw format file; the fixed energy floor and thresholds below
%        presumably assume a 16-bit sample scale - TODO confirm.
%   sr - sampling rate of x in Hz.

% Integer classes saturate in the window product and in the squaring used
% for the frame energy, so promote them to double before any arithmetic.
if isinteger(x)
    x = double(x);
end

% Resample the test data to 8kHz for determining VAD information
if sr ~= 8000
    x = resample(x,8000,sr);
end
sr = 8000;

% CONSTANTS
NB_FRAME_THRESHOLD_LTE = 10;      % frames using the 1-1/t forgetting factor
LAMBDA_LTE = 0.97;                % forgetting factor after that start-up phase
M = 80;                           % samples (frame tail) used for the energy
SNR_THRESHOLD_UPD_LTE = 20;       % mean is frozen above this energy margin
ENERGY_FLOOR = 80;                % lower bound of the mean energy
MIN_FRAME = 10;                   % frames during which the mean always adapts
lambdaLTEhigherE = 0.99;          % slower factor when energy is above the mean
SNR_THRESHOLD_VAD = 15;           % energy margin that marks a speech frame
MIN_SPEECH_FRAME_HANGOVER = 4;    % speech run length that arms the hangover
HANGOVER = 15;                    % value the hangover counter is armed with

% Initialization
nbSpeechFrame = 0;
meanEn = 0;
hangOver = 0;

% Frame the signal into 25ms windows with a shift of 10ms
% (shift fraction 0.4 of a 200-sample frame = 80 samples)
flen = round(0.025*sr);
W = hanning(flen);
SP = 0.4;
x_fr = segment(x,flen,SP,W);
N_fr = size(x_fr,2);
flag_VAD = zeros(1,N_fr);

for t = 1 : N_fr
    x_cur = x_fr(:,t);

    % Forgetting factor of the mean energy: 1-1/t during start-up
    % (so the first frame initialises the mean), constant afterwards.
    if t < NB_FRAME_THRESHOLD_LTE
        lambdaLTE = 1 - 1/t;
    else
        lambdaLTE = LAMBDA_LTE;
    end

    % Log energy of the last M samples of the frame.
    % NOTE(review): x_cur is the Hanning-windowed frame, so this measures
    % the tapered tail of the window - confirm this is intended.
    frameEn = 0.5 + 16/(log(2)) * (log((64 + sum(x_cur(end-M+1:end).^2))/64));

    % Update the mean energy unless the frame is far above it
    % (always update during the first MIN_FRAME-1 frames).
    if (frameEn - meanEn) < SNR_THRESHOLD_UPD_LTE || t < MIN_FRAME
        if frameEn < meanEn || t < MIN_FRAME
            meanEn = meanEn + (1-lambdaLTE)*(frameEn - meanEn);
        else
            meanEn = meanEn + (1-lambdaLTEhigherE)*(frameEn - meanEn);
        end
        if meanEn < ENERGY_FLOOR
            meanEn = ENERGY_FLOOR;
        end
    end

    % Decision, skipped for the first four frames (they stay 0).
    if t > 4
        if (frameEn - meanEn) > SNR_THRESHOLD_VAD
            flag_VAD(t) = 1;
            nbSpeechFrame = nbSpeechFrame + 1;
        else
            % A long enough run of speech frames arms the hangover counter.
            if nbSpeechFrame > MIN_SPEECH_FRAME_HANGOVER
                hangOver = HANGOVER;
            end
            nbSpeechFrame = 0;
            % While the counter is non-zero the frame is still flagged.
            if hangOver ~= 0
                hangOver = hangOver - 1;
                flag_VAD(t) = 1;
            else
                flag_VAD(t) = 0;
            end
        end
    end
end
function Seg=segment(signal,W,SP,Window)
% SEGMENT chops a signal into overlapping windowed segments
% A= SEGMENT(X,W,SP,WIN) returns a matrix whose columns are the segmented
% and windowed frames of the one dimensional input signal, X. W is the
% number of samples per window, default value W=256. SP is the shift
% expressed as a fraction of W, default value SP=0.4 (the shift in samples
% is fix(W*SP)). WIN is the window that is multiplied by each segment and
% its length should be W. The default window is a hamming window.
% The result has W rows and fix((length(X)-W)/shift + 1) columns; it is
% empty (W-by-0) when X is shorter than one window.
% An error is raised when fix(W*SP) is smaller than one sample.
% 06-Sep-04
% Esfandiar Zavarehei
if nargin<3
    SP=.4;
end
if nargin<2
    W=256;
end
if nargin<4
    Window=hamming(W);
end
Window=Window(:); %make it a column vector
L=length(signal);
shift=fix(W.*SP); %frame shift in samples
if shift<1
    % A zero shift would divide by zero below and give a non-finite count
    error('segment:invalidShift', ...
        'W*SP must be at least one sample (got W=%g, SP=%g).',W,SP);
end
N=fix((L-W)/shift +1); %number of segments
% Column k of Index holds the sample positions of frame k
Index=(repmat(1:W,N,1)+repmat((0:(N-1))'*shift,1,W))';
hw=repmat(Window,1,N);
% Indexing a row vector with a single-column Index returns a row, which
% would not line up with the column window; force the shape of Index.
Seg=reshape(signal(Index),size(Index)).*hw;