-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmodel.py
92 lines (77 loc) · 3.89 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
'''
WARNING: This code is full of bugs. Can you squash them all?
We've created a super-awesome sentiment classification tool
that recognizes whether a movie review is good or bad.
However, it does not work as expected... Why?
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
import data_utils
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# This seems like a rough implementation of
# [Yoon Kim, 2014, Convolutional Neural Networks for Sentence Classification](https://arxiv.org/pdf/1408.5882.pdf)
# ... but is it really so?
# NOTE: you do not need to implement everything (for example, every hyperparameter)
# as proposed in the paper to make the code work.
class CnnClassifier(nn.Module):
def __init__(self, vocab_size,
vocab_embed_size=50,
filter_sizes=[2, 3, 4, 5],
out_dim=300,
num_classes=2,
dropout=0.99,
pad_index=2):
super().__init__()
self.vocab_size = vocab_size
self.vocab_embed_size = vocab_embed_size
self.pad_index = pad_index
# we will use multiple width convolution filters to capture character patterns for a sentiment
self.filter_sizes = sorted(filter_sizes)
# we will concat the max-pooled outputs of the convolutions
self.num_filters = out_dim // len(filter_sizes)
# this is the concatenated hidden dimension
self.out_dim = out_dim
# since this is a binary classification problem, number of classes is 2
self.num_classes = num_classes
assert not self.out_dim % self.num_filters
# character embedding matrix
self.embedding = nn.Embedding(
self.vocab_size, self.vocab_embed_size,
padding_idx=self.pad_index)
self.embedding_dropout = nn.Dropout(p=dropout)
self.pool = nn.AdaptiveMaxPool1d(1) # we will max pool the output of convolution filters of each width
self.convs = [nn.Conv1d(self.vocab_embed_size, self.num_filters, width).to(device)
for width in filter_sizes] # convolution for filters of varying width
self.fc = nn.Linear(self.out_dim, self.num_classes) # final layer to convert deep features to binary scores
self.fc_dropout = nn.Dropout(p=dropout)
# weight initialization
for p in self.parameters():
p.data.fill_(0)
def forward(self, x, real_len=None):
# lengths of the real inputs.
# the data would be padded like [word, world, ..., last_word, pad, pad, pad ...]
# so we can only calculate the number of non-padding characters
# to know the real lengths of sequences, not zero-padded...
if real_len is None:
real_len = torch.sum(x.ne(self.pad_index), dim=0)
x = self.embedding_dropout(self.embedding(x))
# exception handling, in case that the number of characters in the sentence
# is smaller than the convolution filter width.
# pad with zero, so that error does not occur.
if x.size(1) < max(self.filter_sizes):
x = torch.cat([x, torch.zeros(
x.size(0),
max(self.filter_sizes) - x.size(1),
x.size(2)).to(device)], dim=1)
# we can avoid unnecessary calculations by excluding the all-zero part
# from the input.
L = max(real_len.max().int(), max(self.filter_sizes))
x = x[:, :L, :] # [B, L', D]
# reshape the input to fit to the input shape for convolutions
x = x.contiguous().view(-1, x.size(2), x.size(1)) # [B, D, L']
conv_outputs = [] # gather convolution outputs here
for conv in self.convs:
conv_outputs += [self.pool(F.relu(conv(x))).squeeze(-1)]
hidden = torch.cat(conv_outputs, dim=-1) # concatenate to get [B, D']
return self.fc_dropout(self.fc(hidden)) # [B, 2]