Python torch.nn module: Parameter() example source code
The following 50 code examples, extracted from open-source Python projects, illustrate how to use torch.nn.Parameter().
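Before the extracted examples, here is a minimal, self-contained sketch (written for this page, not taken from any of the projects below) of what nn.Parameter does: wrapping a tensor in nn.Parameter and assigning it to an attribute of an nn.Module registers the tensor, so it appears in module.parameters() and gets updated by an optimizer.

import torch
import torch.nn as nn

class Scale(nn.Module):
    """Multiplies its input by a single learnable scalar."""
    def __init__(self):
        super(Scale, self).__init__()
        # nn.Parameter marks the tensor as trainable and registers it on the module
        self.scale = nn.Parameter(torch.ones(1))

    def forward(self, x):
        return x * self.scale

m = Scale()
print([name for name, _ in m.named_parameters()])  # ['scale']
out = m(torch.randn(2, 3))                         # gradients from `out` flow into m.scale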
def forward(self, x):
    x_shape = x.size()  # (b, c, h, w)
    offset = self.offset_filter(x)  # (b, 2*c, h, w)
    offset_w, offset_h = torch.split(offset, self.regular_filter.in_channels, 1)  # (b, c, h, w)
    offset_w = offset_w.contiguous().view(-1, int(x_shape[2]), int(x_shape[3]))  # (b*c, h, w)
    offset_h = offset_h.contiguous().view(-1, int(x_shape[2]), int(x_shape[3]))  # (b*c, h, w)
    if not self.input_shape or self.input_shape != x_shape:
        self.input_shape = x_shape
        grid_w, grid_h = np.meshgrid(np.linspace(-1, 1, x_shape[3]), np.linspace(-1, 1, x_shape[2]))  # (h, w)
        grid_w = torch.Tensor(grid_w)
        grid_h = torch.Tensor(grid_h)
        if self.cuda:
            grid_w = grid_w.cuda()
            grid_h = grid_h.cuda()
        self.grid_w = nn.Parameter(grid_w)
        self.grid_h = nn.Parameter(grid_h)
    offset_w = offset_w + self.grid_w  # (b*c, h, w)
    offset_h = offset_h + self.grid_h  # (b*c, h, w)
    x = x.contiguous().view(-1, int(x_shape[2]), int(x_shape[3])).unsqueeze(1)  # (b*c, 1, h, w)
    x = F.grid_sample(x, torch.stack((offset_h, offset_w), 3))  # (b*c, 1, h, w)
    x = x.contiguous().view(-1, int(x_shape[1]), int(x_shape[2]), int(x_shape[3]))  # (b, c, h, w)
    x = self.regular_filter(x)
    return x
def test_parameters(self):
    def num_params(module):
        return len(list(module.parameters()))

    class Net(nn.Container):
        def __init__(self):
            super(Net, self).__init__(
                l1=l,
                l2=l
            )
            self.param = Parameter(torch.Tensor(3, 5))

    l = nn.Linear(10, 20)
    n = Net()
    s = nn.Sequential(n, n, n)
    self.assertEqual(num_params(l), 2)
    self.assertEqual(num_params(n), 3)
    self.assertEqual(num_params(s), 3)
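The test above targets nn.Container, an API from very early PyTorch releases that has since been removed. A rough modern equivalent of the same check, rewritten here against nn.Module (my adaptation, not part of the original test suite), shows that parameters() deduplicates shared submodules and parameters:

import torch
import torch.nn as nn

def num_params(module):
    return len(list(module.parameters()))

class Net(nn.Module):
    def __init__(self, l):
        super(Net, self).__init__()
        self.l1 = l
        self.l2 = l  # the same Linear registered twice
        self.param = nn.Parameter(torch.Tensor(3, 5))

l = nn.Linear(10, 20)
n = Net(l)
s = nn.Sequential(n, n, n)
assert num_params(l) == 2   # weight + bias
assert num_params(n) == 3   # shared Linear counted once, plus `param`
assert num_params(s) == 3   # shared Net counted once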
def __init__(self,
             num_heads: int,
             input_dim: int,
             attention_dim: int,
             values_dim: int,
             output_projection_dim: int = None,
             attention_dropout_prob: float = 0.1) -> None:
    super(MultiHeadSelfAttention, self).__init__()

    self._num_heads = num_heads
    self._input_dim = input_dim
    self._output_dim = output_projection_dim or input_dim
    self._attention_dim = attention_dim
    self._values_dim = values_dim

    self._query_projections = Parameter(torch.FloatTensor(num_heads, input_dim, attention_dim))
    self._key_projections = Parameter(torch.FloatTensor(num_heads, input_dim, attention_dim))
    self._value_projections = Parameter(torch.FloatTensor(num_heads, input_dim, values_dim))

    self._scale = input_dim ** 0.5
    self._output_projection = Linear(num_heads * values_dim,
                                     self._output_dim)
    self._attention_dropout = Dropout(attention_dropout_prob)

    self.reset_parameters()
def __init__(self, shared_resources: SharedResources):
    super(FastQAPyTorchModule, self).__init__()
    self._shared_resources = shared_resources
    input_size = shared_resources.config["repr_dim_input"]
    size = shared_resources.config["repr_dim"]
    self._size = size
    self._with_char_embeddings = self._shared_resources.config.get("with_char_embeddings", False)

    # modules & parameters
    if self._with_char_embeddings:
        self._conv_char_embedding = embedding.ConvCharEmbeddingModule(
            len(shared_resources.char_vocab), size)
        self._embedding_projection = nn.Linear(size + input_size, size)
        self._embedding_highway = Highway(size, 1)
        self._v_wiq_w = nn.Parameter(torch.ones(1, input_size + size))
        input_size = size
    else:
        self._v_wiq_w = nn.Parameter(torch.ones(1, input_size))

    self._bilstm = BiLSTM(input_size + 2, size)
    self._answer_layer = FastQAAnswerModule(shared_resources)

    # [size, 2 * size]
    self._question_projection = nn.Parameter(torch.cat([torch.eye(size), torch.eye(size)], dim=1))
    self._support_projection = nn.Parameter(torch.cat([torch.eye(size), torch.eye(size)], dim=1))
def __init__(self, n_in, n_out, dropout=0, rnn_dropout=0,
             bidirectional=False, use_tanh=1, use_relu=0):
    super(SRUCell, self).__init__()
    self.n_in = n_in
    self.n_out = n_out
    self.rnn_dropout = rnn_dropout
    self.dropout = dropout
    self.bidirectional = bidirectional
    self.activation_type = 2 if use_relu else (1 if use_tanh else 0)

    out_size = n_out * 2 if bidirectional else n_out
    k = 4 if n_in != out_size else 3
    self.size_per_dir = n_out * k
    self.weight = nn.Parameter(torch.Tensor(
        n_in,
        self.size_per_dir * 2 if bidirectional else self.size_per_dir
    ))
    self.bias = nn.Parameter(torch.Tensor(
        n_out * 4 if bidirectional else n_out * 2
    ))
    self.init_weight()
def load_embeddings(self, state_dict):
    self_state_dict = self.state_dict()
    self_states = set(self_state_dict.keys())
    states = set(state_dict)
    assert self_states & states, "Given state dict does not contain " \
                                 "word embedding params"
    for name, param in state_dict.items():
        if name not in self_state_dict:
            continue
        if isinstance(param, nn.Parameter):
            param = param.data
        self_state_dict[name].copy_(param)
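A hedged usage sketch for load_embeddings: assuming a checkpoint whose keys overlap with this model's embedding parameter names (the file name and the surrounding model object are hypothetical), the matching entries are copied in place and everything else is ignored.

pretrained_state = torch.load('pretrained_lm.pt', map_location='cpu')  # hypothetical checkpoint
model.load_embeddings(pretrained_state)  # overwrites only the keys both state dicts share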
def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
    super(MultiHeadAttention, self).__init__()

    self.n_head = n_head
    self.d_k = d_k
    self.d_v = d_v

    self.w_qs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
    self.w_ks = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
    self.w_vs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_v))

    self.attention = ScaledDotProductAttention(d_model)
    self.layer_norm = LayerNormalization(d_model)
    self.proj = Linear(n_head * d_v, d_model)

    self.dropout = nn.Dropout(dropout)

    init.xavier_normal(self.w_qs)
    init.xavier_normal(self.w_ks)
    init.xavier_normal(self.w_vs)
def copy_state_dict(state_dict, model, strip=None):
    tgt_state = model.state_dict()
    copied_names = set()
    for name, param in state_dict.items():
        if strip is not None and name.startswith(strip):
            name = name[len(strip):]
        if name not in tgt_state:
            continue
        if isinstance(param, Parameter):
            param = param.data
        if param.size() != tgt_state[name].size():
            print('mismatch:', name, param.size(), tgt_state[name].size())
            continue
        tgt_state[name].copy_(param)
        copied_names.add(name)

    missing = set(tgt_state.keys()) - copied_names
    if len(missing) > 0:
        print("missing keys in state_dict:", missing)

    return model
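Because copy_state_dict skips missing keys and size mismatches and can strip a key prefix, it is convenient for partially loading checkpoints saved from an nn.DataParallel model, whose keys are prefixed with 'module.'. A hedged usage sketch, assuming the checkpoint stores its weights under a 'state_dict' key (both the path and that key are hypothetical):

checkpoint = torch.load('checkpoint.pth.tar', map_location='cpu')
# strip the 'module.' prefix that nn.DataParallel adds to every key
model = copy_state_dict(checkpoint['state_dict'], model, strip='module.')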
def __init__(self, num_features, max_length, eps=1e-5, momentum=0.1,
             affine=True):
    """
    Most parts are copied from
    torch.nn.modules.batchnorm._BatchNorm.
    """
    super(SeparatedBatchnorm1d, self).__init__()
    self.num_features = num_features
    self.max_length = max_length
    self.affine = affine
    self.eps = eps
    self.momentum = momentum
    if self.affine:
        self.weight = nn.Parameter(torch.FloatTensor(num_features))
        self.bias = nn.Parameter(torch.FloatTensor(num_features))
    else:
        self.register_parameter('weight', None)
        self.register_parameter('bias', None)
    for i in range(max_length):
        self.register_buffer(
            'running_mean_{}'.format(i), torch.zeros(num_features))
        self.register_buffer(
            'running_var_{}'.format(i), torch.ones(num_features))
    self.reset_parameters()
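The loop above registers one pair of running statistics per time step. The forward pass (not shown in this excerpt) would then pick the pair for the current step, clamping long sequences to the last registered step; roughly, as a hypothetical sketch:

# hypothetical lookup inside forward(); `time` is the current step index
step = min(time, self.max_length - 1)
running_mean = getattr(self, 'running_mean_{}'.format(step))
running_var = getattr(self, 'running_var_{}'.format(step))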
def __init__(self, input_size, hidden_size, use_bias=True):
    """
    Most parts are copied from torch.nn.LSTMCell.
    """
    super(LSTMCell, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.use_bias = use_bias
    self.weight_ih = nn.Parameter(
        torch.FloatTensor(input_size, 4 * hidden_size))
    self.weight_hh = nn.Parameter(
        torch.FloatTensor(hidden_size, 4 * hidden_size))
    if use_bias:
        self.bias = nn.Parameter(torch.FloatTensor(4 * hidden_size))
    else:
        self.register_parameter('bias', None)
    self.reset_parameters()
def __init__(self, input_size, hidden_size, max_length, use_bias=True):
    super(BNLSTMCell, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.max_length = max_length
    self.use_bias = use_bias
    self.weight_ih = nn.Parameter(
        torch.FloatTensor(input_size, 4 * hidden_size))
    self.weight_hh = nn.Parameter(
        torch.FloatTensor(hidden_size, 4 * hidden_size))
    if use_bias:
        self.bias = nn.Parameter(torch.FloatTensor(4 * hidden_size))
    else:
        self.register_parameter('bias', None)
    # BN parameters
    self.bn_ih = SeparatedBatchnorm1d(
        num_features=4 * hidden_size, max_length=max_length)
    self.bn_hh = SeparatedBatchnorm1d(
        num_features=4 * hidden_size, max_length=max_length)
    self.bn_c = SeparatedBatchnorm1d(
        num_features=hidden_size, max_length=max_length)
    self.reset_parameters()
def __init__(self, vocab_size, tag_to_ix, embedding_dim, hidden_dim):
    super(BiLSTM_CRF, self).__init__()
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.vocab_size = vocab_size
    self.tag_to_ix = tag_to_ix
    self.tagset_size = len(tag_to_ix)

    self.word_embeds = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2,
                        num_layers=1, bidirectional=True)

    # Maps the output of the LSTM into tag space.
    self.hidden2tag = nn.Linear(hidden_dim, self.tagset_size)

    # Matrix of transition parameters.  Entry i,j is the score of
    # transitioning *to* i *from* j.
    self.transitions = nn.Parameter(
        torch.randn(self.tagset_size, self.tagset_size))

    self.hidden = self.init_hidden()
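Since self.transitions is an nn.Parameter, the CRF transition scores are trained jointly with the embedding and LSTM weights by whatever optimizer receives model.parameters(); no separate handling is needed. A quick check, assuming the rest of the class (init_hidden and the scoring methods) is defined as in the source project, with made-up sizes:

model = BiLSTM_CRF(vocab_size=1000, tag_to_ix={'B': 0, 'I': 1, 'O': 2},
                   embedding_dim=50, hidden_dim=64)
print(model.transitions.size())                          # torch.Size([3, 3])
print('transitions' in dict(model.named_parameters()))   # True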
def __init__(self, question_size, passage_size, attn_size=None,
             cell_type=nn.GRUCell, num_layers=1, dropout=0,
             residual=False, **kwargs):
    super().__init__()
    self.num_layers = num_layers
    if attn_size is None:
        attn_size = question_size

    # Todo: what is V_q? (section 3.4)
    v_q_size = question_size
    self.question_pooling = AttentionPooling(question_size,
                                             v_q_size, attn_size=attn_size)
    self.passage_pooling = AttentionPooling(passage_size,
                                            question_size, attn_size=attn_size)

    self.V_q = nn.Parameter(torch.randn(1, v_q_size), requires_grad=True)
    self.cell = StackedCell(question_size, num_layers=num_layers,
                            dropout=dropout, rnn_cell=cell_type, residual=residual, **kwargs)
def __init__(self, fea_size, dropout=False, gate_width=128, use_region=True, use_kernel_function=False):
    super(Hierarchical_Message_Passing_Structure_base, self).__init__()
    # self.w_object = Parameter()
    if use_kernel_function:
        Message_Passing_Unit = Message_Passing_Unit_v2
    else:
        Message_Passing_Unit = Message_Passing_Unit_v1

    self.gate_sub2pred = Message_Passing_Unit(fea_size, gate_width)
    self.gate_obj2pred = Message_Passing_Unit(fea_size, gate_width)
    self.gate_pred2sub = Message_Passing_Unit(fea_size, gate_width)
    self.gate_pred2obj = Message_Passing_Unit(fea_size, gate_width)

    self.GRU_object = Gated_Recurrent_Unit(fea_size, dropout)  # nn.GRUCell(fea_size, fea_size) #
    self.GRU_phrase = Gated_Recurrent_Unit(fea_size, dropout)

    if use_region:
        self.gate_pred2reg = Message_Passing_Unit(fea_size, gate_width)
        self.gate_reg2pred = Message_Passing_Unit(fea_size, gate_width)
        self.GRU_region = Gated_Recurrent_Unit(fea_size, dropout)
def __init__(self, num_features, max_len, eps=1e-5, momentum=0.1, affine=True):
    super(recurrent_Batchnorm, self).__init__()
    self.num_features = num_features
    self.affine = affine
    self.max_len = max_len
    self.eps = eps
    self.momentum = momentum
    if self.affine:
        self.weight = nn.Parameter(torch.Tensor(num_features))
        self.register_parameter('weight', self.weight)
        self.bias = nn.Parameter(torch.Tensor(num_features))
        self.register_parameter('bias', self.bias)
    else:
        self.register_parameter('weight', None)
        self.register_parameter('bias', None)
    for i in range(max_len):
        self.register_buffer('running_mean_{}'.format(i), torch.zeros(num_features))
        self.register_buffer('running_var_{}'.format(i), torch.ones(num_features))
    self.reset_parameters()
def __init__(self, in_channels, out_channels, kernel_size, bias=True):
    super().__init__()
    self.conv_t = nn.ConvTranspose1d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=kernel_size,
        bias=False
    )

    if bias:
        self.bias = nn.Parameter(
            torch.FloatTensor(out_channels, kernel_size)
        )
    else:
        self.register_parameter('bias', None)

    self.reset_parameters()
def __init__(self, n_in, n_out, dropout=0, rnn_dropout=0,
             bidirectional=False, use_tanh=1, use_relu=0, use_kernel=True):
    super(SRUCell, self).__init__()
    self.n_in = n_in
    self.n_out = n_out
    self.rnn_dropout = rnn_dropout
    self.dropout = dropout
    self.bidirectional = bidirectional
    self.activation_type = 2 if use_relu else (1 if use_tanh else 0)
    self.use_kernel = use_kernel

    out_size = n_out * 2 if bidirectional else n_out
    k = 4 if n_in != out_size else 3
    self.size_per_dir = n_out * k
    self.weight = nn.Parameter(torch.Tensor(
        n_in,
        self.size_per_dir * 2 if bidirectional else self.size_per_dir
    ))
    self.bias = nn.Parameter(torch.Tensor(
        n_out * 4 if bidirectional else n_out * 2
    ))
    self.init_weight()
def __init__(self, opt):
    super(StackLayer2, self).__init__()
    self.model_name = 'StackLayer2'
    self.opt = opt
    # self.fc = nn.Sequential(
    #     nn.Linear(opt.model_num * opt.num_classes, opt.linear_hidden_size),
    #     nn.BatchNorm1d(opt.linear_hidden_size),
    #     nn.ReLU(inplace=True),
    #     nn.Linear(opt.linear_hidden_size, opt.num_classes)
    # )
    # self.weights = nn.Parameter(t.zeros(opt.num_classes, opt.model_num))
    self.weights = nn.Parameter(t.ones(opt.model_num) / opt.model_num)
    # self.fc = nn.Linear(opt.model_num * opt.num_classes, opt.num_classes)
    # weights = np.zeros((opt.num_classes, opt.model_num * opt.num_classes), dtype=np.float32)
    # for i in range(opt.model_num):
    #     weights[range(1999), range(i * 1999, i * 1999 + 1999)] = 0.125
    # self.fc.weight.data = t.from_numpy(weights)
def __init__(self, opt):
    super(MultiModelAll2, self).__init__()
    self.model_name = 'MultiModelAll2'
    self.opt = opt
    self.models = []
    for _name, _path in zip(opt.model_names, opt.model_paths):
        tmp_config = Config().parse(opt.state_dict(), print_=False)
        # tmp_config.static = True
        tmp_config.embedding_path = None
        _model = getattr(models, _name)(tmp_config)
        if _path is not None:
            _model.load(_path)
        self.models.append(_model)
    self.models = nn.ModuleList(self.models)
    self.model_num = len(self.models)
    self.weights = nn.Parameter(t.ones(opt.num_classes, self.model_num))
    assert self.opt.loss == 'bceloss'
    self.eval()
def __init__(self, opt):
    super(MultiModelAll4zhihu, self).__init__()
    self.model_name = 'MultiModelAll4zhihu'
    self.opt = opt
    self.models = []
    self.word_embedding = nn.Embedding(411720, 256)
    self.char_embedding = nn.Embedding(11973, 256)
    model_opts = t.load(opt.model_path + '.json')
    for _name, _path, model_opt_ in zip(opt.model_names, opt.model_paths, model_opts):
        tmp_config = Config().parse(model_opt_, print_=False)
        tmp_config.embedding_path = None
        _model = getattr(models, _name)(tmp_config)
        _model.encoder = (self.char_embedding if _model.opt.type_ == 'char' else self.word_embedding)
        self.models.append(_model)
    self.models = nn.ModuleList(self.models)
    self.model_num = len(self.models)
    self.weights = nn.Parameter(t.ones(opt.num_classes, self.model_num))
    self.load(opt.model_path)
def __init__(self):
    super(POSTag, self).__init__()

    self.w = nn.Parameter(torch.randn(postag_nb_layers * 2,
                                      max_sentence_size,
                                      postag_hn_size))
    self.h = nn.Parameter(torch.randn(postag_nb_layers * 2,
                                      postag_hn_size))

    # Bidirectional LSTM
    self.bi_lstm = nn.LSTM(embedding_size,
                           postag_hn_size,
                           postag_nb_layers,
                           bidirectional=True)

    self.fc = nn.Linear(postag_hn_size * 2, nb_postags)
def __init__(self):
    super(Chunking, self).__init__()

    self.input_size = embedding_size \
                      + nb_postags \
                      + postag_hn_size * 2

    self.w = nn.Parameter(torch.randn(chunking_nb_layers * 2,
                                      chunking_hn_size))
    self.h = nn.Parameter(torch.randn(chunking_nb_layers * 2,
                                      chunking_hn_size))

    self.embedding = nn.Embedding(nb_postags, chunking_postag_emb_size)
    self.aux_emb = torch.arange(0, nb_postags)
    self.aux_emb = Variable(self.aux_emb).long()

    self.bi_lstm = nn.LSTM(self.input_size,
                           chunking_hn_size,
                           chunking_nb_layers,
                           bidirectional=True)

    self.fc = nn.Linear(chunking_hn_size * 2, nb_chunktags)
def __init__(self):
    super(Dependency, self).__init__()

    self.input_size = embedding_size \
                      + nb_postags \
                      + nb_chunktags \
                      + postag_hn_size * 2 \
                      + chunking_hn_size * 2

    self.w = nn.Parameter(torch.randn(dependency_nb_layers * 2,
                                      dependency_hn_size))
    self.h = nn.Parameter(torch.randn(dependency_nb_layers * 2,
                                      dependency_hn_size))

    self.bi_lstm = nn.LSTM(self.input_size,
                           dependency_hn_size,
                           dependency_nb_layers,
                           bidirectional=True)

    self.wd = nn.Parameter(torch.randn(dependency_hn_size * 2))
    self.fc = nn.Linear(dependency_hn_size * 2, 1)
def __init__(self):
    super(SentimentClassification, self).__init__()

    self.input_size = embedding_size \
                      + nb_postags \
                      + nb_chunktags \
                      + max_sentence_size \
                      + postag_hn_size * 2 \
                      + chunking_hn_size * 2 \
                      + dependency_hn_size * 2

    self.w = nn.Parameter(torch.randn(sentiment_nb_layers * 2,
                                      sentiment_hn_size))
    self.h = nn.Parameter(torch.randn(sentiment_nb_layers * 2,
                                      sentiment_hn_size))

    self.bi_lstm = nn.LSTM(self.input_size,
                           sentiment_hn_size,
                           sentiment_nb_layers,
                           bidirectional=True)

    self.fc = nn.Linear(sentiment_hn_size * 2, 1)
def __init__(self, options, GPU=False):
    super(CRF, self).__init__()
    self.GPU = GPU
    if self.GPU:
        self.dtype = torch.cuda.FloatTensor
    else:
        self.dtype = torch.FloatTensor

    self.options = options
    self.tag_to_ix = options['CLASSES_2_IX']
    self.ix_to_tag = {self.tag_to_ix[w]: w for w in self.tag_to_ix}
    self.tagset_size = len(self.tag_to_ix)

    # Matrix of transition parameters.  Entry i,j is the score of
    # transitioning *to* i *from* j.
    # .type() is applied to the tensor *before* wrapping it in nn.Parameter,
    # so the attribute stays a registered Parameter rather than a plain tensor.
    self.transitions = nn.Parameter(
        torch.randn(self.tagset_size, self.tagset_size).type(self.dtype))
    self.initial_weights = nn.Parameter(
        torch.randn(self.tagset_size, 1).type(self.dtype))
    self.final_weights = nn.Parameter(
        torch.randn(self.tagset_size, 1).type(self.dtype))
def __init__(self, options, GPU=False):
    super(CRF, self).__init__()
    self.GPU = GPU
    if self.GPU:
        self.dtype = torch.cuda.FloatTensor
    else:
        self.dtype = torch.FloatTensor

    self.options = options
    self.tag_to_ix = options['CLASSES_2_IX']
    self.ix_to_tag = {self.tag_to_ix[w]: w for w in self.tag_to_ix}

    self.START_TAG = 'START'
    self.STOP_TAG = 'STOP'
    if self.START_TAG not in self.tag_to_ix:
        self.tag_to_ix[self.START_TAG] = len(self.tag_to_ix)
    if self.STOP_TAG not in self.tag_to_ix:
        self.tag_to_ix[self.STOP_TAG] = len(self.tag_to_ix)
    self.tagset_size = len(self.tag_to_ix)

    # Matrix of transition parameters.  Entry i,j is the score of
    # transitioning *to* i *from* j.
    self.transitions = nn.Parameter(torch.randn(self.tagset_size, self.tagset_size).type(self.dtype))
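Many CRF implementations follow this registration by writing hard constraints directly into the parameter's data, so that no sequence can transition into START or out of STOP. That step is not shown in the snippet above; a hedged sketch of the common pattern (recall that entry i,j is the score of moving *to* i *from* j):

self.transitions.data[self.tag_to_ix[self.START_TAG], :] = -10000.0  # never transition *to* START
self.transitions.data[:, self.tag_to_ix[self.STOP_TAG]] = -10000.0   # never transition *from* STOP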
def load_state_dict(module, state_dict):
    """Copies parameters and buffers from :attr:`state_dict` into
    this module and its descendants. The keys of :attr:`state_dict` must
    exactly match the keys returned by this module's :func:`state_dict()`
    function.

    Arguments:
        state_dict (dict): A dict containing parameters and
            persistent buffers.
    """
    own_state = module.state_dict()
    for name, param in state_dict.items():
        if name not in own_state:
            raise KeyError('unexpected key "{}" in state_dict'
                           .format(name))
        if isinstance(param, Parameter):
            # backwards compatibility for serialized parameters
            param = param.data
        own_state[name].copy_(param)
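In contrast to copy_state_dict above, this helper raises on unexpected keys and silently leaves unmatched model entries alone, so the dict passed in should line up with module.state_dict(). A hedged usage sketch (the file name is hypothetical):

state = torch.load('model_best.pth', map_location='cpu')
load_state_dict(model, state)  # raises KeyError if `state` contains keys the model lacks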
def __init__(self, grid_size, grid_bounds, n_components, mixing_params=False):
    super(AdditiveGridinducingPointModule, self).__init__(grid_size, grid_bounds)
    self.n_components = n_components

    # Resize variational parameters to have one size per component
    self.alpha.resize_(*([n_components] + list(self.alpha.size())))
    variational_mean = self.variational_mean
    chol_variational_covar = self.chol_variational_covar
    variational_mean.data.resize_(*([n_components] + list(variational_mean.size())))
    chol_variational_covar.data.resize_(*([n_components] + list(chol_variational_covar.size())))

    # Mixing parameters
    if mixing_params:
        self.register_parameter('mixing_params',
                                nn.Parameter(torch.Tensor(n_components).fill_(1. / n_components)),
                                bounds=(-2, 2))
def __init__(self, in_size, out_size, batch_num=10, epoch_num=10):
    """
    in_size: Data Input Dimension
    out_size: Data Output Dimension
    batch_num: Batch size of Input
    epoch_num: Training Epochs
    """
    super(AutoEncoder, self).__init__()
    self.in_size = in_size
    self.out_size = out_size
    self.batch_num = batch_num
    self.epoch_num = epoch_num
    self.weight1 = nn.Parameter(torch.randn(in_size, out_size), requires_grad=True)
    self.bias1 = nn.Parameter(torch.randn(out_size), requires_grad=True)
    self.bias2 = nn.Parameter(torch.randn(in_size), requires_grad=True)
    # self.linear1 = nn.Linear(in_size, out_size)
    # self.linear2 = nn.Linear(out_size, in_size)
def create(cls, embeddings, labels, **kwargs):
    finetune = kwargs.get('finetune', True)
    dsz = embeddings.dsz
    model = cls()
    model.pdrop = kwargs.get('dropout', 0.5)
    model.labels = labels
    nc = len(labels)
    model.vocab = embeddings.vocab
    model.lut = nn.Embedding(embeddings.vsz + 1, dsz)
    del model.lut.weight
    model.lut.weight = nn.Parameter(torch.FloatTensor(embeddings.weights), requires_grad=finetune)
    pool_dim = model._init_pool(dsz, **kwargs)
    stacked_dim = model._init_stacked(pool_dim, **kwargs)
    model._init_output(stacked_dim, nc)
    print(model)
    return model
def __init__(self, num_embeddings, embedding_dim, padding_idx=None,
             max_norm=None, norm_type=2, scale_grad_by_freq=False,
             sparse=False, fixed_weight=False):
    super(Embedding, self).__init__()
    self.num_embeddings = num_embeddings
    self.embedding_dim = embedding_dim
    self.padding_idx = padding_idx
    self.max_norm = max_norm
    self.norm_type = norm_type
    self.scale_grad_by_freq = scale_grad_by_freq

    if fixed_weight:
        self.weight = Variable(
            torch.Tensor(num_embeddings, embedding_dim),
            requires_grad=False)
    else:
        self.weight = nn.Parameter(
            torch.Tensor(num_embeddings, embedding_dim))
    self.fixed_weight = fixed_weight

    self.sparse = sparse

    self.reset_parameters()
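The fixed_weight branch above stores the table as a plain Variable precisely so that it is excluded from parameters(). A minimal sketch of that distinction on current PyTorch (written for this page, not taken from the project above): a tensor assigned as a plain attribute is never registered, while an nn.Parameter created with requires_grad=False is still listed but receives no gradient.

import torch
import torch.nn as nn

m = nn.Module()
m.frozen_plain = torch.zeros(3, 4)                                     # plain attribute: not registered
m.frozen_param = nn.Parameter(torch.zeros(3, 4), requires_grad=False)  # registered, but not trained
print([name for name, _ in m.named_parameters()])                      # ['frozen_param']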
def __init__(self, in1_features, in2_features, out_features,
             bias=(True, True, True)):
    super(Biaffine, self).__init__()
    self.in1_features = in1_features
    self.in2_features = in2_features
    self.out_features = out_features
    self._use_bias = bias

    shape = (in1_features + int(bias[0]),
             in2_features + int(bias[1]),
             out_features)
    self.weight = nn.Parameter(torch.Tensor(*shape))
    if bias[2]:
        self.bias = nn.Parameter(torch.Tensor(out_features))
    else:
        self.register_parameter('bias', None)
    self.reset_parameters()
def split_input_channel(self, channel_i):
    if channel_i > self.in_channels:
        print("cannot split channel {} of {}".format(channel_i, self.in_channels))
        return

    self.in_channels += 1
    orig_weight = self.weight.data
    # channel_i is split into two channels, each carrying half of its original weights
    dup_slice = orig_weight[:, channel_i, :] * .5

    new_weight = torch.zeros(self.out_channels, self.in_channels, self.kernel_size[0])
    if channel_i > 0:
        new_weight[:, :channel_i, :] = orig_weight[:, :channel_i, :]
    new_weight[:, channel_i, :] = dup_slice
    new_weight[:, channel_i + 1, :] = dup_slice
    if channel_i + 1 < self.in_channels:
        new_weight[:, channel_i + 2:, :] = orig_weight[:, channel_i + 1:, :]

    self.weight = Parameter(new_weight)
    self.init_ncc()
def test_forward_computes_forward_pass():
    weight = torch.randn(4, 8, 3, 3).cuda()
    input = torch.randn(4, 8, 4, 4).cuda()

    out = F.conv2d(
        input=Variable(input),
        weight=Parameter(weight),
        bias=None,
        stride=1,
        padding=1,
        dilation=1,
        groups=1,
    ).data

    func = _EfficientConv2d(
        stride=1,
        padding=1,
        dilation=1,
        groups=1,
    )
    out_efficient = func.forward(weight, None, input)

    assert(almost_equal(out, out_efficient))
def __init__(self, input_size, hidden_size, capacity):
    super(GORUCell, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.capacity = capacity

    self.U = nn.Parameter(
        torch.FloatTensor(input_size, hidden_size))
    self.thetaA = nn.Parameter(
        torch.FloatTensor(hidden_size // 2, capacity // 2))
    self.thetaB = nn.Parameter(
        torch.FloatTensor(hidden_size // 2 - 1, capacity // 2))
    self.bias = nn.Parameter(
        torch.FloatTensor(hidden_size))

    self.gate_U = nn.Parameter(
        torch.FloatTensor(input_size, 2 * hidden_size))
    self.gate_W = nn.Parameter(
        torch.FloatTensor(hidden_size, 2 * hidden_size))
    self.gate_bias = nn.Parameter(torch.FloatTensor(2 * hidden_size))

    self.reset_parameters()
def init_hidden(self, hidden_dim):
    """Trainable initial hidden state"""
    enc_init_hx = Variable(torch.zeros(hidden_dim), requires_grad=False)
    if self.use_cuda:
        enc_init_hx = enc_init_hx.cuda()
    # enc_init_hx.data.uniform_(-(1. / math.sqrt(hidden_dim)),
    #                           1. / math.sqrt(hidden_dim))

    enc_init_cx = Variable(torch.zeros(hidden_dim), requires_grad=False)
    if self.use_cuda:
        enc_init_cx = enc_init_cx.cuda()

    # enc_init_cx = nn.Parameter(enc_init_cx)
    # enc_init_cx.data.uniform_(-(1. / math.sqrt(hidden_dim)),
    #                           1. / math.sqrt(hidden_dim))
    return (enc_init_hx, enc_init_cx)
def __init__(self, num_classes, embed_size):
    """
    :param num_classes: An int. The number of possible classes.
    :param embed_size: An int. Embedding size
    """
    super(NEG_loss, self).__init__()

    self.num_classes = num_classes
    self.embed_size = embed_size

    self.out_embed = nn.Embedding(self.num_classes, self.embed_size)
    self.out_embed.weight = Parameter(t.FloatTensor(self.num_classes, self.embed_size).uniform_(-1, 1))

    self.in_embed = nn.Embedding(self.num_classes, self.embed_size)
    self.in_embed.weight = Parameter(t.FloatTensor(self.num_classes, self.embed_size).uniform_(-1, 1))
def init_embeddings(self, weight):
    emb_elements = self.embeddings.weight.data.nelement()
    mismatch_msg = "Expected " + str(emb_elements) + " elements but got {}"
    if isinstance(weight, np.ndarray):
        assert emb_elements == weight.size, \
            mismatch_msg.format(weight.size)
        self.embeddings.weight.data = torch.Tensor(weight)
    elif isinstance(weight, torch.Tensor):
        assert emb_elements == weight.nelement(), \
            mismatch_msg.format(weight.nelement())
        self.embeddings.weight.data = weight
    elif isinstance(weight, nn.Parameter):
        assert emb_elements == weight.nelement(), \
            mismatch_msg.format(weight.nelement())
        self.embeddings.weight = weight
    else:
        raise ValueError("Unknown weight type [{}]".format(type(weight)))
def __init__(self, params):
    super(Decoder, self).__init__()

    self.params = params

    self.kernels = [Parameter(t.Tensor(out_chan, in_chan, width).normal_(0, 0.05))
                    for out_chan, in_chan, width in params.decoder_kernels]
    self._add_to_parameters(self.kernels, 'decoder_kernel')

    self.biases = [Parameter(t.Tensor(out_chan).normal_(0, 0.05))
                   for out_chan, in_chan, width in params.decoder_kernels]
    self._add_to_parameters(self.biases, 'decoder_bias')

    self.out_size = self.params.decoder_kernels[-1][0]

    self.fc = nn.Linear(self.out_size, self.params.word_vocab_size)
def __init__(self, start_tag_index, stop_tag_index, tag_size, hidden_dim):
    super(EncoderCRF, self).__init__()
    self.hidden_dim = hidden_dim
    self.start_tag_index = start_tag_index
    self.stop_tag_index = stop_tag_index
    self.tag_size = tag_size

    # bidirectional GRU with hidden_dim // 2 per direction, so its output matches hidden_dim
    self.encoder = nn.GRU(embedding_dim, hidden_dim // 2,
                          num_layers=1, bidirectional=True)
    self.tag_projection = nn.Linear(hidden_dim, self.tag_size)

    self.transitions = nn.Parameter(
        torch.randn(self.tag_size, self.tag_size))

    self.hidden = self.init_hidden()
def init_duvenaud(self, params):
    learn_args = []
    learn_modules = []
    args = {}

    args['out'] = params['out']

    # Define a parameter matrix W for each layer.
    for l in range(params['layers']):
        learn_args.append(nn.Parameter(torch.randn(params['in'][l], params['out'])))

    # learn_modules.append(nn.Linear(params['out'], params['target']))
    learn_modules.append(NNet(n_in=params['out'], n_out=params['target']))
    return nn.ParameterList(learn_args), nn.ModuleList(learn_modules), args

# GG-NN, Li et al.
def __init__(self, model, action_size=1, init_value=0.0, *args, **kwargs):
    super(DiagonalGaussianPolicy, self).__init__(model, **kwargs)
    self.init_value = init_value
    self.logstd = th.zeros((1, action_size)) + self.init_value
    self.logstd = P(self.logstd)
    self.halflog2pie = V(T([2 * pi * exp(1)])) * 0.5
    self.halflog2pi = V(T([2.0 * pi])) * 0.5
    self.pi = V(T([pi]))