Initial commit: RoseTTAFold-All-Atom configured for Wes with Harbor images and s3:// paths
This commit is contained in:
475
rf2aa/model/layers/Attention_module.py
Normal file
475
rf2aa/model/layers/Attention_module.py
Normal file
@@ -0,0 +1,475 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import math
|
||||
from opt_einsum import contract as einsum
|
||||
from rf2aa.util_module import init_lecun_normal
|
||||
class FeedForwardLayer(nn.Module):
    """Pre-norm feed-forward block: LayerNorm -> Linear -> ReLU -> Dropout -> Linear.

    Only the update is returned; no residual addition is performed here.
    The second linear layer is zero-initialised, so a freshly constructed
    layer outputs zeros.

    Args:
        d_model: feature width of the input and of the output.
        r_ff: expansion ratio; the hidden width is ``d_model * r_ff``.
        p_drop: dropout probability applied after the ReLU.
    """

    def __init__(self, d_model, r_ff, p_drop=0.1):
        super().__init__()
        d_inner = d_model * r_ff  # hidden width
        self.norm = nn.LayerNorm(d_model)
        self.linear1 = nn.Linear(d_model, d_inner)
        self.dropout = nn.Dropout(p_drop)
        self.linear2 = nn.Linear(d_inner, d_model)

        self.reset_parameter()

    def reset_parameter(self):
        """Initialise both linear layers (He-normal for the first, zeros for the second)."""
        # layer feeding the ReLU: He initializer (kaiming normal)
        nn.init.kaiming_normal_(self.linear1.weight, nonlinearity='relu')
        # layer right before the residual connection: all zeros
        nn.init.zeros_(self.linear2.weight)
        for layer in (self.linear1, self.linear2):
            nn.init.zeros_(layer.bias)

    def forward(self, src):
        """Return the feed-forward update for ``src`` (last axis of size d_model)."""
        hidden = self.linear1(self.norm(src))
        hidden = self.dropout(F.relu_(hidden))
        return self.linear2(hidden)
|
||||
|
||||
class Attention(nn.Module):
    """Multi-head attention of ``query`` over ``key`` / ``value``.

    The output projection is zero-initialised, so a freshly constructed
    module returns zeros. ``p_drop`` is accepted but not used by this module.

    Args:
        d_query: feature width of ``query``.
        d_key: feature width of ``key`` and ``value``.
        n_head: number of attention heads.
        d_hidden: per-head feature width.
        d_out: feature width of the output.
        p_drop: unused.
    """

    def __init__(self, d_query, d_key, n_head, d_hidden, d_out, p_drop=0.1):
        super().__init__()
        d_inner = n_head * d_hidden  # concatenated width of all heads
        self.h = n_head
        self.dim = d_hidden
        # bias-free input projections
        self.to_q = nn.Linear(d_query, d_inner, bias=False)
        self.to_k = nn.Linear(d_key, d_inner, bias=False)
        self.to_v = nn.Linear(d_key, d_inner, bias=False)
        # output projection
        self.to_out = nn.Linear(d_inner, d_out)
        self.scaling = 1 / math.sqrt(d_hidden)

        # initialize all parameters properly
        self.reset_parameter()

    def reset_parameter(self):
        """Xavier-uniform q/k/v projections, all-zero output projection."""
        # query/key/value projection: Glorot uniform / Xavier uniform
        for proj in (self.to_q, self.to_k, self.to_v):
            nn.init.xavier_uniform_(proj.weight)

        # to_out sits right before the residual connection: zero init so the
        # module contributes nothing at the beginning of training
        nn.init.zeros_(self.to_out.weight)
        nn.init.zeros_(self.to_out.bias)

    def forward(self, query, key, value):
        """Attend from ``query`` (B, Q, d_query) to ``key``/``value`` (B, K, d_key).

        Returns a tensor of shape (B, Q, d_out).
        """
        _, n_query = query.shape[:2]
        n_batch, n_key = key.shape[:2]

        q = self.to_q(query).reshape(n_batch, n_query, self.h, self.dim)
        k = self.to_k(key).reshape(n_batch, n_key, self.h, self.dim)
        v = self.to_v(value).reshape(n_batch, n_key, self.h, self.dim)

        # scaled dot-product scores, normalised over the key axis
        q = q * self.scaling
        weights = F.softmax(einsum('bqhd,bkhd->bhqk', q, k), dim=-1)

        # weighted sum of the values, then concatenate the heads
        mixed = einsum('bhqk,bkhd->bqhd', weights, v)
        mixed = mixed.reshape(n_batch, n_query, self.h*self.dim)

        return self.to_out(mixed)
|
||||
|
||||
# MSA Attention (row/column) from AlphaFold architecture
|
||||
class SequenceWeight(nn.Module):
    """Per-position, per-head weights over MSA rows, scored against the first row.

    Args:
        d_msa: feature width of the MSA representation.
        n_head: number of heads.
        d_hidden: per-head feature width.
        p_drop: dropout probability applied to the resulting weights.
    """

    def __init__(self, d_msa, n_head, d_hidden, p_drop=0.1):
        super().__init__()
        self.h = n_head
        self.dim = d_hidden
        self.scale = 1.0 / math.sqrt(self.dim)

        d_inner = n_head * d_hidden
        self.to_query = nn.Linear(d_msa, d_inner)
        self.to_key = nn.Linear(d_msa, d_inner)
        self.dropout = nn.Dropout(p_drop)
        self.reset_parameter()

    def reset_parameter(self):
        """Xavier-uniform weights for both projections (biases keep their default init)."""
        for proj in (self.to_query, self.to_key):
            nn.init.xavier_uniform_(proj.weight)

    def forward(self, msa):
        """Return weights of shape (B, N, L, h, 1) for an MSA of shape (B, N, L, d_msa)."""
        n_batch, n_seq, n_res = msa.shape[:3]

        # the first row of the MSA provides the queries
        first_row = msa[:, 0]

        q = self.to_query(first_row).view(n_batch, 1, n_res, self.h, self.dim)
        k = self.to_key(msa).view(n_batch, n_seq, n_res, self.h, self.dim)

        q = q * self.scale
        scores = einsum('bqihd,bkihd->bkihq', q, k)
        # normalise across the sequences (axis 1)
        weights = F.softmax(scores, dim=1)
        return self.dropout(weights)
|
||||
|
||||
class MSARowAttentionWithBias(nn.Module):
    """Gated row-wise MSA attention with an additive bias computed from the pair features.

    Queries are weighted per sequence by ``SequenceWeight`` and the attention
    logits are summed over all sequences, so every row shares one attention map.

    Args:
        d_msa: feature width of the MSA representation.
        d_pair: feature width of the pair representation.
        n_head: number of attention heads.
        d_hidden: per-head feature width.
    """

    def __init__(self, d_msa=256, d_pair=128, n_head=8, d_hidden=32):
        super().__init__()
        d_inner = n_head * d_hidden
        self.norm_msa = nn.LayerNorm(d_msa)
        self.norm_pair = nn.LayerNorm(d_pair)

        self.seq_weight = SequenceWeight(d_msa, n_head, d_hidden, p_drop=0.1)
        self.to_q = nn.Linear(d_msa, d_inner, bias=False)
        self.to_k = nn.Linear(d_msa, d_inner, bias=False)
        self.to_v = nn.Linear(d_msa, d_inner, bias=False)
        self.to_b = nn.Linear(d_pair, n_head, bias=False)
        self.to_g = nn.Linear(d_msa, d_inner)
        self.to_out = nn.Linear(d_inner, d_msa)

        self.scaling = 1 / math.sqrt(d_hidden)
        self.h = n_head
        self.dim = d_hidden

        self.reset_parameter()

    def reset_parameter(self):
        """Initialise projections, bias, gate and output layers."""
        # query/key/value projection: Glorot uniform / Xavier uniform
        for proj in (self.to_q, self.to_k, self.to_v):
            nn.init.xavier_uniform_(proj.weight)

        # bias projection: initialised by init_lecun_normal
        self.to_b = init_lecun_normal(self.to_b)

        # gating: zero weights, one biases (mostly open gate at the beginning)
        nn.init.zeros_(self.to_g.weight)
        nn.init.ones_(self.to_g.bias)

        # to_out sits right before the residual connection: zero init so the
        # module contributes nothing at the beginning of training
        nn.init.zeros_(self.to_out.weight)
        nn.init.zeros_(self.to_out.bias)

    def forward(self, msa, pair):  # TODO: make this as tied-attention
        """Return the MSA update (B, N, L, d_msa) for ``msa`` (B, N, L, d_msa) and ``pair`` (B, L, L, d_pair)."""
        n_batch, n_seq, n_res = msa.shape[:3]
        head_shape = (n_batch, n_seq, n_res, self.h, self.dim)

        msa = self.norm_msa(msa)
        pair = self.norm_pair(pair)

        seq_weight = self.seq_weight(msa)  # (B, N, L, h, 1)
        query = self.to_q(msa).reshape(head_shape)
        key = self.to_k(msa).reshape(head_shape)
        value = self.to_v(msa).reshape(head_shape)
        bias = self.to_b(pair)  # (B, L, L, h)
        gate = torch.sigmoid(self.to_g(msa))

        # weight each sequence's queries, scale the keys, then sum the
        # logits over the sequence axis ``s``
        query = query * seq_weight.expand(-1, -1, -1, -1, self.dim)
        key = key * self.scaling
        logits = einsum('bsqhd,bskhd->bqkh', query, key)
        logits = logits + bias
        attn = F.softmax(logits, dim=-2)  # normalise over the key position

        out = einsum('bqkh,bskhd->bsqhd', attn, value).reshape(n_batch, n_seq, n_res, -1)
        out = gate * out

        return self.to_out(out)
|
||||
|
||||
class MSAColAttention(nn.Module):
    """Gated multi-head attention across the sequences of an MSA, independently per residue position.

    Args:
        d_msa: feature width of the MSA representation.
        n_head: number of attention heads.
        d_hidden: per-head feature width.
    """

    def __init__(self, d_msa=256, n_head=8, d_hidden=32):
        super().__init__()
        d_inner = n_head * d_hidden
        self.norm_msa = nn.LayerNorm(d_msa)

        self.to_q = nn.Linear(d_msa, d_inner, bias=False)
        self.to_k = nn.Linear(d_msa, d_inner, bias=False)
        self.to_v = nn.Linear(d_msa, d_inner, bias=False)
        self.to_g = nn.Linear(d_msa, d_inner)
        self.to_out = nn.Linear(d_inner, d_msa)

        self.scaling = 1 / math.sqrt(d_hidden)
        self.h = n_head
        self.dim = d_hidden

        self.reset_parameter()

    def reset_parameter(self):
        """Initialise projections, gate and output layers."""
        # query/key/value projection: Glorot uniform / Xavier uniform
        for proj in (self.to_q, self.to_k, self.to_v):
            nn.init.xavier_uniform_(proj.weight)

        # gating: zero weights, one biases (mostly open gate at the beginning)
        nn.init.zeros_(self.to_g.weight)
        nn.init.ones_(self.to_g.bias)

        # to_out sits right before the residual connection: zero init so the
        # module contributes nothing at the beginning of training
        nn.init.zeros_(self.to_out.weight)
        nn.init.zeros_(self.to_out.bias)

    def forward(self, msa):
        """Return the MSA update (B, N, L, d_msa) for ``msa`` of shape (B, N, L, d_msa)."""
        n_batch, n_seq, n_res = msa.shape[:3]
        head_shape = (n_batch, n_seq, n_res, self.h, self.dim)

        msa = self.norm_msa(msa)

        query = self.to_q(msa).reshape(head_shape)
        key = self.to_k(msa).reshape(head_shape)
        value = self.to_v(msa).reshape(head_shape)
        gate = torch.sigmoid(self.to_g(msa))

        # attention between sequences q and k at the same position i
        query = query * self.scaling
        logits = einsum('bqihd,bkihd->bihqk', query, key)
        attn = F.softmax(logits, dim=-1)

        out = einsum('bihqk,bkihd->bqihd', attn, value).reshape(n_batch, n_seq, n_res, -1)
        out = gate * out

        return self.to_out(out)
|
||||
|
||||
class MSAColGlobalAttention(nn.Module):
    """Gated column attention with one sequence-averaged query per position.

    Queries are multi-head and averaged over the sequence axis; keys and
    values are single-head (width ``d_hidden``) and shared by all heads.

    Args:
        d_msa: feature width of the MSA representation.
        n_head: number of query heads.
        d_hidden: per-head feature width.
    """

    def __init__(self, d_msa=64, n_head=8, d_hidden=8):
        super().__init__()
        d_inner = n_head * d_hidden
        self.norm_msa = nn.LayerNorm(d_msa)

        self.to_q = nn.Linear(d_msa, d_inner, bias=False)
        self.to_k = nn.Linear(d_msa, d_hidden, bias=False)
        self.to_v = nn.Linear(d_msa, d_hidden, bias=False)
        self.to_g = nn.Linear(d_msa, d_inner)
        self.to_out = nn.Linear(d_inner, d_msa)

        self.scaling = 1 / math.sqrt(d_hidden)
        self.h = n_head
        self.dim = d_hidden

        self.reset_parameter()

    def reset_parameter(self):
        """Initialise projections, gate and output layers."""
        # query/key/value projection: Glorot uniform / Xavier uniform
        for proj in (self.to_q, self.to_k, self.to_v):
            nn.init.xavier_uniform_(proj.weight)

        # gating: zero weights, one biases (mostly open gate at the beginning)
        nn.init.zeros_(self.to_g.weight)
        nn.init.ones_(self.to_g.bias)

        # to_out sits right before the residual connection: zero init so the
        # module contributes nothing at the beginning of training
        nn.init.zeros_(self.to_out.weight)
        nn.init.zeros_(self.to_out.bias)

    def forward(self, msa):
        """Return the MSA update (B, N, L, d_msa) for ``msa`` of shape (B, N, L, d_msa)."""
        n_batch, n_seq, n_res = msa.shape[:3]

        msa = self.norm_msa(msa)

        per_seq_query = self.to_q(msa).reshape(n_batch, n_seq, n_res, self.h, self.dim)
        query = per_seq_query.mean(dim=1)  # (B, L, h, dim)
        key = self.to_k(msa)  # (B, N, L, dim)
        value = self.to_v(msa)  # (B, N, L, dim)
        gate = torch.sigmoid(self.to_g(msa))  # (B, N, L, h*dim)

        query = query * self.scaling
        logits = einsum('bihd,bkid->bihk', query, key)  # (B, L, h, N)
        attn = F.softmax(logits, dim=-1)

        pooled = einsum('bihk,bkid->bihd', attn, value)
        pooled = pooled.reshape(n_batch, 1, n_res, -1)  # (B, 1, L, h*dim)
        # the gate broadcasts the pooled value back to every sequence
        out = gate * pooled  # (B, N, L, h*dim)

        return self.to_out(out)
|
||||
|
||||
# TriangleAttention & TriangleMultiplication from AlphaFold architecture
|
||||
class TriangleAttention(nn.Module):
    """Gated triangle attention over a pair representation.

    With ``start_node=True`` the entry (i, j) attends over k using keys and
    values from (i, k); otherwise it uses keys and values from (k, j). In
    both cases the additive bias is ``to_b(pair)`` read at (j, k) and
    broadcast over i. ``p_drop`` is accepted but not used by this module.

    Args:
        d_pair: feature width of the pair representation.
        n_head: number of attention heads.
        d_hidden: per-head feature width.
        p_drop: unused.
        start_node: selects which of the two index patterns above is used.
    """

    def __init__(self, d_pair, n_head=4, d_hidden=32, p_drop=0.1, start_node=True):
        super().__init__()
        d_inner = n_head * d_hidden
        self.norm = nn.LayerNorm(d_pair)
        self.to_q = nn.Linear(d_pair, d_inner, bias=False)
        self.to_k = nn.Linear(d_pair, d_inner, bias=False)
        self.to_v = nn.Linear(d_pair, d_inner, bias=False)

        self.to_b = nn.Linear(d_pair, n_head, bias=False)
        self.to_g = nn.Linear(d_pair, d_inner)

        self.to_out = nn.Linear(d_inner, d_pair)

        self.scaling = 1 / math.sqrt(d_hidden)

        self.h = n_head
        self.dim = d_hidden
        self.start_node = start_node

        self.reset_parameter()

    def reset_parameter(self):
        """Initialise projections, bias, gate and output layers."""
        # query/key/value projection: Glorot uniform / Xavier uniform
        for proj in (self.to_q, self.to_k, self.to_v):
            nn.init.xavier_uniform_(proj.weight)

        # bias projection: initialised by init_lecun_normal
        self.to_b = init_lecun_normal(self.to_b)

        # gating: zero weights, one biases (mostly open gate at the beginning)
        nn.init.zeros_(self.to_g.weight)
        nn.init.ones_(self.to_g.bias)

        # to_out sits right before the residual connection: zero init so the
        # module contributes nothing at the beginning of training
        nn.init.zeros_(self.to_out.weight)
        nn.init.zeros_(self.to_out.bias)

    def forward(self, pair):
        """Return the pair update (B, L, L, d_pair) for ``pair`` of shape (B, L, L, d_pair)."""
        n_batch, n_res = pair.shape[:2]

        pair = self.norm(pair)

        # input projection
        query = self.to_q(pair).reshape(n_batch, n_res, n_res, self.h, -1)
        key = self.to_k(pair).reshape(n_batch, n_res, n_res, self.h, -1)
        value = self.to_v(pair).reshape(n_batch, n_res, n_res, self.h, -1)
        bias = self.to_b(pair)  # (B, L, L, h)
        gate = torch.sigmoid(self.to_g(pair))  # (B, L, L, h*dim)

        # the two variants differ only in how keys and values are indexed
        if self.start_node:
            logits_eq, mix_eq = 'bijhd,bikhd->bijkh', 'bijkh,bikhd->bijhd'
        else:
            logits_eq, mix_eq = 'bijhd,bkjhd->bijkh', 'bijkh,bkjhd->bijhd'

        # attention
        query = query * self.scaling
        logits = einsum(logits_eq, query, key)
        logits = logits + bias.unsqueeze(1).expand(-1, n_res, -1, -1, -1)  # (bijkh)
        attn = F.softmax(logits, dim=-2)  # normalise over k

        out = einsum(mix_eq, attn, value).reshape(n_batch, n_res, n_res, -1)
        out = gate * out  # gated attention

        # output projection
        return self.to_out(out)
|
||||
|
||||
class TriangleMultiplication(nn.Module):
    """Gated triangle multiplicative update of a pair representation.

    With ``outgoing=True`` the update for (i, j) sums ``left[i, k] * right[j, k]``
    over k; otherwise it sums ``left[k, i] * right[k, j]``. The right operand is
    divided by L before the contraction.

    Args:
        d_pair: feature width of the pair representation.
        d_hidden: feature width of the left/right projections.
        outgoing: selects which of the two index patterns above is used.
    """

    def __init__(self, d_pair, d_hidden=128, outgoing=True):
        super().__init__()
        self.norm = nn.LayerNorm(d_pair)
        self.left_proj = nn.Linear(d_pair, d_hidden)
        self.right_proj = nn.Linear(d_pair, d_hidden)
        self.left_gate = nn.Linear(d_pair, d_hidden)
        self.right_gate = nn.Linear(d_pair, d_hidden)

        self.gate = nn.Linear(d_pair, d_pair)
        self.norm_out = nn.LayerNorm(d_hidden)
        self.out_proj = nn.Linear(d_hidden, d_pair)

        self.outgoing = outgoing

        self.reset_parameter()

    def reset_parameter(self):
        """Initialise projections, gates and output layers."""
        # regular linear weights: initialised by init_lecun_normal
        self.left_proj = init_lecun_normal(self.left_proj)
        self.right_proj = init_lecun_normal(self.right_proj)

        # biases of the projections start at zero
        for proj in (self.left_proj, self.right_proj):
            nn.init.zeros_(proj.bias)

        # gating: zero weights, one biases (mostly open gate at the beginning)
        for gate in (self.left_gate, self.right_gate, self.gate):
            nn.init.zeros_(gate.weight)
            nn.init.ones_(gate.bias)

        # out_proj sits right before the residual connection: zero init so the
        # module contributes nothing at the beginning of training
        nn.init.zeros_(self.out_proj.weight)
        nn.init.zeros_(self.out_proj.bias)

    def forward(self, pair):
        """Return the pair update (B, L, L, d_pair) for ``pair`` of shape (B, L, L, d_pair)."""
        n_res = pair.shape[1]
        pair = self.norm(pair)

        left = self.left_proj(pair)  # (B, L, L, d_h)
        left = torch.sigmoid(self.left_gate(pair)) * left

        right = self.right_proj(pair)  # (B, L, L, d_h)
        right = torch.sigmoid(self.right_gate(pair)) * right

        equation = 'bikd,bjkd->bijd' if self.outgoing else 'bkid,bkjd->bijd'
        out = einsum(equation, left, right/float(n_res))
        out = self.out_proj(self.norm_out(out))

        out_gate = torch.sigmoid(self.gate(pair))  # (B, L, L, d_pair)
        return out_gate * out
|
||||
|
||||
# Instead of triangle attention, use tied axial attention with bias from coordinates..?
|
||||
class BiasedAxialAttention(nn.Module):
    """Gated, tied axial attention over a pair representation with an external bias.

    The attention logits are summed over one axis of the pair tensor, so all
    slices along that axis share one attention map. With ``is_row=True`` the
    two residue axes of ``pair`` and ``bias`` are swapped on the way in and
    the output is swapped back. ``p_drop`` is accepted but not used by this module.

    Args:
        d_pair: feature width of the pair representation.
        d_bias: feature width of the bias input.
        n_head: number of attention heads.
        d_hidden: per-head feature width.
        p_drop: unused.
        is_row: whether to transpose the residue axes around the attention.
    """

    def __init__(self, d_pair, d_bias, n_head, d_hidden, p_drop=0.1, is_row=True):
        super().__init__()
        d_inner = n_head * d_hidden

        self.is_row = is_row
        self.norm_pair = nn.LayerNorm(d_pair)
        self.norm_bias = nn.LayerNorm(d_bias)

        self.to_q = nn.Linear(d_pair, d_inner, bias=False)
        self.to_k = nn.Linear(d_pair, d_inner, bias=False)
        self.to_v = nn.Linear(d_pair, d_inner, bias=False)
        self.to_b = nn.Linear(d_bias, n_head, bias=False)
        self.to_g = nn.Linear(d_pair, d_inner)
        self.to_out = nn.Linear(d_inner, d_pair)

        self.scaling = 1 / math.sqrt(d_hidden)
        self.h = n_head
        self.dim = d_hidden

        # initialize all parameters properly
        self.reset_parameter()

    def reset_parameter(self):
        """Initialise projections, bias, gate and output layers."""
        # query/key/value projection: Glorot uniform / Xavier uniform
        for proj in (self.to_q, self.to_k, self.to_v):
            nn.init.xavier_uniform_(proj.weight)

        # bias projection: initialised by init_lecun_normal
        self.to_b = init_lecun_normal(self.to_b)

        # gating: zero weights, one biases (mostly open gate at the beginning)
        nn.init.zeros_(self.to_g.weight)
        nn.init.ones_(self.to_g.bias)

        # to_out sits right before the residual connection: zero init so the
        # module contributes nothing at the beginning of training
        nn.init.zeros_(self.to_out.weight)
        nn.init.zeros_(self.to_out.bias)

    def forward(self, pair, bias):
        """Return the pair update (B, L, L, d_pair) for ``pair`` (B, L, L, d_pair) and ``bias`` (B, L, L, d_bias)."""
        # pair: (B, L, L, d_pair)
        n_batch, n_res = pair.shape[:2]
        head_shape = (n_batch, n_res, n_res, self.h, self.dim)

        if self.is_row:
            pair = pair.permute(0, 2, 1, 3)
            bias = bias.permute(0, 2, 1, 3)

        pair = self.norm_pair(pair)
        bias = self.norm_bias(bias)

        query = self.to_q(pair).reshape(head_shape)
        key = self.to_k(pair).reshape(head_shape)
        value = self.to_v(pair).reshape(head_shape)
        bias = self.to_b(bias)  # (B, L, L, h)
        gate = torch.sigmoid(self.to_g(pair))  # (B, L, L, h*dim)

        query = query * self.scaling
        key = key / n_res  # normalize for tied attention
        logits = einsum('bnihk,bnjhk->bijh', query, key)  # tied attention
        logits = logits + bias  # apply bias
        attn = F.softmax(logits, dim=-2)  # (B, L, L, h)

        out = einsum('bijh,bnjhd->bnihd', attn, value).reshape(n_batch, n_res, n_res, -1)
        out = gate * out

        out = self.to_out(out)
        return out.permute(0, 2, 1, 3) if self.is_row else out
|
||||
|
||||
111
rf2aa/model/layers/AuxiliaryPredictor.py
Normal file
111
rf2aa/model/layers/AuxiliaryPredictor.py
Normal file
@@ -0,0 +1,111 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from rf2aa.chemical import ChemicalData as ChemData
|
||||
|
||||
class DistanceNetwork(nn.Module):
    """Linear heads producing dist/omega (symmetrised) and theta/phi logits from pair features.

    ``p_drop`` is accepted but not used by this module. Both heads are
    zero-initialised, so a freshly constructed network outputs zero logits.
    """

    def __init__(self, n_feat, p_drop=0.0):
        super().__init__()
        #HACK: dimensions are hard coded here
        # 61 dist + 37 omega bins; must match bin counts defined in kinematics.py
        self.proj_symm = nn.Linear(n_feat, 61+37)
        # 37 theta + 19 phi bins
        self.proj_asymm = nn.Linear(n_feat, 37+19)

        self.reset_parameter()

    def reset_parameter(self):
        """Zero the weights and biases of both logit heads."""
        for head in (self.proj_symm, self.proj_asymm):
            nn.init.zeros_(head.weight)
        for head in (self.proj_symm, self.proj_asymm):
            nn.init.zeros_(head.bias)

    def forward(self, x):
        """Map pair features (B, L, L, C) to channel-first logits.

        Returns ``(logits_dist, logits_omega, logits_theta, logits_phi)`` with
        61, 37, 37 and 19 channels respectively, each of shape (B, bins, L, L).
        """
        # theta, phi (non-symmetric)
        asymm = self.proj_asymm(x)
        logits_theta = asymm[:, :, :, :37]
        logits_phi = asymm[:, :, :, 37:]

        # dist, omega: add the transpose over the two residue axes
        symm = self.proj_symm(x)
        symm = symm + symm.permute(0, 2, 1, 3)
        logits_dist = symm[:, :, :, :61]
        logits_omega = symm[:, :, :, 61:]

        ordered = (logits_dist, logits_omega, logits_theta, logits_phi)
        return tuple(logits.permute(0, 3, 1, 2) for logits in ordered)
|
||||
|
||||
class MaskedTokenNetwork(nn.Module):
    """Zero-initialised linear head producing per-token logits over ``ChemData().NAATOKENS`` classes.

    ``p_drop`` is accepted but not used by this module.
    """

    def __init__(self, n_feat, p_drop=0.0):
        super().__init__()

        #fd note this predicts probability for the mask token (which is never in ground truth)
        # it should be ok though(?)
        n_tokens = ChemData().NAATOKENS
        self.proj = nn.Linear(n_feat, n_tokens)

        self.reset_parameter()

    def reset_parameter(self):
        """Zero the weight and bias of the logit head."""
        for param in (self.proj.weight, self.proj.bias):
            nn.init.zeros_(param)

    def forward(self, x):
        """Map features (B, N, L, n_feat) to logits of shape (B, n_tokens, N*L)."""
        n_batch, n_seq, n_res = x.shape[:3]
        channel_first = self.proj(x).permute(0, 3, 1, 2)
        # flatten the (N, L) grid into a single token axis
        return channel_first.reshape(n_batch, -1, n_seq*n_res)
|
||||
|
||||
class LDDTNetwork(nn.Module):
    """Zero-initialised linear head producing ``n_bin_lddt`` logits per position."""

    def __init__(self, n_feat, n_bin_lddt=50):
        super().__init__()
        self.proj = nn.Linear(n_feat, n_bin_lddt)

        self.reset_parameter()

    def reset_parameter(self):
        """Zero the weight and bias of the logit head."""
        for param in (self.proj.weight, self.proj.bias):
            nn.init.zeros_(param)

    def forward(self, x):
        """Map features (B, L, n_feat) to channel-first logits (B, n_bin_lddt, L)."""
        per_position = self.proj(x)  # (B, L, n_bin_lddt)
        return per_position.permute(0, 2, 1)
|
||||
|
||||
class PAENetwork(nn.Module):
    """Zero-initialised linear head producing ``n_bin_pae`` logits per residue pair."""

    def __init__(self, n_feat, n_bin_pae=64):
        super().__init__()
        self.proj = nn.Linear(n_feat, n_bin_pae)
        self.reset_parameter()

    def reset_parameter(self):
        """Zero the weight and bias of the logit head."""
        for param in (self.proj.weight, self.proj.bias):
            nn.init.zeros_(param)

    def forward(self, x):
        """Map pair features (B, L, L, n_feat) to channel-first logits (B, n_bin_pae, L, L)."""
        per_pair = self.proj(x)  # (B, L, L, n_bin_pae)
        return per_pair.permute(0, 3, 1, 2)
|
||||
|
||||
class BinderNetwork(nn.Module):
    """Zero-initialised linear classifier over PAE logits averaged across inter-chain pairs."""

    def __init__(self, n_bin_pae=64):
        super().__init__()
        self.classify = torch.nn.Linear(n_bin_pae, 1)
        self.reset_parameter()

    def reset_parameter(self):
        """Zero the weight and bias of the classifier."""
        for param in (self.classify.weight, self.classify.bias):
            nn.init.zeros_(param)

    def forward(self, pae, same_chain):
        """Return the sigmoid score computed from the mean inter-chain PAE logits.

        Args:
            pae: channel-first PAE logits; permuted here to put the bins last.
            same_chain: mask indexed like the first three axes of the permuted
                logits; entries equal to 0 select the pairs that are averaged.
        """
        bins_last = pae.permute(0, 2, 3, 1)
        inter_chain = same_chain == 0
        # averaging an empty selection yields NaN; nan_to_num turns it into
        # all zeros (single-chain case)
        mean_inter = bins_last[inter_chain].mean(dim=0).nan_to_num()
        return torch.sigmoid(self.classify(mean_inter))
|
||||
|
||||
# Registry of auxiliary prediction heads, keyed by a short name.
aux_predictor_factory = dict([
    ("c6d", DistanceNetwork),
    ("mlm", MaskedTokenNetwork),
    ("plddt", LDDTNetwork),
    ("pae", PAENetwork),
    ("binder", BinderNetwork),
])
|
||||
458
rf2aa/model/layers/Embeddings.py
Normal file
458
rf2aa/model/layers/Embeddings.py
Normal file
@@ -0,0 +1,458 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from opt_einsum import contract as einsum
|
||||
import torch.utils.checkpoint as checkpoint
|
||||
from rf2aa.util import *
|
||||
from rf2aa.util_module import Dropout, get_clones, create_custom_forward, rbf, init_lecun_normal, get_res_atom_dist
|
||||
from rf2aa.model.layers.Attention_module import Attention, TriangleMultiplication, TriangleAttention, FeedForwardLayer
|
||||
from rf2aa.model.Track_module import PairStr2Pair, PositionalEncoding2D
|
||||
from rf2aa.chemical import ChemicalData as ChemData
|
||||
|
||||
# Module contains classes and functions to generate initial embeddings
|
||||
|
||||
class MSA_emb(nn.Module):
    """Build the initial MSA, pair and state embeddings from the seed MSA and query sequence.

    When ``d_init`` is 0 the MSA input width defaults to
    ``2*ChemData().NAATOKENS+2+2``. ``same_chain`` passed to ``forward`` is
    only forwarded to the positional encoding when ``enable_same_chain`` is set.
    """

    def __init__(self, d_msa=256, d_pair=128, d_state=32, d_init=0,
                 minpos=-32, maxpos=32, maxpos_atom=8, p_drop=0.1, use_same_chain=False, enable_same_chain=False):
        super().__init__()
        n_tokens = ChemData().NAATOKENS
        if d_init == 0:
            d_init = 2*n_tokens+2+2

        self.emb = nn.Linear(d_init, d_msa)  # embedding for general MSA
        self.emb_q = nn.Embedding(n_tokens, d_msa)  # query sequence -> MSA embedding
        self.emb_left = nn.Embedding(n_tokens, d_pair)  # query sequence -> pair embedding
        self.emb_right = nn.Embedding(n_tokens, d_pair)  # query sequence -> pair embedding
        self.emb_state = nn.Embedding(n_tokens, d_state)
        self.pos = PositionalEncoding2D(
            d_pair,
            minpos=minpos,
            maxpos=maxpos,
            maxpos_atom=maxpos_atom,
            p_drop=p_drop,
            use_same_chain=use_same_chain,
            enable_same_chain=enable_same_chain,
        )
        self.enable_same_chain = enable_same_chain
        self.reset_parameter()

    def reset_parameter(self):
        """Re-initialise every embedding with init_lecun_normal and zero the MSA bias."""
        self.emb = init_lecun_normal(self.emb)
        self.emb_q = init_lecun_normal(self.emb_q)
        self.emb_left = init_lecun_normal(self.emb_left)
        self.emb_right = init_lecun_normal(self.emb_right)
        self.emb_state = init_lecun_normal(self.emb_state)

        nn.init.zeros_(self.emb.bias)

    def _msa_emb(self, msa, seq):
        """Embed the MSA and add the query-sequence embedding to every row."""
        n_seq = msa.shape[1]
        embedded = self.emb(msa)  # (B, N, L, d_msa)
        query = self.emb_q(seq).unsqueeze(1)  # (B, 1, L, d_msa)
        return embedded + query.expand(-1, n_seq, -1, -1)

    def _pair_emb(self, seq, idx, bond_feats, dist_matrix, same_chain=None):
        """Outer sum of left/right sequence embeddings plus the positional encoding."""
        left = self.emb_left(seq)[:, None]  # (B, 1, L, d_pair)
        right = self.emb_right(seq)[:, :, None]  # (B, L, 1, d_pair)
        pair = left + right  # (B, L, L, d_pair)
        # add relative position
        rel_pos = self.pos(seq, idx, bond_feats, dist_matrix, same_chain=same_chain)
        return pair + rel_pos

    def _state_emb(self, seq):
        """Look up the state embedding of each token in ``seq``."""
        return self.emb_state(seq)

    def forward(self, msa, seq, idx, bond_feats, dist_matrix, same_chain=None):
        """Compute the initial embeddings.

        Inputs:
            - msa: Input MSA (B, N, L, d_init)
            - seq: Input Sequence (B, L)
            - idx: Residue index
            - bond_feats: Bond features (B, L, L)
            - dist_matrix, same_chain: passed through to the positional encoding
        Outputs:
            - msa: Initial MSA embedding (B, N, L, d_msa)
            - pair: Initial Pair embedding (B, L, L, d_pair)
            - state: Initial state embedding from ``emb_state``
        """
        # drop the chain information unless it was explicitly enabled
        if self.enable_same_chain == False:
            same_chain = None

        msa_emb = self._msa_emb(msa, seq)
        pair_emb = self._pair_emb(seq, idx, bond_feats, dist_matrix, same_chain=same_chain)
        state_emb = self._state_emb(seq)
        return msa_emb, pair_emb, state_emb
|
||||
|
||||
class MSA_emb_nostate(MSA_emb):
    """Variant of ``MSA_emb`` that produces no state embedding.

    ``forward`` returns ``None`` in place of the state. ``enable_same_chain``
    is never forwarded to the parent constructor, so it keeps the parent's
    default of False.
    """

    def __init__(self, d_msa=256, d_pair=128, d_state=32, d_init=0, minpos=-32, maxpos=32, maxpos_atom=8, p_drop=0.1, use_same_chain=False):
        # The parent resolves d_init == 0 to its default width itself, so no
        # recomputation is needed here.
        super().__init__(d_msa, d_pair, d_state, d_init, minpos, maxpos, maxpos_atom, p_drop, use_same_chain)
        # no state embedding in this variant; forward() returns None for it
        self.emb_state = None

    def forward(self, msa, seq, idx, bond_feats, dist_matrix, same_chain=None):
        """Return ``(msa_embedding, pair_embedding, None)``.

        ``same_chain`` is accepted so the signature matches ``MSA_emb.forward``
        but is ignored, exactly as the parent ignores it when
        ``enable_same_chain`` is False.
        """
        msa = self._msa_emb(msa, seq)
        pair = self._pair_emb(seq, idx, bond_feats, dist_matrix)
        return msa, pair, None
|
||||
|
||||
class Extra_emb(nn.Module):
    """Embed an MSA and add the query-sequence embedding to every row.

    When ``d_init`` is 0 the input width defaults to ``ChemData().NAATOKENS-1+4``.
    ``p_drop`` is accepted but currently unused.
    """

    def __init__(self, d_msa=256, d_init=0, p_drop=0.1):
        super().__init__()
        n_tokens = ChemData().NAATOKENS
        if d_init == 0:
            d_init = n_tokens-1+4
        self.emb = nn.Linear(d_init, d_msa)  # embedding for general MSA
        self.emb_q = nn.Embedding(n_tokens, d_msa)  # embedding for query sequence

        self.reset_parameter()

    def reset_parameter(self):
        """Re-initialise the MSA projection with init_lecun_normal and zero its bias."""
        # NOTE(review): emb_q keeps PyTorch's default init here, unlike
        # MSA_emb.reset_parameter -- confirm this is intentional.
        self.emb = init_lecun_normal(self.emb)
        nn.init.zeros_(self.emb.bias)

    def forward(self, msa, seq, idx):
        """Compute the MSA embedding.

        Inputs:
            - msa: Input MSA (B, N, L, d_init)
            - seq: Input Sequence (B, L)
            - idx: Residue index (not used here)
        Outputs:
            - msa: Initial MSA embedding (B, N, L, d_msa)
        """
        n_seq = msa.shape[1]  # number of sequences in the MSA
        embedded = self.emb(msa)  # (B, N, L, d_msa)
        query = self.emb_q(seq).unsqueeze(1)  # (B, 1, L, d_msa)
        # add the query embedding to every MSA row
        return embedded + query.expand(-1, n_seq, -1, -1)
|
||||
|
||||
class Bond_emb(nn.Module):
    """Embed integer bond types: one-hot over ``ChemData().NBTYPES`` classes, then a linear map to ``d_pair``.

    When ``d_init`` is 0 the linear input width defaults to ``ChemData().NBTYPES``.
    """

    def __init__(self, d_pair=128, d_init=0):
        super().__init__()

        if d_init == 0:
            d_init = ChemData().NBTYPES

        self.emb = nn.Linear(d_init, d_pair)

        self.reset_parameter()

    def reset_parameter(self):
        """Re-initialise the projection with init_lecun_normal and zero its bias."""
        self.emb = init_lecun_normal(self.emb)
        nn.init.zeros_(self.emb.bias)

    def forward(self, bond_feats):
        """Return the embedding of the integer bond-type tensor ``bond_feats``."""
        one_hot = F.one_hot(bond_feats, num_classes=ChemData().NBTYPES)
        # nn.Linear needs a floating-point input
        return self.emb(one_hot.float())
|
||||
|
||||
class TemplatePairStack(nn.Module):
    """Stack of ``PairStr2Pair`` blocks applied to per-template pair features.

    Args:
        n_block: number of ``PairStr2Pair`` blocks.
        d_templ: feature width of the template pair representation.
        n_head, d_hidden, p_drop: forwarded to every block.
        d_t1d: feature width of the 1D template features.
        d_state: width of the state projected from the 1D features.
        symmetrize_repeats, repeat_length, symmsub_k, sym_method: forwarded
            unchanged to every block.
    """

    def __init__(self, n_block=2, d_templ=64, n_head=4, d_hidden=32, d_t1d=22, d_state=32, p_drop=0.25,
                 symmetrize_repeats=False, repeat_length=None, symmsub_k=1, sym_method=None):

        super(TemplatePairStack, self).__init__()
        self.n_block = n_block
        self.proj_t1d = nn.Linear(d_t1d, d_state)

        proc_s = [PairStr2Pair(d_pair=d_templ,
                               n_head=n_head,
                               d_hidden=d_hidden,
                               d_state=d_state,
                               p_drop=p_drop,
                               symmetrize_repeats=symmetrize_repeats,
                               repeat_length=repeat_length,
                               symmsub_k=symmsub_k,
                               sym_method=sym_method) for _ in range(n_block)]

        self.block = nn.ModuleList(proc_s)
        self.norm = nn.LayerNorm(d_templ)
        self.reset_parameter()

    def reset_parameter(self):
        """Re-initialise the 1D projection with init_lecun_normal and zero its bias."""
        self.proj_t1d = init_lecun_normal(self.proj_t1d)
        nn.init.zeros_(self.proj_t1d.bias)

    def forward(self, templ, rbf_feat, t1d, use_checkpoint=False, p2p_crop=-1):
        """Run the block stack over all templates and layer-normalise the result.

        Args:
            templ: template pair features; the first three axes are (B, T, L)
                and the tensor is reshaped to (B*T, L, L, -1) for the blocks.
            rbf_feat: passed unchanged to every block.
            t1d: 1D template features, reshaped to (B*T, L, -1).
            use_checkpoint: run each block through ``torch.utils.checkpoint``.
            p2p_crop: passed to every block as its fourth positional argument.

        Returns:
            Tensor of shape (B, T, L, L, d_templ).
        """
        B, T, L = templ.shape[:3]
        # fold the template axis into the batch axis
        templ = templ.reshape(B*T, L, L, -1)
        t1d = t1d.reshape(B*T, L, -1)
        state = self.proj_t1d(t1d)

        for i_block in range(self.n_block):
            block = self.block[i_block]
            if use_checkpoint:
                templ = checkpoint.checkpoint(
                    create_custom_forward(block),
                    templ, rbf_feat, state, p2p_crop,
                    use_reentrant=True
                )
            else:
                # Forward p2p_crop here as well: previously it was dropped on
                # this path, so the crop setting only took effect when
                # checkpointing was enabled.
                # NOTE(review): assumes create_custom_forward hands its
                # positional inputs to the block unchanged -- confirm.
                templ = block(templ, rbf_feat, state, p2p_crop)
        return self.norm(templ).reshape(B, T, L, L, -1)
|
||||
|
||||
|
||||
def copy_main_2d(pair, Leff, idx):
    """
    Copy the "main unit" of a generic 2D representation of shape (...,L,L,h)
    onto every diagonal block.

    Parameters:
        pair (torch.tensor, required): Shape [...,L,L,h] "2D" tensor; modified in place
        Leff (int, required): side length of one block; must divide L
        idx (int, required): index of the diagonal block that is used as the source

    Returns:
        The same ``pair`` tensor, after the in-place update.
    """
    lo, hi = idx*Leff, (idx+1)*Leff

    # snapshot the main block before anything gets overwritten
    main = pair[..., lo:hi, lo:hi, :].clone()

    L = pair.shape[-2]
    assert L%Leff == 0

    # write the snapshot into each Leff x Leff block on the main diagonal
    for offset in range(0, L, Leff):
        span = slice(offset, offset+Leff)
        pair[..., span, span, :] = main

    return pair
|
||||
|
||||
|
||||
def copy_main_1d(single, Leff, idx):
    """
    Copy the "main unit" of a generic 1D representation of shape (...,L,h)
    onto every block along the L axis.

    Parameters:
        single (torch.tensor, required): Shape [...,L,h] "1D" tensor; modified in place
        Leff (int, required): length of one block; must divide L
        idx (int, required): index of the block that is used as the source

    Returns:
        The same ``single`` tensor, after the in-place update.
    """
    lo, hi = idx*Leff, (idx+1)*Leff

    # snapshot the main block before anything gets overwritten
    main = single[..., lo:hi, :].clone()

    L = single.shape[-2]
    assert L%Leff == 0

    # write the snapshot into each length-Leff block
    for offset in range(0, L, Leff):
        single[..., offset:offset+Leff, :] = main

    return single
|
||||
|
||||
|
||||
class Templ_emb(nn.Module):
    """
    Embed template features and mix them into the query pair / state features.

    Feature layout (original author's notes):
      t2d:
        - 61 distogram bins + 6 orientations (67)
        - Mask (missing/unaligned) (1)
      t1d:
        - tiled sequence one-hot
        - confidence (1)
        The default width is (ChemData().NAATOKENS-1)+1; the original note said
        "20 standard aa + gap", which presumably predates the current token
        set -- TODO confirm.

    Parameters:
        d_t1d: width of the per-residue template features (0 -> derive from ChemData)
        d_t2d: width of the pairwise template features
        d_tor: width of the torsion features (0 -> 3*ChemData().NTOTALDOFS)
        d_pair, d_state: widths of the query pair / state representations
        n_block, d_templ, n_head, d_hidden, p_drop: forwarded to the template
            pair stack and the two attention layers
        symmetrize_repeats, repeat_length, symmsub_k, sym_method: repeat-protein
            options forwarded to TemplatePairStack
        main_block, copy_main_block: when copy_main_block is truthy, block
            `main_block` (of length `repeat_length`) is copied over all blocks
            of the template pair and 1D embeddings in forward()
        additional_dt1d: extra channels added on top of d_t1d
    """
    def __init__(self, d_t1d=0, d_t2d=67+1, d_tor=0, d_pair=128, d_state=32,
                 n_block=2, d_templ=64,
                 n_head=4, d_hidden=16, p_drop=0.25,
                 symmetrize_repeats=False, repeat_length=None, symmsub_k=1, sym_method='mean',
                 main_block=None, copy_main_block=None, additional_dt1d=0):
        # nn.Module must be initialised before any attribute is attached to it
        super(Templ_emb, self).__init__()

        if d_t1d==0:
            d_t1d=(ChemData().NAATOKENS-1)+1
        if d_tor==0:
            d_tor=3*ChemData().NTOTALDOFS
        d_t1d += additional_dt1d

        self.main_block = main_block
        self.symmetrize_repeats = symmetrize_repeats
        self.copy_main_block = copy_main_block
        self.repeat_length = repeat_length

        # process 2D features: t2d concatenated with the "left" and "right" t1d
        self.emb = nn.Linear(d_t1d*2+d_t2d, d_templ)

        self.templ_stack = TemplatePairStack(n_block=n_block, d_templ=d_templ, n_head=n_head,
                                             d_hidden=d_hidden, d_t1d=d_t1d, d_state=d_state, p_drop=p_drop,
                                             symmetrize_repeats=symmetrize_repeats, repeat_length=repeat_length,
                                             symmsub_k=symmsub_k, sym_method=sym_method)

        self.attn = Attention(d_pair, d_templ, n_head, d_hidden, d_pair, p_drop=p_drop)

        # process torsion angles
        # NOTE: a TemplateTorsionStack used to sit between proj_t1d and attn_tor;
        # it is disabled in this version.
        self.emb_t1d = nn.Linear(d_t1d+d_tor, d_templ)
        self.proj_t1d = nn.Linear(d_templ, d_templ)
        self.attn_tor = Attention(d_state, d_templ, n_head, d_hidden, d_state, p_drop=p_drop)

        self.reset_parameter()

    def reset_parameter(self):
        """Initialise the embedding layers; biases are zeroed."""
        self.emb = init_lecun_normal(self.emb)
        nn.init.zeros_(self.emb.bias)

        # emb_t1d feeds a ReLU in forward(), hence the He (kaiming) initialiser
        nn.init.kaiming_normal_(self.emb_t1d.weight, nonlinearity='relu')
        nn.init.zeros_(self.emb_t1d.bias)

        self.proj_t1d = init_lecun_normal(self.proj_t1d)
        nn.init.zeros_(self.proj_t1d.bias)

    def _get_templ_emb(self, t1d, t2d):
        """
        Build the initial template pair embedding.

        t1d (B, T, L, d_t1d) is broadcast along rows and columns and
        concatenated with t2d (B, T, L, L, d_t2d) before the linear embedding.
        Returns a (B, T, L, L, d_templ) tensor.
        """
        _, _, L, _ = t1d.shape
        # Prepare 2D template features
        left = t1d.unsqueeze(3).expand(-1,-1,-1,L,-1)
        right = t1d.unsqueeze(2).expand(-1,-1,L,-1,-1)
        #
        templ = torch.cat((t2d, left, right), -1) # (B, T, L, L, d_t2d + 2*d_t1d)
        return self.emb(templ) # Template features (B, T, L, L, d_templ)

    def _get_templ_rbf(self, xyz_t, mask_t):
        """
        Radial-basis encoding of the pairwise distances inside each template.

        xyz_t is flattened to (B*T, L, 3) and mask_t to (B*T, L, L); masked
        pairs are zeroed in the result.
        """
        B, T, L = xyz_t.shape[:3]

        # process each template features
        xyz_t = xyz_t.reshape(B*T, L, 3).contiguous()
        mask_t = mask_t.reshape(B*T, L, L)
        assert(xyz_t.is_contiguous())
        rbf_feat = rbf(torch.cdist(xyz_t, xyz_t)) * mask_t[...,None] # (B*T, L, L, d_rbf)
        return rbf_feat

    def forward(self, t1d, t2d, alpha_t, xyz_t, mask_t, pair, state, use_checkpoint=False, p2p_crop=-1):
        """
        Input
            - t1d: 1D template info (B, T, L, d_t1d)
            - t2d: 2D template info (B, T, L, L, d_t2d)
            - alpha_t: torsion angle info (B, T, L, d_tor)
            - xyz_t: template coordinates, one point per residue (B, T, L, 3)
              (original note: CA coordinates)
            - mask_t: is valid residue pair? (B, T, L, L)
            - pair: query pair features (B, L, L, d_pair)
            - state: query state features (B, L, d_state)
            - use_checkpoint: run the attention layers (and the template stack)
              under activation checkpointing
            - p2p_crop: forwarded unchanged to the template pair stack
        Output
            - pair (B, L, L, d_pair) and state (B, L, d_state), each with the
              template attention output added residually
        """
        B, T, L, _ = t1d.shape

        templ = self._get_templ_emb(t1d, t2d)
        # this looks a lot like a bug but it is not
        # mask_t has already been updated by same_chain in the train_EMA script so pairwise distances between
        # protein chains are ignored
        rbf_feat = self._get_templ_rbf(xyz_t, mask_t)

        # process each template pair feature
        templ = self.templ_stack(templ, rbf_feat, t1d, use_checkpoint=use_checkpoint, p2p_crop=p2p_crop) # (B, T, L,L, d_templ)

        # DJ - repeat protein symmetrization (2D)
        if self.copy_main_block:
            assert self.main_block is not None
            assert self.symmetrize_repeats
            # copy the main repeat unit internally down the pair representation diagonal
            templ = copy_main_2d(templ, self.repeat_length, self.main_block)

        # Prepare 1D template torsion angle features
        t1d = torch.cat((t1d, alpha_t), dim=-1) # (B, T, L, d_t1d+d_tor)
        # process each template features
        t1d = self.proj_t1d(F.relu_(self.emb_t1d(t1d)))

        # DJ - repeat protein symmetrization (1D)
        if self.copy_main_block:
            # already made assertions above
            # copy main unit down single rep
            t1d = copy_main_1d(t1d, self.repeat_length, self.main_block)

        # mixing query state features to template state features:
        # each residue is one attention "batch" entry, attending over its T templates
        state = state.reshape(B*L, 1, -1)
        t1d = t1d.permute(0,2,1,3).reshape(B*L, T, -1)
        if use_checkpoint:
            out = checkpoint.checkpoint(
                create_custom_forward(self.attn_tor), state, t1d, t1d, use_reentrant=True
            )
            out = out.reshape(B, L, -1)
        else:
            out = self.attn_tor(state, t1d, t1d).reshape(B, L, -1)
        state = state.reshape(B, L, -1)
        state = state + out

        # mixing query pair features to template information (Template pointwise attention):
        # each residue pair attends over its T template embeddings
        pair = pair.reshape(B*L*L, 1, -1)
        templ = templ.permute(0, 2, 3, 1, 4).reshape(B*L*L, T, -1)
        if use_checkpoint:
            out = checkpoint.checkpoint(
                create_custom_forward(self.attn), pair, templ, templ, use_reentrant=True
            )
            out = out.reshape(B, L, L, -1)
        else:
            out = self.attn(pair, templ, templ).reshape(B, L, L, -1)
        #
        pair = pair.reshape(B, L, L, -1)
        pair = pair + out

        return pair, state
|
||||
|
||||
|
||||
class Recycling(nn.Module):
    """
    Recycling embedder that refreshes the MSA and pair features.

    The MSA and pair inputs are layer-normalised, and a projection of the
    radial-basis-encoded pairwise distances between atom index 1 of `xyz`
    is added to the pair features.

    Parameters:
        d_msa: width of the MSA features (LayerNorm size)
        d_pair: width of the pair features
        d_state: accepted for signature parity; not used by this class
        d_rbf: width of the radial-basis distance encoding fed to proj_dist
    """
    def __init__(self, d_msa=256, d_pair=128, d_state=32, d_rbf=64):
        super(Recycling, self).__init__()
        self.proj_dist = nn.Linear(d_rbf, d_pair)
        self.norm_pair = nn.LayerNorm(d_pair)
        self.norm_msa = nn.LayerNorm(d_msa)

        self.reset_parameter()

    def reset_parameter(self):
        """LeCun-normal weights and zero bias for the distance projection."""
        self.proj_dist = init_lecun_normal(self.proj_dist)
        nn.init.zeros_(self.proj_dist.bias)

    def forward(self, msa, pair, xyz, state, sctors, mask_recycle=None):
        """
        Parameters:
            msa: MSA features whose first two dims are (B, L) and last is d_msa
            pair: pair features, last dim d_pair (added to a (B, L, L, d_pair) term)
            xyz: coordinates indexed as xyz[:, :, atom]; atom index 1 is used
                (named Ca in the code)
            state: returned untouched
            sctors: unused here
            mask_recycle: optional mask, broadcast over the rbf channels;
                masked pairs contribute no distance signal

        Returns:
            (msa, pair, state)
        """
        B, L = msa.shape[:2]
        msa = self.norm_msa(msa)
        pair = self.norm_pair(pair)

        Ca = xyz[:,:,1]
        dist_CA = rbf(
            torch.cdist(Ca, Ca)
        ).reshape(B,L,L,-1)

        # identity test: `!= None` would dispatch to the tensor's comparison operator
        if mask_recycle is not None:
            dist_CA = mask_recycle[...,None].float()*dist_CA

        pair = pair + self.proj_dist(dist_CA)

        # state is passed through unchanged (original note: "state is just zeros")
        return msa, pair, state
|
||||
|
||||
class RecyclingAllFeatures(nn.Module):
    """
    Recycling embedder that feeds distances, state and side-chain torsions
    back into the MSA and pair features.

    Parameters:
        d_msa: width of the MSA features
        d_pair: width of the pair features
        d_state: width of the state features
        d_rbf: width of the radial-basis distance encoding
    """
    def __init__(self, d_msa=256, d_pair=128, d_state=32, d_rbf=64):
        super(RecyclingAllFeatures, self).__init__()
        # input is [rbf(dist), state_i, state_j] concatenated on the last axis
        self.proj_dist = nn.Linear(d_rbf+d_state*2, d_pair)
        self.norm_pair = nn.LayerNorm(d_pair)
        self.proj_sctors = nn.Linear(2*ChemData().NTOTALDOFS, d_msa)
        self.norm_msa = nn.LayerNorm(d_msa)
        self.norm_state = nn.LayerNorm(d_state)

        self.reset_parameter()

    def reset_parameter(self):
        """LeCun-normal weights and zero biases for both projections."""
        self.proj_dist = init_lecun_normal(self.proj_dist)
        nn.init.zeros_(self.proj_dist.bias)
        self.proj_sctors = init_lecun_normal(self.proj_sctors)
        nn.init.zeros_(self.proj_sctors.bias)

    def forward(self, msa, pair, xyz, state, sctors, mask_recycle=None):
        """
        Parameters:
            msa: MSA features, last dim d_msa
            pair: pair features whose first two dims are (B, L), last is d_pair
            xyz: coordinates indexed as xyz[:, :, atom]; atom index 1 is used
                (named Ca_or_P in the code)
            state: state features (B, L, d_state)
            sctors: side-chain torsions, reshaped to (B, -1, 2*NTOTALDOFS)
            mask_recycle: optional mask applied to the rbf distance channels only

        Returns:
            (msa, pair, state) where state is the layer-normalised input state
        """
        B, L = pair.shape[:2]
        state = self.norm_state(state)

        # pairwise outer "concatenation" of the state: entry (i, j) sees state_i and state_j
        left = state.unsqueeze(2).expand(-1,-1,L,-1)
        right = state.unsqueeze(1).expand(-1,L,-1,-1)

        Ca_or_P = xyz[:,:,1].contiguous()

        dist = rbf(torch.cdist(Ca_or_P, Ca_or_P))
        # identity test: `!= None` would dispatch to the tensor's comparison operator
        if mask_recycle is not None:
            dist = mask_recycle[...,None].float()*dist
        dist = torch.cat((dist, left, right), dim=-1)
        dist = self.proj_dist(dist)
        pair = dist + self.norm_pair(pair)

        sctors = self.proj_sctors(sctors.reshape(B,-1,2*ChemData().NTOTALDOFS))
        msa = sctors + self.norm_msa(msa)

        return msa, pair, state
|
||||
|
||||
# Registry of recycling embedders, keyed by the name used to select one.
recycling_factory = dict(
    msa_pair=Recycling,
    all=RecyclingAllFeatures,
)
|
||||
100
rf2aa/model/layers/SE3_network.py
Normal file
100
rf2aa/model/layers/SE3_network.py
Normal file
@@ -0,0 +1,100 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from icecream import ic
|
||||
import inspect
|
||||
|
||||
import sys, os
|
||||
#script_dir = os.path.dirname(os.path.realpath(__file__))+'/'
|
||||
#sys.path.insert(0,script_dir+'SE3Transformer')
|
||||
|
||||
from rf2aa.util import xyz_frame_from_rotation_mask
|
||||
from rf2aa.util_module import init_lecun_normal_param, \
|
||||
make_full_graph, rbf, init_lecun_normal
|
||||
from rf2aa.loss.loss import calc_chiral_grads
|
||||
from rf2aa.model.layers.Attention_module import FeedForwardLayer
|
||||
from rf2aa.SE3Transformer.se3_transformer.model import SE3Transformer
|
||||
from rf2aa.SE3Transformer.se3_transformer.model.fiber import Fiber
|
||||
from rf2aa.util_module import get_seqsep_protein_sm
|
||||
|
||||
# Record which files the SE3Transformer / Fiber classes were actually loaded from.
se3_fiber_path = inspect.getfile(Fiber)
se3_transformer_path = inspect.getfile(SE3Transformer)
# Import-time guard: the SE3Transformer in use must live under an 'rf2aa' path
# (presumably to rule out a separately installed copy -- TODO confirm intent).
assert 'rf2aa' in se3_transformer_path
|
||||
|
||||
class SE3TransformerWrapper(nn.Module):
    """Thin wrapper that builds an SE3Transformer from plain feature counts
    and applies the project's parameter initialisation."""
    def __init__(self, num_layers=2, num_channels=32, num_degrees=3, n_heads=4, div=4,
                 l0_in_features=32, l0_out_features=32,
                 l1_in_features=3, l1_out_features=2,
                 num_edge_features=32):
        super().__init__()
        # remembered so forward()/reset_parameter() know whether degree-1 features exist
        self.l1_in = l1_in_features
        self.l1_out = l1_out_features

        fiber_edge = Fiber({0: num_edge_features})

        # degree-0 features are always present; degree-1 only when requested
        in_struct = {0: l0_in_features}
        if l1_in_features > 0:
            in_struct[1] = l1_in_features
        out_struct = {0: l0_out_features}
        if l1_out_features > 0:
            out_struct[1] = l1_out_features

        fiber_in = Fiber(in_struct)
        fiber_hidden = Fiber.create(num_degrees, num_channels)
        fiber_out = Fiber(out_struct)

        self.se3 = SE3Transformer(num_layers=num_layers,
                                  fiber_in=fiber_in,
                                  fiber_hidden=fiber_hidden,
                                  fiber_out = fiber_out,
                                  num_heads=n_heads,
                                  channels_div=div,
                                  fiber_edge=fiber_edge,
                                  populate_edge="arcsin",
                                  final_layer="lin",
                                  use_layer_norm=True)

        self.reset_parameter()

    def reset_parameter(self):
        """Re-initialise the wrapped network's parameters by name/shape."""
        for name, param in self.se3.named_parameters():
            if "bias" in name:
                nn.init.zeros_(param)
            elif len(param.shape) == 1:
                # remaining 1-D parameters keep their default values
                continue
            elif "radial_func" not in name:
                init_lecun_normal_param(param)
            elif "net.6" in name:
                # presumably the last layer of the radial function -- TODO confirm
                nn.init.zeros_(param)
            else:
                # linear layers that sit before a ReLU get kaiming_normal_
                nn.init.kaiming_normal_(param, nonlinearity='relu')

        # zero-initialise the weights of the last graph module
        last_module = self.se3.graph_modules[-1]
        nn.init.zeros_(last_module.weights['0'])
        if self.l1_out > 0:
            nn.init.zeros_(last_module.weights['1'])

    def forward(self, G, type_0_features, type_1_features=None, edge_features=None):
        """Run the SE3Transformer on graph G.

        Degree-1 node features are passed only when the wrapper was built with
        l1_in_features > 0; edge features are always passed under key '0'.
        """
        node_features = {'0': type_0_features}
        if self.l1_in > 0:
            node_features['1'] = type_1_features
        return self.se3(G, node_features, {'0': edge_features})
|
||||
|
||||
Reference in New Issue
Block a user