add contiguous in BertIntermediate (#13713)

faster step with a lot less recomputation
Author:    chenyu
Committer: GitHub
Date:      2025-12-15 22:37:36 -05:00
Parent:    7589c897b2
Commit:    041e9a41c9


@@ -242,7 +242,8 @@ class BertIntermediate:
   def __call__(self, hidden_states):
     x = self.dense(hidden_states)
     # tinygrad gelu is openai gelu but we need the original bert gelu
-    return gelu(x)
+    # NOTE: contiguous for speed
+    return gelu(x).contiguous()
 
 class BertAttention:
   def __init__(self, hidden_size, num_attention_heads, attention_probs_dropout_prob, hidden_dropout_prob):
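
For context, here is a minimal runnable sketch of the pattern this commit applies. It assumes tinygrad's public API (Tensor.erf, Tensor.contiguous, nn.Linear); the constructor, the layer sizes, and the erf-based gelu are illustrative assumptions rather than verbatim repo code, and only __call__ mirrors the diff above.

import math
from tinygrad import Tensor, nn

def gelu(x: Tensor) -> Tensor:
  # erf-based "original" BERT gelu; tinygrad's built-in Tensor.gelu is the
  # tanh approximation, which is why the model defines its own (assumed form)
  return x * 0.5 * (1.0 + (x / math.sqrt(2.0)).erf())

class BertIntermediate:
  def __init__(self, hidden_size: int, intermediate_size: int):
    # hypothetical wiring: sizes and nn.Linear setup are assumptions here
    self.dense = nn.Linear(hidden_size, intermediate_size)

  def __call__(self, hidden_states: Tensor) -> Tensor:
    x = self.dense(hidden_states)
    # .contiguous() realizes the gelu output into its own buffer once; left
    # lazy, the gelu expression can be fused into (and so recomputed by)
    # every downstream kernel that reads this activation
    return gelu(x).contiguous()

if __name__ == "__main__":
  layer = BertIntermediate(hidden_size=768, intermediate_size=3072)
  out = layer(Tensor.randn(2, 128, 768))
  print(out.shape)  # (2, 128, 3072)

The tradeoff behind the change: contiguous() spends one buffer of memory on the realized activation, but without it tinygrad's lazy scheduler is free to inline the gelu expression into each consumer (the forward use and the backward pass), recomputing it per kernel. That repeated work is the recomputation the commit message refers to, and materializing the activation once is what makes the step faster.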