Update normalize method: replace sklearn's normalize with torch.nn.functional.normalize (L2, dim=1)
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
from transformers import AutoModel, AutoTokenizer
|
from transformers import AutoModel, AutoTokenizer
|
||||||
from sklearn.preprocessing import normalize
|
from sklearn.preprocessing import normalize
|
||||||
import torch
|
import torch
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
@@ -43,9 +44,7 @@ def acge_embedding(text: list[str]) -> list[list[float]]:
|
|||||||
~attention_mask[..., None].bool(), 0.0
|
~attention_mask[..., None].bool(), 0.0
|
||||||
)
|
)
|
||||||
vector = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
|
vector = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
|
||||||
vector = normalize(
|
# Normalize the output vectors
|
||||||
vector.cpu().detach().numpy(),
|
normalized_vector = F.normalize(vector, p=2, dim=1)
|
||||||
norm="l2",
|
return normalized_vector.tolist()
|
||||||
axis=1,
|
|
||||||
)
|
|
||||||
return vector.tolist()
|
|
||||||
|
|||||||
Reference in New Issue
Block a user