From a2195ef9901ee6d0c60f6435362ed9099b60cbf4 Mon Sep 17 00:00:00 2001
From: tomazweiss <38002502+tomazweiss@users.noreply.github.com>
Date: Fri, 8 Jul 2022 09:35:28 +0200
Subject: [PATCH] Update sentence-transformers.R

Previous example didn't work.
---
 R/sentence-transformers.R | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/R/sentence-transformers.R b/R/sentence-transformers.R
index 94f762e..622326d 100644
--- a/R/sentence-transformers.R
+++ b/R/sentence-transformers.R
@@ -21,15 +21,22 @@ hf_load_sentence_model <- function(model_id) {
 ##' examples
 ##' dontrun{
 ##' # Compute sentence embeddings
-##' sentences <- c("Baby turtles are so cute!", "He walks as slowly as a turtle.")
+##' sentences_one <- c("Baby turtles are so cute!", "He walks as slowly as a turtle.")
 ##' sentences_two <- c("The lake is cold today.", "I enjoy swimming in the lake.")
-##' sentences <- c(sentences, sentences_two)
+##' sentences <- c(sentences_one, sentences_two)
 ##' model <- hf_load_sentence_model('paraphrase-MiniLM-L6-v2')
 ##' embeddings <- model$encode(sentences)
-##' embeddings %>% dist() %>% as.matrix() %>% as.data.frame() %>% setNames(sentences)
-##' embddings <- embeddings %>% dplyr::mutate(`sentence 1` = sentences) %>% tidyr::pivot_longer(cols = -`sentence 1`, names_to = 'sentence 2', values_to = 'distance')
-##' embeddings <- embeddings %>% filter(distance > 0)
+##' distances <- embeddings %>% dist() %>% as.matrix() %>% as.data.frame() %>% setNames(sentences)
+##' distances <- distances %>%
+##' dplyr::mutate(`sentence 1` = sentences) %>%
+##' tidyr::pivot_longer(cols = -`sentence 1`, names_to = 'sentence 2', values_to = 'distance')
+##' distances <- distances %>% dplyr::filter(distance > 0)
+##' distances
 ##' # Cluster sentences
-##' embeddings <- embeddings %>% t() %>% prcomp() %>% purrr::pluck('rotation') %>% as.data.frame() %>% dplyr::mutate(sentence = sentences)
-##' plot <- embedidings %>% ggplot2::ggplot(aes(PC1, PC2)) + ggplot2::geom_label(ggplot2::aes(PC1, PC2, label = sentence, vjust="inward", hjust="inward")) + ggplot2::theme_minimal()
+##' embeddings_pca <- embeddings %>% t() %>% prcomp() %>% purrr::pluck('rotation') %>% as.data.frame() %>% dplyr::mutate(sentence = sentences)
+##' embeddings_pca %>%
+##' ggplot2::ggplot(ggplot2::aes(PC1, PC2)) +
+##' ggplot2::geom_point() +
+##' ggplot2::geom_text(ggplot2::aes(label = sentence), vjust="inward", hjust="inward") +
+##' ggplot2::theme_minimal()
 ##' }