add print.top2vec

jwijffels · jwijffels · commit 0cc62d0bf154 · 2021-03-25T17:59:57.000+01:00
diff --git a/NAMESPACE b/NAMESPACE
@@ -4,6 +4,7 @@ S3method(as.matrix,paragraph2vec)
 S3method(as.matrix,paragraph2vec_trained)
 S3method(predict,paragraph2vec)
 S3method(predict,paragraph2vec_trained)
+S3method(print,top2vec)
 S3method(print,top2vec_summary)
 S3method(summary,paragraph2vec)
 S3method(summary,paragraph2vec_trained)
diff --git a/R/top2vec.R b/R/top2vec.R
@@ -88,13 +88,17 @@
 #' ##  with unrealistic hyperparameter settings especially regarding dim / iter / n_epochs
 #' ##  in order to have a basic example finishing < 5 secs
 #' ##
+#' \dontshow{if(require(word2vec) && require(uwot) && require(dbscan))\{}
 #' library(uwot)
 #' library(dbscan)
+#' library(word2vec)
 #' data(be_parliament_2020, package = "doc2vec")
 #' x        <- data.frame(doc_id = be_parliament_2020$doc_id,
 #'                        text   = be_parliament_2020$text_nl,
 #'                        stringsAsFactors = FALSE)
-#' x        <- head(x, 500)
+#' x        <- head(x, 1000)
+#' x$text   <- txt_clean_word2vec(x$text)
+#' x        <- subset(x, txt_count_words(text) < 1000)
 #' d2v      <- paragraph2vec(x, type = "PV-DBOW", dim = 10, 
 #'                           lr = 0.05, iter = 0,
 #'                           window = 5, hs = TRUE, negative = 0,
@@ -104,11 +108,12 @@
 #' model    <- top2vec(emb, 
 #'                     data = x,
 #'                     control.dbscan = list(minPts = 50), 
-#'                     control.umap = list(n_neighbors = 5, n_components = 2, 
-#'                                         n_epochs = 0, init = "spectral"), 
+#'                     control.umap = list(n_neighbors = 15, n_components = 2, 
+#'                                         init = "spectral"), 
 #'                     umap = tumap, trace = TRUE)
 #' info     <- summary(model, top_n = 7)
 #' print(info, top_n = c(5, 2))
+#' \dontshow{\} # End of main if statement running only if the required packages are installed}
 top2vec <- function(x, 
                     data = data.frame(doc_id = character(), text = character(), stringsAsFactors = FALSE), 
                     control.umap = list(n_neighbors = 15L, n_components = 5L, metric = "cosine"), 
@@ -173,6 +178,19 @@ top2vec <- function(x, 
   out
 }
 
+#' @export
+print.top2vec <- function(x, ...){
+  ## Console summary of a top2vec model, one line per item:
+  ##  - the number of rows of x$embedding$docs, reported as the number of documents
+  ##  - x$k, reported as the number of topics
+  ##  - prop.table(x$size) rounded to 2 digits (presumably x$size holds the size of each topic, to be confirmed)
+  cat(sprintf("Top2vec model trained on %s documents", nrow(x$embedding$docs)), sep = "\n")
+  cat(sprintf("  number of topics: %s", x$k), sep = "\n")
+  cat(sprintf("  topic distribution: %s", paste(round(prop.table(x$size), 2), collapse = " ")), sep = "\n")
+  ## Return the model invisibly, as documented for print methods, so that y <- print(x) keeps the model
+  invisible(x)
+}
+
 
 #' @title Update a Top2vec model
 #' @description Update a Top2vec model by updating the UMAP dimension reduction together with the HDBSCAN clustering
diff --git a/man/top2vec.Rd b/man/top2vec.Rd