Skip to content

Commit 0cc62d0

Browse files
committed
add print.top2vec
1 parent dfb7d9b commit 0cc62d0

3 files changed

Lines changed: 24 additions & 6 deletions

File tree

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ S3method(as.matrix,paragraph2vec)
44
S3method(as.matrix,paragraph2vec_trained)
55
S3method(predict,paragraph2vec)
66
S3method(predict,paragraph2vec_trained)
7+
S3method(print,top2vec)
78
S3method(print,top2vec_summary)
89
S3method(summary,paragraph2vec)
910
S3method(summary,paragraph2vec_trained)

R/top2vec.R

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,17 @@
8888
#' ## with unrealistic hyperparameter settings especially regarding dim / iter / n_epochs
8989
#' ## in order to have a basic example finishing < 5 secs
9090
#' ##
91+
#' \dontshow{if(require(word2vec) && require(uwot) && require(dbscan))\{}
9192
#' library(uwot)
9293
#' library(dbscan)
94+
#' library(word2vec)
9395
#' data(be_parliament_2020, package = "doc2vec")
9496
#' x <- data.frame(doc_id = be_parliament_2020$doc_id,
9597
#' text = be_parliament_2020$text_nl,
9698
#' stringsAsFactors = FALSE)
97-
#' x <- head(x, 500)
99+
#' x <- head(x, 1000)
100+
#' x$text <- txt_clean_word2vec(x$text)
101+
#' x <- subset(x, txt_count_words(text) < 1000)
98102
#' d2v <- paragraph2vec(x, type = "PV-DBOW", dim = 10,
99103
#' lr = 0.05, iter = 0,
100104
#' window = 5, hs = TRUE, negative = 0,
@@ -104,11 +108,12 @@
104108
#' model <- top2vec(emb,
105109
#' data = x,
106110
#' control.dbscan = list(minPts = 50),
107-
#' control.umap = list(n_neighbors = 5, n_components = 2,
108-
#' n_epochs = 0, init = "spectral"),
111+
#' control.umap = list(n_neighbors = 15, n_components = 2,
112+
#' init = "spectral"),
109113
#' umap = tumap, trace = TRUE)
110114
#' info <- summary(model, top_n = 7)
111115
#' print(info, top_n = c(5, 2))
116+
#' \dontshow{\} # End of main if statement running only if the required packages are installed}
112117
top2vec <- function(x,
113118
data = data.frame(doc_id = character(), text = character(), stringsAsFactors = FALSE),
114119
control.umap = list(n_neighbors = 15L, n_components = 5L, metric = "cosine"),
@@ -173,6 +178,13 @@ top2vec <- function(x,
173178
out
174179
}
175180

181+
#' @export
182+
print.top2vec <- function(x, ...){
  # Print a short, human-readable overview of a top2vec model.
  #
  # x   : a top2vec object; this method reads x$embedding$docs (one row per
  #       document), x$k (number of topics) and x$size (per-topic sizes).
  # ... : unused, present for compatibility with the print() generic.
  #
  # Returns x invisibly, as print methods are expected to do, so that
  # `y <- print(x)` and piped calls keep working with the model object
  # instead of receiving the NULL returned by cat().
  n_docs <- nrow(x$embedding$docs)
  # Express the topic sizes as proportions of the total, rounded to 2 digits.
  topic_shares <- paste(round(prop.table(x$size), 2), collapse = " ")
  cat(sprintf("Top2vec model trained on %s documents", n_docs), sep = "\n")
  cat(sprintf(" number of topics: %s", x$k), sep = "\n")
  cat(sprintf(" topic distribution: %s", topic_shares), sep = "\n")
  invisible(x)
}
187+
176188

177189
#' @title Update a Top2vec model
178190
#' @description Update a Top2vec model by updating the UMAP dimension reduction together with the HDBSCAN clustering

man/top2vec.Rd

Lines changed: 8 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)