Scale Embeddings to the Unit Hypersphere
normalize.Rd
Normalize embeddings such that their magnitude is 1, while their angle from the origin is unchanged.
Usage
normalize(x, ...)
# S3 method for class 'numeric'
normalize(x, ...)
normalize_rows(x, ...)
# Default S3 method
normalize_rows(x, ...)
# S3 method for class 'data.frame'
normalize_rows(x, cols, ...)
Examples
vec <- c(1, 4, 2)
normalize(vec)
#> [1] 0.2182179 0.8728716 0.4364358
valence_embeddings <- predict(glove_twitter_25d, c("good", "bad"))
normalize(valence_embeddings)
#> # 25-dimensional embeddings with 2 rows
#> dim_1 dim_2 dim_3 dim_4 dim_5 dim_6 dim_7 dim_8 dim_9 dim..
#> good -0.09 0.10 -0.02 -0.00 -0.02 0.10 0.36 0.03 -0.09 -0.04 ...
#> bad 0.08 0.00 0.01 -0.00 0.05 0.13 0.31 -0.02 -0.05 0.02 ...
embeddings_list <- find_nearest(glove_twitter_25d, c("good", "bad"), each = TRUE)
normalize(embeddings_list)
#> $good
#> # 25-dimensional embeddings with 10 rows
#> dim_1 dim_2 dim_3 dim_4 dim_5 dim_6 dim_7 dim_8 dim_9 dim..
#> good -0.09 0.10 -0.02 -0.00 -0.02 0.10 0.36 0.03 -0.09 -0.04 ...
#> too -0.08 0.07 0.02 -0.00 -0.04 0.08 0.29 0.01 -0.13 0.01 ...
#> day -0.16 0.09 0.06 -0.07 -0.01 -0.03 0.39 0.04 -0.08 -0.03 ...
#> well -0.01 0.04 -0.02 -0.11 -0.12 0.03 0.29 -0.05 -0.14 0.02 ...
#> nice -0.15 -0.02 -0.01 -0.03 0.03 0.19 0.26 0.00 -0.17 0.02 ...
#> better -0.02 0.12 -0.02 -0.05 -0.06 0.11 0.27 -0.04 -0.06 -0.07 ...
#> fun -0.13 0.10 0.03 -0.10 0.06 0.15 0.41 -0.03 -0.10 -0.00 ...
#> much -0.04 0.03 -0.04 0.02 -0.06 0.03 0.31 -0.03 -0.25 -0.02 ...
#> this -0.03 0.06 0.01 -0.05 -0.01 -0.06 0.33 -0.02 -0.09 -0.03 ...
#> hope -0.14 0.14 0.00 -0.04 -0.23 -0.06 0.32 0.03 -0.15 -0.04 ...
#>
#> $bad
#> # 25-dimensional embeddings with 10 rows
#> dim_1 dim_2 dim_3 dim_4 dim_5 dim_6 dim_7 dim_8 dim_9 dim..
#> bad 0.08 0.00 0.01 -0.00 0.05 0.13 0.31 -0.02 -0.05 0.02 ...
#> shit 0.11 0.12 0.07 0.02 0.05 0.11 0.20 -0.04 -0.11 0.07 ...
#> crazy -0.02 0.06 0.10 -0.05 0.07 0.17 0.33 0.00 -0.15 -0.04 ...
#> but 0.03 -0.00 -0.05 -0.03 -0.09 0.03 0.28 -0.10 -0.08 0.02 ...
#> hell 0.04 0.09 0.09 0.01 -0.05 0.06 0.29 -0.05 -0.11 0.09 ...
#> right -0.07 0.02 0.00 -0.02 -0.01 -0.01 0.30 -0.08 -0.08 0.08 ...
#> like 0.01 0.02 0.10 0.06 -0.05 0.07 0.29 -0.00 -0.11 0.03 ...
#> same 0.09 0.05 0.09 -0.00 -0.00 0.06 0.24 -0.01 -0.03 0.12 ...
#> damn 0.04 0.13 0.11 -0.01 0.00 0.13 0.28 0.00 -0.12 0.11 ...
#> thing 0.03 -0.00 0.04 0.01 0.07 -0.01 0.39 -0.11 -0.10 -0.03 ...
#>
valence_df <- tibble::as_tibble(valence_embeddings, rownames = "token")
valence_df |> normalize_rows(dim_1:dim_25)
#> # A tibble: 2 × 26
#> token dim_1 dim_2 dim_3 dim_4 dim_5 dim_6 dim_7 dim_8 dim_9
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 good -0.0906 0.100 -0.0242 -0.00390 -0.0229 0.100 0.365 0.0346 -0.0858
#> 2 bad 0.0773 0.00417 0.0106 -0.00196 0.0511 0.133 0.306 -0.0209 -0.0490
#> # ℹ 16 more variables: dim_10 <dbl>, dim_11 <dbl>, dim_12 <dbl>, dim_13 <dbl>,
#> # dim_14 <dbl>, dim_15 <dbl>, dim_16 <dbl>, dim_17 <dbl>, dim_18 <dbl>,
#> # dim_19 <dbl>, dim_20 <dbl>, dim_21 <dbl>, dim_22 <dbl>, dim_23 <dbl>,
#> # dim_24 <dbl>, dim_25 <dbl>