英文:
Error using missing values in Principal Components Analysis with missMDA
问题
我有一个数据集,其中一个变量存在缺失值;我正在尝试对它进行 PCA,但一直收到相同的错误消息。
# 包
library(tidyverse)
library(missMDA)
# 数据集
df <- structure(list(
A = c(39.64, 39.23, 38.89, 38.63, 38.44, 38.32, 38.27, 38.3, 38.4, 38.56, 38.79, 39.06, 39.36,
39.68, 40.01, 40.34, 40.68, 41.05, 41.46, 41.93, 42.48, 43.14, 43.92, 44.84, 45.9, 47.1, 48.4,
49.78, 51.2, 52.62, 54.01, 55.31, 56.52, 57.6, 58.54, 59.33, 59.98, 60.46, 60.78, 60.94, 60.92,
60.71, 60.3, 59.69, 58.87, 57.86, 56.67, 55.33, 53.87, 52.33, 50.75),
B = c(3.29, 3.29, 3.3, 3.32, 3.33, 3.35, 3.36, 3.38, 3.4, 3.42, 3.44, 3.46, 3.48, 3.5, 3.52, 3.53,
3.54, 3.55, 3.55, 3.54, 3.53, 3.51, 3.48, 3.44, 3.4, 3.35, 3.3, 3.24, 3.18, 3.12, 3.07, 3.02, 2.98,
2.96, 2.94, 2.94, 2.95, 2.98, 3.02, 3.08, 3.14, 3.22, 3.3, 3.4, 3.49, 3.59, 3.7, 3.8, 3.91, 4.02,
4.12),
C = c(NA, NA, NA, NA, NA, NA, NA, NA, 12.23, 4.3, 3.44, 3.49, 5.59, 0.76, 4.2, 4.61, 4.6, 8.03,
4.26, 3.31, 4.16, 0.37, -2.17, -1.93, -3.7, -0.48, -4.63, -2.89, -4.29, -2.35, -2.27, -1.66, -2.82,
-3.2, -2.55, -0.9, -2.42, -0.84, -1.46, -5.39, -2.57, -2.12, -0.59, 1.75, 3.7, 0.99, -3.28, -3.2,
-0.6, -0.61, -1.94)), class = "data.frame", row.names = c(NA, -51L))
# 估算维度
pcaDim <- estim_ncpPCA(X = df, method.cv = "Kfold")
# PCA
pca <- imputePCA(X = df, ncp = pcaDim)
Error in imputePCA(X = df, ncp = pcaDim) :
'list' object cannot be coerced to type 'double'
# 我尝试使用unlist函数修复此错误,但未能解决。
df <- mutate(df, across(everything(), ~as.numeric(unlist(.))))
希望这有帮助。
英文:
I have a dataset with missing values in one variable that I'm trying to perform a PCA on, however, I keep getting the same error message.
# packages
library(tidyverse)
library(missMDA)
# dataset
df <- structure(list(
A = c(39.64, 39.23, 38.89, 38.63, 38.44, 38.32, 38.27, 38.3, 38.4, 38.56, 38.79, 39.06, 39.36,
39.68, 40.01, 40.34, 40.68, 41.05, 41.46, 41.93, 42.48, 43.14, 43.92, 44.84, 45.9, 47.1, 48.4,
49.78, 51.2, 52.62, 54.01, 55.31, 56.52, 57.6, 58.54, 59.33, 59.98, 60.46, 60.78, 60.94, 60.92,
60.71, 60.3, 59.69, 58.87, 57.86, 56.67, 55.33, 53.87, 52.33, 50.75),
B = c(3.29, 3.29, 3.3, 3.32, 3.33, 3.35, 3.36, 3.38, 3.4, 3.42, 3.44, 3.46, 3.48, 3.5, 3.52, 3.53,
3.54, 3.55, 3.55, 3.54, 3.53, 3.51, 3.48, 3.44, 3.4, 3.35, 3.3, 3.24, 3.18, 3.12, 3.07, 3.02, 2.98,
2.96, 2.94, 2.94, 2.95, 2.98, 3.02, 3.08, 3.14, 3.22, 3.3, 3.4, 3.49, 3.59, 3.7, 3.8, 3.91, 4.02,
4.12),
C = c(NA, NA, NA, NA, NA, NA, NA, NA, 12.23, 4.3, 3.44, 3.49, 5.59, 0.76, 4.2, 4.61, 4.6, 8.03,
4.26, 3.31, 4.16, 0.37, -2.17, -1.93, -3.7, -0.48, -4.63, -2.89, -4.29, -2.35, -2.27, -1.66, -2.82,
-3.2, -2.55, -0.9, -2.42, -0.84, -1.46, -5.39, -2.57, -2.12, -0.59, 1.75, 3.7, 0.99, -3.28, -3.2,
-0.6, -0.61, -1.94)), class = "data.frame", row.names = c(NA, -51L))
# estimate number of dimensions
pcaDim <- estim_ncpPCA(X = df, method.cv = "Kfold")
# PCA
pca <- imputePCA(X = df, ncp = pcaDim)
Error in imputePCA(X = df, ncp = pcaDim) :
'list' object cannot be coerced to type 'double'
# I've tried to fix this error with the unlist function, but that has not helped.
df <- mutate(df, across(everything(), ~as.numeric(unlist(.))))
答案1
得分: 1
`imputePCA()` 中的 "ncp" 选项需要一个“与用于预测缺失条目的成分数量相对应的整数”;我的猜测是你应该将 `pcaDim$ncp` 传递给此参数,即:
#install.packages("missMDA")
library(missMDA)
# dataset
df <- structure(list(
A = c(39.64, 39.23, 38.89, 38.63, 38.44, 38.32, 38.27, 38.3, 38.4, 38.56, 38.79, 39.06, 39.36,
39.68, 40.01, 40.34, 40.68, 41.05, 41.46, 41.93, 42.48, 43.14, 43.92, 44.84, 45.9, 47.1, 48.4,
49.78, 51.2, 52.62, 54.01, 55.31, 56.52, 57.6, 58.54, 59.33, 59.98, 60.46, 60.78, 60.94, 60.92,
60.71, 60.3, 59.69, 58.87, 57.86, 56.67, 55.33, 53.87, 52.33, 50.75),
B = c(3.29, 3.29, 3.3, 3.32, 3.33, 3.35, 3.36, 3.38, 3.4, 3.42, 3.44, 3.46, 3.48, 3.5, 3.52, 3.53,
3.54, 3.55, 3.55, 3.54, 3.53, 3.51, 3.48, 3.44, 3.4, 3.35, 3.3, 3.24, 3.18, 3.12, 3.07, 3.02, 2.98,
2.96, 2.94, 2.94, 2.95, 2.98, 3.02, 3.08, 3.14, 3.22, 3.3, 3.4, 3.49, 3.59, 3.7, 3.8, 3.91, 4.02,
4.12),
C = c(NA, NA, NA, NA, NA, NA, NA, NA, 12.23, 4.3, 3.44, 3.49, 5.59, 0.76, 4.2, 4.61, 4.6, 8.03,
4.26, 3.31, 4.16, 0.37, -2.17, -1.93, -3.7, -0.48, -4.63, -2.89, -4.29, -2.35, -2.27, -1.66, -2.82,
-3.2, -2.55, -0.9, -2.42, -0.84, -1.46, -5.39, -2.57, -2.12, -0.59, 1.75, 3.7, 0.99, -3.28, -3.2,
-0.6, -0.61, -1.94)), class = "data.frame", row.names = c(NA, -51L))
# estimate number of dimensions
pcaDim <- estim_ncpPCA(X = df, method.cv = "Kfold")
#> |=| 1%
#> ...
#> |===============================================| 100%
# PCA
pca <- imputePCA(X = df, ncp = pcaDim$ncp)
pca
#> $completeObs
#> A B C
#> [1,] 39.64 3.29 3.165764
#> [2,] 39.23 3.29 3.282610
#> [3,] 38.89 3.30 3.386259
#> [4,] 38.63 3.32 3.473863
#> [5,] 38.44 3.33 3.534764
#> [6,] 38.32 3.35 3.582469
#> [7,] 38.27 3.36 3.603472
#> [8,] 38.30 3.38 3.608428
#> [9,] 38.40 3.40 12.230000
#> [10,] 38.56 3.42 4.300000
#> [11,] 38.79 3.44 3.440000
#> [12,] 39.06 3.46 3.490000
#> [13,] 39.36 3.48 5.590000
#> [14,] 39.68 3.50 0.760000
#> [15,] 40.01 3.52 4.200000
#> [16,] 40.34 3.53 4.610000
#> [17,] 40.68 3.54 4.600000
#> [18,] 41.05 3.55 8.030000
#> [19,] 41.46 3.55 4.260000
#> [20,] 41.93 3.54 3.310000
#> [21,] 42.48 3.53 4.160000
#> [22,] 43.14 3.51 0.370000
#> [23,] 43.92 3.48 -2.170000
#> [24,] 44.84 3.44 -1.930000
#> [25,] 45.90 3.40 -3.700000
#> [26,] 47.10 3.35 -0.480000
#> [27,] 48.40 3.30 -4.630000
#> ...(此处输出被截断,完整输出见下方英文原文)
<details>
<summary>英文:</summary>
The "ncp" option in `imputePCA()` requires an "integer corresponding to the number of components used to to predict the missing entries"; my guess is that you are supposed to pass `pcaDim$ncp` to this argument, i.e.
``` r
# Reproducible example: impute the missing values of `df` with missMDA,
# passing the estimated number of components (not the whole estimate object).
# Install missMDA once if it is not already available (left commented out).
#install.packages("missMDA")
# Only missMDA is loaded here; estim_ncpPCA() and imputePCA() below come from it.
library(missMDA)
# Example dataset: a data.frame with 51 rows and three numeric columns (A, B, C).
# Column C starts with eight NA values; those are the entries to be imputed.
df <- structure(list(
A = c(39.64, 39.23, 38.89, 38.63, 38.44, 38.32, 38.27, 38.3, 38.4, 38.56, 38.79, 39.06, 39.36,
39.68, 40.01, 40.34, 40.68, 41.05, 41.46, 41.93, 42.48, 43.14, 43.92, 44.84, 45.9, 47.1, 48.4,
49.78, 51.2, 52.62, 54.01, 55.31, 56.52, 57.6, 58.54, 59.33, 59.98, 60.46, 60.78, 60.94, 60.92,
60.71, 60.3, 59.69, 58.87, 57.86, 56.67, 55.33, 53.87, 52.33, 50.75),
B = c(3.29, 3.29, 3.3, 3.32, 3.33, 3.35, 3.36, 3.38, 3.4, 3.42, 3.44, 3.46, 3.48, 3.5, 3.52, 3.53,
3.54, 3.55, 3.55, 3.54, 3.53, 3.51, 3.48, 3.44, 3.4, 3.35, 3.3, 3.24, 3.18, 3.12, 3.07, 3.02, 2.98,
2.96, 2.94, 2.94, 2.95, 2.98, 3.02, 3.08, 3.14, 3.22, 3.3, 3.4, 3.49, 3.59, 3.7, 3.8, 3.91, 4.02,
4.12),
C = c(NA, NA, NA, NA, NA, NA, NA, NA, 12.23, 4.3, 3.44, 3.49, 5.59, 0.76, 4.2, 4.61, 4.6, 8.03,
4.26, 3.31, 4.16, 0.37, -2.17, -1.93, -3.7, -0.48, -4.63, -2.89, -4.29, -2.35, -2.27, -1.66, -2.82,
-3.2, -2.55, -0.9, -2.42, -0.84, -1.46, -5.39, -2.57, -2.12, -0.59, 1.75, 3.7, 0.99, -3.28, -3.2,
-0.6, -0.61, -1.94)), class = "data.frame", row.names = c(NA, -51L))
# Estimate the number of dimensions to use, with method.cv = "Kfold".
# The returned object is a list, not a single number: passing it whole as `ncp`
# is what raises "'list' object cannot be coerced to type 'double'".
pcaDim <- estim_ncpPCA(X = df, method.cv = "Kfold")
#> |=| 1%
#> ...
#> |===============================================| 100%
# Impute the missing entries, supplying only the `ncp` element of the estimate
# as the number of components.
pca <- imputePCA(X = df, ncp = pcaDim$ncp)
# Print the result; the output below shows its $completeObs and $fittedX parts.
pca
#> $completeObs
#> A B C
#> [1,] 39.64 3.29 3.165764
#> [2,] 39.23 3.29 3.282610
#> [3,] 38.89 3.30 3.386259
#> [4,] 38.63 3.32 3.473863
#> [5,] 38.44 3.33 3.534764
#> [6,] 38.32 3.35 3.582469
#> [7,] 38.27 3.36 3.603472
#> [8,] 38.30 3.38 3.608428
#> [9,] 38.40 3.40 12.230000
#> [10,] 38.56 3.42 4.300000
#> [11,] 38.79 3.44 3.440000
#> [12,] 39.06 3.46 3.490000
#> [13,] 39.36 3.48 5.590000
#> [14,] 39.68 3.50 0.760000
#> [15,] 40.01 3.52 4.200000
#> [16,] 40.34 3.53 4.610000
#> [17,] 40.68 3.54 4.600000
#> [18,] 41.05 3.55 8.030000
#> [19,] 41.46 3.55 4.260000
#> [20,] 41.93 3.54 3.310000
#> [21,] 42.48 3.53 4.160000
#> [22,] 43.14 3.51 0.370000
#> [23,] 43.92 3.48 -2.170000
#> [24,] 44.84 3.44 -1.930000
#> [25,] 45.90 3.40 -3.700000
#> [26,] 47.10 3.35 -0.480000
#> [27,] 48.40 3.30 -4.630000
#> [28,] 49.78 3.24 -2.890000
#> [29,] 51.20 3.18 -4.290000
#> [30,] 52.62 3.12 -2.350000
#> [31,] 54.01 3.07 -2.270000
#> [32,] 55.31 3.02 -1.660000
#> [33,] 56.52 2.98 -2.820000
#> [34,] 57.60 2.96 -3.200000
#> [35,] 58.54 2.94 -2.550000
#> [36,] 59.33 2.94 -0.900000
#> [37,] 59.98 2.95 -2.420000
#> [38,] 60.46 2.98 -0.840000
#> [39,] 60.78 3.02 -1.460000
#> [40,] 60.94 3.08 -5.390000
#> [41,] 60.92 3.14 -2.570000
#> [42,] 60.71 3.22 -2.120000
#> [43,] 60.30 3.30 -0.590000
#> [44,] 59.69 3.40 1.750000
#> [45,] 58.87 3.49 3.700000
#> [46,] 57.86 3.59 0.990000
#> [47,] 56.67 3.70 -3.280000
#> [48,] 55.33 3.80 -3.200000
#> [49,] 53.87 3.91 -0.600000
#> [50,] 52.33 4.02 -0.610000
#> [51,] 50.75 4.12 -1.940000
#>
#> $fittedX
#> [,1] [,2] [,3]
#> [1,] 42.95879 3.348589 3.1657679
#> [2,] 42.69009 3.350082 3.2826138
#> [3,] 42.42911 3.357875 3.3862636
#> [4,] 42.18243 3.371930 3.4738672
#> [5,] 42.01977 3.379176 3.5347685
#> [6,] 41.86484 3.392720 3.5824735
#> [7,] 41.79393 3.399456 3.6034761
#> [8,] 41.73731 3.412455 3.6084326
#> [9,] 33.61515 3.443555 7.1620564
#> [10,] 41.06141 3.439293 3.8665594
#> [11,] 41.89470 3.449846 3.4806705
#> [12,] 41.88883 3.462211 3.4641031
#> [13,] 39.96924 3.478843 4.2895758
#> [14,] 44.56603 3.480684 2.2482374
#> [15,] 41.39955 3.500073 3.6223793
#> [16,] 41.11064 3.506481 3.7405667
#> [17,] 41.22006 3.511967 3.6835383
#> [18,] 38.11028 3.524673 5.0428963
#> [19,] 41.81001 3.515415 3.4165720
#> [20,] 42.91628 3.505429 2.9414713
#> [21,] 42.36301 3.499026 3.1967494
#> [22,] 46.24205 3.475887 1.5124276
#> [23,] 49.02543 3.448501 0.3205734
#> [24,] 49.28392 3.420059 0.2500361
#> [25,] 51.48448 3.386920 -0.6744430
#> [26,] 49.08453 3.357422 0.4355638
#> [27,] 53.64523 3.311955 -1.5164266
#> [28,] 52.73688 3.272226 -1.0520184
#> [29,] 54.79311 3.225700 -1.8917432
#> [30,] 53.71243 3.186274 -1.3513846
#> [31,] 54.33523 3.149525 -1.5705957
#> [32,] 54.42535 3.114176 -1.5557606
#> [33,] 56.11116 3.081878 -2.2532735
#> [34,] 56.95039 3.064711 -2.5988257
#> [35,] 56.76797 3.050159 -2.4953707
#> [36,] 55.52461 3.051267 -1.9457068
#> [37,] 57.17241 3.052602 -2.6785073
#> [38,] 55.77920 3.074121 -2.0940370
#> [39,] 56.35838 3.097990 -2.3878837
#> [40,] 59.92066 3.128388 -4.0147352
#> [41,] 57.07359 3.173680 -2.8223941
#> [42,] 56.31515 3.227590 -2.5696341
#> [43,] 54.46483 3.284403 -1.8371731
#> [44,] 51.71262 3.356625 -0.7286491
#> [45,] 49.27708 3.422116 0.2498835
#> [46,] 51.11271 3.484818 -0.6613526
#> [47,] 54.31190 3.551291 -2.1831140
#> [48,] 53.39923 3.620926 -1.8863391
#> [49,] 50.04147 3.702821 -0.5242730
#> [50,] 49.10399 3.779411 -0.2272768
#> [51,] 49.42253 3.846778 -0.4729761
```

<sup>Created on 2023-06-06 with reprex v2.0.2</sup>
</details>
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论