使用 missMDA 对含缺失值的数据进行主成分分析(PCA)时出错

huangapple go评论72阅读模式
英文:

Error using missing values in Principal Components Analysis with missMDA

问题

我有一个数据集,其中一个变量存在缺失值。我正尝试对它进行 PCA,但一直收到相同的错误消息。

# Packages
library(tidyverse)
library(missMDA)

# Dataset: three numeric columns; the first eight entries of C are missing
df <- structure(list(
  A = c(39.64, 39.23, 38.89, 38.63, 38.44, 38.32, 38.27, 38.3, 38.4, 38.56, 38.79, 39.06, 39.36,
  39.68, 40.01, 40.34, 40.68, 41.05, 41.46, 41.93, 42.48, 43.14, 43.92, 44.84, 45.9, 47.1, 48.4,
  49.78, 51.2, 52.62, 54.01, 55.31, 56.52, 57.6, 58.54, 59.33, 59.98, 60.46, 60.78, 60.94, 60.92,
  60.71, 60.3, 59.69, 58.87, 57.86, 56.67, 55.33, 53.87, 52.33, 50.75),
  B = c(3.29, 3.29, 3.3, 3.32, 3.33, 3.35, 3.36, 3.38, 3.4, 3.42, 3.44, 3.46, 3.48, 3.5, 3.52, 3.53,
  3.54, 3.55, 3.55, 3.54, 3.53, 3.51, 3.48, 3.44, 3.4, 3.35, 3.3, 3.24, 3.18, 3.12, 3.07, 3.02, 2.98,
  2.96, 2.94, 2.94, 2.95, 2.98, 3.02, 3.08, 3.14, 3.22, 3.3, 3.4, 3.49, 3.59, 3.7, 3.8, 3.91, 4.02,
  4.12),
  C = c(NA, NA, NA, NA, NA, NA, NA, NA, 12.23, 4.3, 3.44, 3.49, 5.59, 0.76, 4.2, 4.61, 4.6, 8.03,
  4.26, 3.31, 4.16, 0.37, -2.17, -1.93, -3.7, -0.48, -4.63, -2.89, -4.29, -2.35, -2.27, -1.66, -2.82,
  -3.2, -2.55, -0.9, -2.42, -0.84, -1.46, -5.39, -2.57, -2.12, -0.59, 1.75, 3.7, 0.99, -3.28, -3.2,
  -0.6, -0.61, -1.94)), class = "data.frame", row.names = c(NA, -51L))

# Estimate the number of dimensions
pcaDim <- estim_ncpPCA(X = df, method.cv = "Kfold")

# PCA -- this is the call that raises the error shown below
pca <- imputePCA(X = df, ncp = pcaDim)
#> Error in imputePCA(X = df, ncp = pcaDim) :
#>   'list' object cannot be coerced to type 'double'

# I tried to fix this error with the unlist function, but that did not help.
df <- mutate(df, across(everything(), ~as.numeric(unlist(.))))

希望这有帮助。

英文:

I have a dataset with missing values in one variable that I'm trying to perform a PCA on; however, I keep getting the same error message.

# packages
library(tidyverse)
library(missMDA)

# dataset: three numeric columns; the first eight entries of C are missing
df <- structure(list(
  A = c(39.64, 39.23, 38.89, 38.63, 38.44, 38.32, 38.27, 38.3, 38.4, 38.56, 38.79, 39.06, 39.36,
  39.68, 40.01, 40.34, 40.68, 41.05, 41.46, 41.93, 42.48, 43.14, 43.92, 44.84, 45.9, 47.1, 48.4,
  49.78, 51.2, 52.62, 54.01, 55.31, 56.52, 57.6, 58.54, 59.33, 59.98, 60.46, 60.78, 60.94, 60.92,
  60.71, 60.3, 59.69, 58.87, 57.86, 56.67, 55.33, 53.87, 52.33, 50.75),
  B = c(3.29, 3.29, 3.3, 3.32, 3.33, 3.35, 3.36, 3.38, 3.4, 3.42, 3.44, 3.46, 3.48, 3.5, 3.52, 3.53,
  3.54, 3.55, 3.55, 3.54, 3.53, 3.51, 3.48, 3.44, 3.4, 3.35, 3.3, 3.24, 3.18, 3.12, 3.07, 3.02, 2.98,
  2.96, 2.94, 2.94, 2.95, 2.98, 3.02, 3.08, 3.14, 3.22, 3.3, 3.4, 3.49, 3.59, 3.7, 3.8, 3.91, 4.02,
  4.12),
  C = c(NA, NA, NA, NA, NA, NA, NA, NA, 12.23, 4.3, 3.44, 3.49, 5.59, 0.76, 4.2, 4.61, 4.6, 8.03,
  4.26, 3.31, 4.16, 0.37, -2.17, -1.93, -3.7, -0.48, -4.63, -2.89, -4.29, -2.35, -2.27, -1.66, -2.82,
  -3.2, -2.55, -0.9, -2.42, -0.84, -1.46, -5.39, -2.57, -2.12, -0.59, 1.75, 3.7, 0.99, -3.28, -3.2,
  -0.6, -0.61, -1.94)), class = "data.frame", row.names = c(NA, -51L))

# estimate number of dimensions
pcaDim <- estim_ncpPCA(X = df, method.cv = "Kfold")

# PCA -- this is the call that raises the error shown below
pca <- imputePCA(X = df, ncp = pcaDim)
#> Error in imputePCA(X = df, ncp = pcaDim) :
#>   'list' object cannot be coerced to type 'double'

# I've tried to fix this error with the unlist function, but that has not helped.
df <- mutate(df, across(everything(), ~as.numeric(unlist(.))))

答案1

得分: 1

imputePCA() 的 "ncp" 参数需要一个“对应于用于预测缺失条目的成分数量的整数”,而 estim_ncpPCA() 返回的是一个列表(因此才会出现 'list' object cannot be coerced to type 'double' 这个错误);我的猜测是你应该将 pcaDim$ncp 传递给此参数,即:

# install.packages("missMDA")
library(missMDA)

# Example data: A and B are complete; the first eight entries of C are NA
df <- data.frame(
  A = c(
    39.64, 39.23, 38.89, 38.63, 38.44, 38.32, 38.27, 38.3, 38.4, 38.56, 38.79, 39.06, 39.36,
    39.68, 40.01, 40.34, 40.68, 41.05, 41.46, 41.93, 42.48, 43.14, 43.92, 44.84, 45.9, 47.1, 48.4,
    49.78, 51.2, 52.62, 54.01, 55.31, 56.52, 57.6, 58.54, 59.33, 59.98, 60.46, 60.78, 60.94, 60.92,
    60.71, 60.3, 59.69, 58.87, 57.86, 56.67, 55.33, 53.87, 52.33, 50.75
  ),
  B = c(
    3.29, 3.29, 3.3, 3.32, 3.33, 3.35, 3.36, 3.38, 3.4, 3.42, 3.44, 3.46, 3.48, 3.5, 3.52, 3.53,
    3.54, 3.55, 3.55, 3.54, 3.53, 3.51, 3.48, 3.44, 3.4, 3.35, 3.3, 3.24, 3.18, 3.12, 3.07, 3.02, 2.98,
    2.96, 2.94, 2.94, 2.95, 2.98, 3.02, 3.08, 3.14, 3.22, 3.3, 3.4, 3.49, 3.59, 3.7, 3.8, 3.91, 4.02,
    4.12
  ),
  C = c(
    NA, NA, NA, NA, NA, NA, NA, NA, 12.23, 4.3, 3.44, 3.49, 5.59, 0.76, 4.2, 4.61, 4.6, 8.03,
    4.26, 3.31, 4.16, 0.37, -2.17, -1.93, -3.7, -0.48, -4.63, -2.89, -4.29, -2.35, -2.27, -1.66, -2.82,
    -3.2, -2.55, -0.9, -2.42, -0.84, -1.46, -5.39, -2.57, -2.12, -0.59, 1.75, 3.7, 0.99, -3.28, -3.2,
    -0.6, -0.61, -1.94
  )
)

# Estimate the number of components with K-fold cross-validation
pcaDim <- estim_ncpPCA(df, method.cv = "Kfold")
#> |=| 1%
#> ...
#> |===============================================| 100%

# Run the imputation, passing only the `ncp` element of the estimate
pca <- imputePCA(df, ncp = pcaDim$ncp)

pca
#> $completeObs
#>           A    B         C
#>  [1,] 39.64 3.29  3.165764
#>  [2,] 39.23 3.29  3.282610
#>  [3,] 38.89 3.30  3.386259
#>  [4,] 38.63 3.32  3.473863
#>  [5,] 38.44 3.33  3.534764
#>  [6,] 38.32 3.35  3.582469
#>  [7,] 38.27 3.36  3.603472
#>  [8,] 38.30 3.38  3.608428
#>  [9,] 38.40 3.40 12.230000
#> [10,] 38.56 3.42  4.300000
#> [11,] 38.79 3.44  3.440000
#> [12,] 39.06 3.46  3.490000
#> [13,] 39.36 3.48  5.590000
#> [14,] 39.68 3.50  0.760000
#> [15,] 40.01 3.52  4.200000
#> [16,] 40.34 3.53  4.610000
#> [17,] 40.68 3.54  4.600000
#> [18,] 41.05 3.55  8.030000
#> [19,] 41.46 3.55  4.260000
#> [20,] 41.93 3.54  3.310000
#> [21,] 42.48 3.53  4.160000
#> [22,] 43.14 3.51  0.370000
#> [23,] 43.92 3.48 -2.170000
#> [24,] 44.84 3.44 -1.930000
#> [25,] 45.90 3.40 -3.700000
#> [26,] 47.10 3.35 -0.480000
#> [27,] 48.40 3.30 -4.630000
#> ...

<details>
<summary>英文:</summary>

The "ncp" option in `imputePCA()` requires an "integer corresponding to the number of components used to predict the missing entries", whereas `estim_ncpPCA()` returns a list (hence the `'list' object cannot be coerced to type 'double'` error); my guess is that you are supposed to pass `pcaDim$ncp` to this argument, i.e.

``` r
# install.packages("missMDA")
library(missMDA)

# dataset: three numeric columns; the first eight entries of C are missing
df <- structure(list(
  A = c(39.64, 39.23, 38.89, 38.63, 38.44, 38.32, 38.27, 38.3, 38.4, 38.56, 38.79, 39.06, 39.36,
        39.68, 40.01, 40.34, 40.68, 41.05, 41.46, 41.93, 42.48, 43.14, 43.92, 44.84, 45.9, 47.1, 48.4,
        49.78, 51.2, 52.62, 54.01, 55.31, 56.52, 57.6, 58.54, 59.33, 59.98, 60.46, 60.78, 60.94, 60.92,
        60.71, 60.3, 59.69, 58.87, 57.86, 56.67, 55.33, 53.87, 52.33, 50.75),
  B = c(3.29, 3.29, 3.3, 3.32, 3.33, 3.35, 3.36, 3.38, 3.4, 3.42, 3.44, 3.46, 3.48, 3.5, 3.52, 3.53,
        3.54, 3.55, 3.55, 3.54, 3.53, 3.51, 3.48, 3.44, 3.4, 3.35, 3.3, 3.24, 3.18, 3.12, 3.07, 3.02, 2.98,
        2.96, 2.94, 2.94, 2.95, 2.98, 3.02, 3.08, 3.14, 3.22, 3.3, 3.4, 3.49, 3.59, 3.7, 3.8, 3.91, 4.02,
        4.12),
  C = c(NA, NA, NA, NA, NA, NA, NA, NA, 12.23, 4.3, 3.44, 3.49, 5.59, 0.76, 4.2, 4.61, 4.6, 8.03,
        4.26, 3.31, 4.16, 0.37, -2.17, -1.93, -3.7, -0.48, -4.63, -2.89, -4.29, -2.35, -2.27, -1.66, -2.82,
        -3.2, -2.55, -0.9, -2.42, -0.84, -1.46, -5.39, -2.57, -2.12, -0.59, 1.75, 3.7, 0.99, -3.28, -3.2,
        -0.6, -0.61, -1.94)), class = "data.frame", row.names = c(NA, -51L))

# estimate number of dimensions
pcaDim <- estim_ncpPCA(X = df, method.cv = "Kfold")
#> |=| 1%
#> ...
#> |===============================================| 100%

# PCA: pass the `ncp` element of the estimate rather than the whole object
pca <- imputePCA(X = df, ncp = pcaDim$ncp)

pca
#&gt; $completeObs
#&gt;           A    B         C
#&gt;  [1,] 39.64 3.29  3.165764
#&gt;  [2,] 39.23 3.29  3.282610
#&gt;  [3,] 38.89 3.30  3.386259
#&gt;  [4,] 38.63 3.32  3.473863
#&gt;  [5,] 38.44 3.33  3.534764
#&gt;  [6,] 38.32 3.35  3.582469
#&gt;  [7,] 38.27 3.36  3.603472
#&gt;  [8,] 38.30 3.38  3.608428
#&gt;  [9,] 38.40 3.40 12.230000
#&gt; [10,] 38.56 3.42  4.300000
#&gt; [11,] 38.79 3.44  3.440000
#&gt; [12,] 39.06 3.46  3.490000
#&gt; [13,] 39.36 3.48  5.590000
#&gt; [14,] 39.68 3.50  0.760000
#&gt; [15,] 40.01 3.52  4.200000
#&gt; [16,] 40.34 3.53  4.610000
#&gt; [17,] 40.68 3.54  4.600000
#&gt; [18,] 41.05 3.55  8.030000
#&gt; [19,] 41.46 3.55  4.260000
#&gt; [20,] 41.93 3.54  3.310000
#&gt; [21,] 42.48 3.53  4.160000
#&gt; [22,] 43.14 3.51  0.370000
#&gt; [23,] 43.92 3.48 -2.170000
#&gt; [24,] 44.84 3.44 -1.930000
#&gt; [25,] 45.90 3.40 -3.700000
#&gt; [26,] 47.10 3.35 -0.480000
#&gt; [27,] 48.40 3.30 -4.630000
#&gt; [28,] 49.78 3.24 -2.890000
#&gt; [29,] 51.20 3.18 -4.290000
#&gt; [30,] 52.62 3.12 -2.350000
#&gt; [31,] 54.01 3.07 -2.270000
#&gt; [32,] 55.31 3.02 -1.660000
#&gt; [33,] 56.52 2.98 -2.820000
#&gt; [34,] 57.60 2.96 -3.200000
#&gt; [35,] 58.54 2.94 -2.550000
#&gt; [36,] 59.33 2.94 -0.900000
#&gt; [37,] 59.98 2.95 -2.420000
#&gt; [38,] 60.46 2.98 -0.840000
#&gt; [39,] 60.78 3.02 -1.460000
#&gt; [40,] 60.94 3.08 -5.390000
#&gt; [41,] 60.92 3.14 -2.570000
#&gt; [42,] 60.71 3.22 -2.120000
#&gt; [43,] 60.30 3.30 -0.590000
#&gt; [44,] 59.69 3.40  1.750000
#&gt; [45,] 58.87 3.49  3.700000
#&gt; [46,] 57.86 3.59  0.990000
#&gt; [47,] 56.67 3.70 -3.280000
#&gt; [48,] 55.33 3.80 -3.200000
#&gt; [49,] 53.87 3.91 -0.600000
#&gt; [50,] 52.33 4.02 -0.610000
#&gt; [51,] 50.75 4.12 -1.940000
#&gt; 
#&gt; $fittedX
#&gt;           [,1]     [,2]       [,3]
#&gt;  [1,] 42.95879 3.348589  3.1657679
#&gt;  [2,] 42.69009 3.350082  3.2826138
#&gt;  [3,] 42.42911 3.357875  3.3862636
#&gt;  [4,] 42.18243 3.371930  3.4738672
#&gt;  [5,] 42.01977 3.379176  3.5347685
#&gt;  [6,] 41.86484 3.392720  3.5824735
#&gt;  [7,] 41.79393 3.399456  3.6034761
#&gt;  [8,] 41.73731 3.412455  3.6084326
#&gt;  [9,] 33.61515 3.443555  7.1620564
#&gt; [10,] 41.06141 3.439293  3.8665594
#&gt; [11,] 41.89470 3.449846  3.4806705
#&gt; [12,] 41.88883 3.462211  3.4641031
#&gt; [13,] 39.96924 3.478843  4.2895758
#&gt; [14,] 44.56603 3.480684  2.2482374
#&gt; [15,] 41.39955 3.500073  3.6223793
#&gt; [16,] 41.11064 3.506481  3.7405667
#&gt; [17,] 41.22006 3.511967  3.6835383
#&gt; [18,] 38.11028 3.524673  5.0428963
#&gt; [19,] 41.81001 3.515415  3.4165720
#&gt; [20,] 42.91628 3.505429  2.9414713
#&gt; [21,] 42.36301 3.499026  3.1967494
#&gt; [22,] 46.24205 3.475887  1.5124276
#&gt; [23,] 49.02543 3.448501  0.3205734
#&gt; [24,] 49.28392 3.420059  0.2500361
#&gt; [25,] 51.48448 3.386920 -0.6744430
#&gt; [26,] 49.08453 3.357422  0.4355638
#&gt; [27,] 53.64523 3.311955 -1.5164266
#&gt; [28,] 52.73688 3.272226 -1.0520184
#&gt; [29,] 54.79311 3.225700 -1.8917432
#&gt; [30,] 53.71243 3.186274 -1.3513846
#&gt; [31,] 54.33523 3.149525 -1.5705957
#&gt; [32,] 54.42535 3.114176 -1.5557606
#&gt; [33,] 56.11116 3.081878 -2.2532735
#&gt; [34,] 56.95039 3.064711 -2.5988257
#&gt; [35,] 56.76797 3.050159 -2.4953707
#&gt; [36,] 55.52461 3.051267 -1.9457068
#&gt; [37,] 57.17241 3.052602 -2.6785073
#&gt; [38,] 55.77920 3.074121 -2.0940370
#&gt; [39,] 56.35838 3.097990 -2.3878837
#&gt; [40,] 59.92066 3.128388 -4.0147352
#&gt; [41,] 57.07359 3.173680 -2.8223941
#&gt; [42,] 56.31515 3.227590 -2.5696341
#&gt; [43,] 54.46483 3.284403 -1.8371731
#&gt; [44,] 51.71262 3.356625 -0.7286491
#&gt; [45,] 49.27708 3.422116  0.2498835
#&gt; [46,] 51.11271 3.484818 -0.6613526
#&gt; [47,] 54.31190 3.551291 -2.1831140
#&gt; [48,] 53.39923 3.620926 -1.8863391
#&gt; [49,] 50.04147 3.702821 -0.5242730
#&gt; [50,] 49.10399 3.779411 -0.2272768
#&gt; [51,] 49.42253 3.846778 -0.4729761
```

<sup>Created on 2023-06-06 with reprex v2.0.2</sup>

</details>

huangapple
  • 本文由 发表于 2023年6月6日 07:22:28
  • 转载请务必保留本文链接:https://go.coder-hub.com/76410556.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定