英文:
How to visualize a correlation of one variable to many using the corrplot package in r
问题
我试图在R中使用corrplot包可视化一个变量(cl_wet)与其他16个变量之间的相关性。我的数据包含相当多的NA值,但我之前在相关性代码中已经成功排除了它们。只是可视化不起作用:
这是我运行的代码:
liqcor <- cor(x = liquid.wet$cl_wet, y = liquid.wet[2:17], use = "complete.obs")
corrplot(liqcor)
我一直在收到这些错误消息:
Error in symbols(Pos, add = TRUE, inches = FALSE, rectangles = matrix(1, :
invalid symbol coordinates
Warning in min(corr, na.rm = TRUE) :
no non-missing arguments to min; returning Inf
Warning in max(corr, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Error in symbols(Pos, add = TRUE, inches = FALSE, rectangles = matrix(1, :
invalid symbol coordinates
任何建议都会受到欢迎!以下是我用于重现的代码示例:
liquid.dataset <-
structure(
list(
cl_wet = c(
0.15738,
0.07897,
0.21313,
0.20552,
0.21005,
0.3,
0.30583,
0.29432,
0.22091,
0.14322,
0.17247,
0.29264,
0.12911,
0.2439,
0.32264,
0.333,
0.4097,
0.1386,
0.25436,
0.52432,
0.44101,
0.20917,
0.14436,
0.17538,
0.13455
),
Moisture = c(
95,
98,
95,
96,
95,
93,
89,
91,
88,
96,
96,
93,
96,
91,
89,
92,
88,
NA,
NA,
89,
89,
96,
96,
96,
97
),
Dry.matter = c(
5L,
2L,
5L,
4L,
5L,
7L,
11L,
9L,
12L,
4L,
4L,
7L,
4L,
9L,
11L,
8L,
12L,
NA,
NA,
11L,
11L,
4L,
4L,
4L,
3L
),
TN = c(
0.530443645,
0.28263789,
0.512529976,
0.497601918,
0.491630695,
0.666786571,
0.745407674,
0.723513189,
0.980275779,
0.617026379,
0.330407674,
0.719532374,
0.447841727,
0.768297362,
1.062877698,
0.91558753,
1.188273381,
NA,
NA,
0.878764988,
0.860851319,
0.468741007,
0.436894484,
0.371211031,
0.301546763
),
P2O5 = c(
0.179856115,
0.082733813,
0.179856115,
0.191846523,
0.191846523,
0.167865707,
0.179856115,
0.20383693,
0.383693046,
0.167865707,
0.101918465,
0.287769784,
0.143884892,
0.251798561,
0.419664269,
0.35971223,
0.575539568,
NA,
NA,
0.323741007,
0.35971223,
0.116306954,
0.143884892,
0.131894484,
0.10911271
),
K2O = c(
0.275779376,
0.1558753,
0.347721823,
0.323741007,
0.335731415,
0.419664269,
0.431654676,
0.431654676,
0.503597122,
0.347721823,
0.275779376,
0.455635492,
0.251798561,
0.419664269,
0.575539568,
0.575539568,
0.815347722,
NA,
NA,
0.539568345,
0.551558753,
0.383693046,
0.239808153,
0.227817746,
0.287769784
),
Na = c(
0.065947242,
0.037170264,
0.088729017,
0.082733813,
0.085131894,
NA,
NA,
NA,
NA,
NA,
0.082733813,
NA,
0.049160671,
0.100719424,
0.179856115,
0.179856115,
0.251798561,
NA,
NA,
0.143884892,
0.143884892,
0.088729017,
0.053956835,
0.076738609,
0.088729017
),
Ca = c(
0.083932854,
0.049160671,
0.075539568,
0.087529976,
0.086330935,
NA,
NA,
NA,
NA,
NA,
0.080335731,
NA,
<details>
<summary>英文:</summary>
I am trying to visualize the correlation of one variable (cl_wet) with 16 other variables in R using the corrplot package. My data does contain a fair amount of NA values, but I have been able to omit them in the correlation code I have used before. It just won't work for the visualization:
Here is the code I have been running:
liqcor <- cor(x = liquid.wet$cl_wet, y = liquid.wet[2:17], use = "complete.obs")
corrplot(liqcor)
And I have been receiving these errors:
Error in symbols(Pos, add = TRUE, inches = FALSE, rectangles = matrix(1, :
invalid symbol coordinates
Warning in min(corr, na.rm = TRUE) :
no non-missing arguments to min; returning Inf
Warning in max(corr, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Error in symbols(Pos, add = TRUE, inches = FALSE, rectangles = matrix(1, :
invalid symbol coordinates
Any advice is appreciated! Below is a sample of my code for reproducibility
liquid.dataset <-
structure(
list(
cl_wet = c(
0.15738,
0.07897,
0.21313,
0.20552,
0.21005,
0.3,
0.30583,
0.29432,
0.22091,
0.14322,
0.17247,
0.29264,
0.12911,
0.2439,
0.32264,
0.333,
0.4097,
0.1386,
0.25436,
0.52432,
0.44101,
0.20917,
0.14436,
0.17538,
0.13455
),
Moisture = c(
95,
98,
95,
96,
95,
93,
89,
91,
88,
96,
96,
93,
96,
91,
89,
92,
88,
NA,
NA,
89,
89,
96,
96,
96,
97
),
Dry.matter = c(
5L,
2L,
5L,
4L,
5L,
7L,
11L,
9L,
12L,
4L,
4L,
7L,
4L,
9L,
11L,
8L,
12L,
NA,
NA,
11L,
11L,
4L,
4L,
4L,
3L
),
TN = c(
0.530443645,
0.28263789,
0.512529976,
0.497601918,
0.491630695,
0.666786571,
0.745407674,
0.723513189,
0.980275779,
0.617026379,
0.330407674,
0.719532374,
0.447841727,
0.768297362,
1.062877698,
0.91558753,
1.188273381,
NA,
NA,
0.878764988,
0.860851319,
0.468741007,
0.436894484,
0.371211031,
0.301546763
),
P2O5 = c(
0.179856115,
0.082733813,
0.179856115,
0.191846523,
0.191846523,
0.167865707,
0.179856115,
0.20383693,
0.383693046,
0.167865707,
0.101918465,
0.287769784,
0.143884892,
0.251798561,
0.419664269,
0.35971223,
0.575539568,
NA,
NA,
0.323741007,
0.35971223,
0.116306954,
0.143884892,
0.131894484,
0.10911271
),
K2O = c(
0.275779376,
0.1558753,
0.347721823,
0.323741007,
0.335731415,
0.419664269,
0.431654676,
0.431654676,
0.503597122,
0.347721823,
0.275779376,
0.455635492,
0.251798561,
0.419664269,
0.575539568,
0.575539568,
0.815347722,
NA,
NA,
0.539568345,
0.551558753,
0.383693046,
0.239808153,
0.227817746,
0.287769784
),
Na = c(
0.065947242,
0.037170264,
0.088729017,
0.082733813,
0.085131894,
NA,
NA,
NA,
NA,
NA,
0.082733813,
NA,
0.049160671,
0.100719424,
0.179856115,
0.179856115,
0.251798561,
NA,
NA,
0.143884892,
0.143884892,
0.088729017,
0.053956835,
0.076738609,
0.088729017
),
Ca = c(
0.083932854,
0.049160671,
0.075539568,
0.087529976,
0.086330935,
NA,
NA,
NA,
NA,
NA,
0.080335731,
NA,
0.085131894,
0.09352518,
0.131894484,
0.131894484,
0.167865707,
NA,
NA,
0.112709832,
0.117505995,
0.052757794,
0.073141487,
0.086330935,
0.083932854
),
Mg = c(
0.059952038,
0.034772182,
0.07793765,
0.081534772,
0.081534772,
NA,
NA,
NA,
NA,
NA,
0.064748201,
NA,
0.050359712,
0.098321343,
0.119904077,
0.10911271,
0.167865707,
NA,
NA,
0.215827338,
0.227817746,
0.047961631,
0.056354916,
0.080335731,
0.061151079
),
Zn = c(
0.028776978,
0.013189448,
0.044364508,
0.044364508,
0.045563549,
NA,
NA,
NA,
NA,
NA,
0.004916067,
NA,
0.029976019,
0.007074341,
0.007553957,
0.006235012,
0.009952038,
NA,
NA,
0.006594724,
0.006714628,
0.013189448,
0.023980815,
0.005755396,
0.001115108
),
Fe = c(
0.008992806,
0.004916067,
0.023980815,
0.025179856,
0.026378897,
NA,
NA,
NA,
NA,
NA,
0.01558753,
NA,
0.010551559,
0.00911271,
0.017985612,
0.017985612,
0.022781775,
NA,
NA,
0.013189448,
0.01558753,
0.005635492,
0.008513189,
0.014388489,
0.002877698
),
Mn = c(
0.001918465,
0.000791367,
0.003597122,
0.003717026,
0.00383693,
NA,
NA,
NA,
NA,
NA,
0.001558753,
NA,
0.001798561,
0.001798561,
0.003117506,
0.00323741,
0.003956835,
NA,
NA,
0.003956835,
0.004316547,
0.001318945,
0.001558753,
0.001438849,
0.000683453
),
Cu = c(
0.003117506,
0.002398082,
0.007913669,
0.008393285,
0.008633094,
NA,
NA,
NA,
NA,
NA,
0.001558753,
NA,
0.002517986,
0.006354916,
0.003357314,
0.002278177,
0.004796163,
NA,
NA,
0.001558753,
0.001558753,
0.001438849,
0.00383693,
0.002398082,
0.000959233
),
S = c(
0.071942446,
0.028776978,
0.106714628,
0.107913669,
0.107913669,
NA,
NA,
NA,
NA,
NA,
0.028776978,
NA,
0.037170264,
0.068345324,
0.131894484,
0.131894484,
0.179856115,
NA,
NA,
0.095923261,
0.094724221,
0.081534772,
0.061151079,
0.034772182,
0.023980815
),
NH3 = c(
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
0.851318945,
0.731414868,
0.923261391,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
),
TC = c(
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_
),
NO3 = c(
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
0.001318945,
7.79e-05,
3.6e-05,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
)
),
row.names = c(NA, 25L),
class = "data.frame"
)
</details>
# 答案1
**得分**: 1
由于您的特征存在缺失值,且各特征的缺失值数量并不相同,因此不建议在`cor()`函数中使用`complete.obs`选项:该选项会完全删除包含缺失值的每一行,从而丢失那些本身并不缺失的成对比较的信息。相反,最好使用`'pairwise.complete.obs'`选项,它只会在计算某一对变量的相关性时删除该对变量中的缺失值。这样,每对变量之间的相关性或协方差都会使用这两个变量上所有完整的观测对来计算。
```R
# Correlate cl_wet with each of the other 16 columns. Pairwise deletion keeps
# every row that is complete for the pair being correlated, instead of only
# the rows that are complete across all columns.
liqcor <- cor(
  x = liquid.dataset$cl_wet,
  y = liquid.dataset[2:17],
  use = "pairwise.complete.obs"
)

# Plot the resulting 1 x 16 correlation matrix.
library(corrplot)
corrplot(
  liqcor,
  addgrid.col = "black",  # colour arguments take a colour, not a logical
  type = "upper",
  addCoef.col = "black",  # print each coefficient in its cell
  number.cex = 0.7,
  diag = TRUE,
  tl.cex = 0.9
)
```
[1]: https://i.stack.imgur.com/OH1jz.png
英文:
Since you have missing values and the number of missing values is not equal across your features, it is not recommended to use the `complete.obs` option of the `cor()` function: every row containing a missing value is removed entirely, so you lose information for pairwise comparisons whose own values are not actually missing. Instead, it is better to use the `'pairwise.complete.obs'` option, where missing values are removed only for the particular pairwise correlation being computed. Thus the correlation or covariance between each pair of variables is computed using all complete pairs of observations on those variables.
# Correlate cl_wet with each of the other 16 columns. Pairwise deletion keeps
# every row that is complete for the pair being correlated, instead of only
# the rows that are complete across all columns.
liqcor <- cor(
  x = liquid.dataset$cl_wet,
  y = liquid.dataset[2:17],
  use = "pairwise.complete.obs"
)

# Plot the resulting 1 x 16 correlation matrix.
library(corrplot)
corrplot(
  liqcor,
  addgrid.col = "black",  # colour arguments take a colour, not a logical
  type = "upper",
  addCoef.col = "black",  # print each coefficient in its cell
  number.cex = 0.7,
  diag = TRUE,
  tl.cex = 0.9
)
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论