2023年3月31日 22:01:28go评论83阅读模式

英文:

How I can sort these data out in R?

问题

这是我的数据：

data <- data.frame(
  ID = 1:6,
  Course1A = c("A1", "A1", "A1", "A1", "A1", "A1"),
  Time1A = c(1, 1, 1, 1, 1, 1),
  Score1A = c(20, 17, 20, 16, 20, 11),
  Course2B = c("B1", "B1", "B1", "B1", "B1", "B1"),
  Time2B = c(1, 1, 1, 1, 1, 1),
  Score2B = c(12, 11, 15, 15, 10, 15),
  Course3C = c("C1", "C1", "C1", "C1", "C1", "C1"),
  TimeC3 = c(1, 1, 1, 1, 1, 1),
  ScoreC3 = c(10, 12, 12, 10, 10, 10),
  Course4D = c("D1", "D1", "D1", "D1", "D1", "D1"),
  TimeD4 = c(1, 1, 1, 1, 1, 1),
  ScoreD4 = c(20, 20, 18, 20, 17, 20),
  Course5E = c("E1", "E1", "E1", "E1", "E1", "E1"),
  TimeE5 = c(2, 2, 2, 2, 2, 2),
  Score5E = c(12, 12, 12, 12, 11, 11),
  Course6F = c("F1", "F1", "F1", "F1", "F1", "F1"),
  TimeF6 = c(2, 2, 2, 2, 2, 2),
  ScoreF6 = c(10, 10, 10, 10, 10, 9),
  Course7G = c("G1", "G1", "G1", "G1", "G1", "G1"),
  TimeG7 = c(3, 3, 3, 3, 3, 3),
  Score7G = c(12, 11, 6, 4, 12, 9),
  Course8H = c("H1", "H1", "H1", "H1", "H1", "H1"),
  TimeH8 = c(3, 3, 3, 3, 3, 3),
  Score8H = c(12, 12, 12, 12, 10, 10),
  Gender = c("F", "F", "F", "F", "F", "F"),
  Race = c("A", "B", "C", "C", "C", "C"),
  Health = c("Yes", "Yes", "Yes", "Yes", "Yes", "Yes"),
  stringsAsFactors = FALSE
)

我想获取ID、Course、Time、Gender、Race、Health和Score的列。

我已经使用了以下代码，但得到了错误的表格：

```R
reshaped_data <- data %>% 
  pivot_longer(
    cols = -c(ID, Gender, Race, Health),
    names_to = c(".value", "Group"),
    names_pattern = "([A-Za-z]+)([0-9]+[A-Z])"
  )

英文:

Here is my data:

data &lt;- data.frame(
ID = 1:6,
Course1A = c(&quot;A1&quot;, &quot;A1&quot;, &quot;A1&quot;, &quot;A1&quot;, &quot;A1&quot;, &quot;A1&quot;),
Time1A = c(1, 1, 1, 1, 1, 1),
Score1A = c(20, 17, 20, 16, 20, 11),
Course2B = c(&quot;B1&quot;, &quot;B1&quot;, &quot;B1&quot;, &quot;B1&quot;, &quot;B1&quot;, &quot;B1&quot;),
Time2B = c(1, 1, 1, 1, 1, 1),
Score2B = c(12, 11, 15, 15, 10, 15),
Course3C = c(&quot;C1&quot;, &quot;C1&quot;, &quot;C1&quot;, &quot;C1&quot;, &quot;C1&quot;, &quot;C1&quot;),
TimeC3 = c(1, 1, 1, 1, 1, 1),
ScoreC3 = c(10, 12, 12, 10, 10, 10),
Course4D = c(&quot;D1&quot;, &quot;D1&quot;, &quot;D1&quot;, &quot;D1&quot;, &quot;D1&quot;, &quot;D1&quot;),
TimeD4 = c(1, 1, 1, 1, 1, 1),
ScoreD4 = c(20, 20, 18, 20, 17, 20),
Course5E = c(&quot;E1&quot;, &quot;E1&quot;, &quot;E1&quot;, &quot;E1&quot;, &quot;E1&quot;, &quot;E1&quot;),
TimeE5 = c(2, 2, 2, 2, 2, 2),
Score5E = c(12, 12, 12, 12, 11, 11),
Course6F = c(&quot;F1&quot;, &quot;F1&quot;, &quot;F1&quot;, &quot;F1&quot;, &quot;F1&quot;, &quot;F1&quot;),
TimeF6 = c(2, 2, 2, 2, 2, 2),
ScoreF6 = c(10, 10, 10, 10, 10, 9),
Course7G = c(&quot;G1&quot;, &quot;G1&quot;, &quot;G1&quot;, &quot;G1&quot;, &quot;G1&quot;, &quot;G1&quot;),
TimeG7 = c(3, 3, 3, 3, 3, 3),
Score7G = c(12, 11, 6, 4, 12, 9),
Course8H = c(&quot;H1&quot;, &quot;H1&quot;, &quot;H1&quot;, &quot;H1&quot;, &quot;H1&quot;, &quot;H1&quot;),
TimeH8 = c(3, 3, 3, 3, 3, 3),
Score8H = c(12, 12, 12, 12, 10, 10),
Gender = c(&quot;F&quot;, &quot;F&quot;, &quot;F&quot;, &quot;F&quot;, &quot;F&quot;, &quot;F&quot;),
Race = c(&quot;A&quot;, &quot;B&quot;, &quot;C&quot;, &quot;C&quot;, &quot;C&quot;, &quot;C&quot;),
Health = c(&quot;Yes&quot;, &quot;Yes&quot;, &quot;Yes&quot;, &quot;Yes&quot;, &quot;Yes&quot;, &quot;Yes&quot;),
stringsAsFactors = FALSE

I want to get columns for ID, Course, Time, Gender, Race and health and Score.

I have used the following codes, but I get a wrong table

reshaped_data &lt;- data %&gt;% 
pivot_longer(
cols = -c(ID, Gender, Race, Health),
names_to = c(&quot;.value&quot;, &quot;Group&quot;),
names_pattern = &quot;([A-Za-z]+)([0-9]+[A-Z])&quot;
)

答案1

得分: 2

我认为，如果你使变量的后缀相同，可能会更容易理解。比如，如果你希望3C和C3得到相同的值，那么reshape就会更容易实现。

library(dplyr)
library(tidyr)
library(stringr)
data &lt;- data.frame(
  ID = 1:6,
  Course1A = c("A1", "A1", "A1", "A1", "A1", "A1"),
  Time1A = c(1, 1, 1, 1, 1, 1),
  Score1A = c(20, 17, 20, 16, 20, 11),
  Course2B = c("B1", "B1", "B1", "B1", "B1", "B1"),
  Time2B = c(1, 1, 1, 1, 1, 1),
  Score2B = c(12, 11, 15, 15, 10, 15),
  Course3C = c("C1", "C1", "C1", "C1", "C1", "C1"),
  TimeC3 = c(1, 1, 1, 1, 1, 1),
  ScoreC3 = c(10, 12, 12, 10, 10, 10),
  Course4D = c("D1", "D1", "D1", "D1", "D1", "D1"),
  TimeD4 = c(1, 1, 1, 1, 1, 1),
  ScoreD4 = c(20, 20, 18, 20, 17, 20),
  Course5E = c("E1", "E1", "E1", "E1", "E1", "E1"),
  TimeE5 = c(2, 2, 2, 2, 2, 2),
  Score5E = c(12, 12, 12, 12, 11, 11),
  Course6F = c("F1", "F1", "F1", "F1", "F1", "F1"),
  TimeF6 = c(2, 2, 2, 2, 2, 2),
  ScoreF6 = c(10, 10, 10, 10, 10, 9),
  Course7G = c("G1", "G1", "G1", "G1", "G1", "G1"),
  TimeG7 = c(3, 3, 3, 3, 3, 3),
  Score7G = c(12, 11, 6, 4, 12, 9),
  Course8H = c("H1", "H1", "H1", "H1", "H1", "H1"),
  TimeH8 = c(3, 3, 3, 3, 3, 3),
  Score8H = c(12, 12, 12, 12, 10, 10),
  Gender = c("F", "F", "F", "F", "F", "F"),
  Race = c("A", "B", "C", "C", "C", "C"),
  Health = c("Yes", "Yes", "Yes", "Yes", "Yes", "Yes"),
  stringsAsFactors = FALSE)

为此，我首先会找到你想要的变量，并保存词干（即，Time，Course，Score）和后缀（即，两个字符的结尾）。

sfx &lt;- gsub("(Time|Course|Score)(.*)", "\\2", grep("Time|Score|Course", names(data), value=TRUE))
stem &lt;- gsub("(Time|Course|Score)(.*)", "\\1", grep("Time|Score|Course", names(data), value=TRUE))

接下来，你可以将两个后缀字符拆分开，然后排序它们并粘贴在一起。

sfx &lt;- str_split(sfx, "", simplify=TRUE)
sfx &lt;- apply(sfx, 1, function(x)paste(sort(x), collapse=""))

然后，你可以将新理性化的后缀粘贴到词干上，并重新命名相关列。

names(data)[grep("Time|Score|Course", names(data))] &lt;- paste(stem, sfx, sep="")

然后，reshape就能按预期工作了。

reshaped_data &lt;- data %&gt;% 
  pivot_longer(
    cols = -c(ID, Gender, Race, Health),
    names_to = c(".value", "Group"),
    names_pattern = "(Course|Time|Score)(.{2})$"
  )
reshaped_data
#> # A tibble: 48 × 8
#>       ID Gender Race  Health Group Course  Time Score
#>    <int> <chr>  <chr> <chr>  <chr> <chr>   <dbl> <dbl>
#>  1     1 F      A     Yes    1A    A1          1    20
#>  2     1 F      A     Yes    2B    B1          1    12
#>  3     1 F      A     Yes    3C    C1          1    10
#>  4     1 F      A     Yes    4D    D1          1    20
#>  5     1 F      A     Yes    5E    E1          2    12
#>  6     1 F      A     Yes    6F    F1          2    10
#>  7     1 F      A     Yes    7G    G1          3    12
#>  8     1 F      A     Yes    8H    H1          3    12
#>  9     2 F      B     Yes    1A    A1          1    17
#> 10     2 F      B     Yes    2B    B1          1    11
#> # … with 38 more rows

英文:

I think it might be easiest if you rationalized the names for the variables to make the suffixes the same. For example, you want to make it so that 3C and C3 get the same value, then the reshape will work easily.

library(dplyr)
library(tidyr)
library(stringr)
data &lt;- data.frame(
  ID = 1:6,
  Course1A = c(&quot;A1&quot;, &quot;A1&quot;, &quot;A1&quot;, &quot;A1&quot;, &quot;A1&quot;, &quot;A1&quot;),
  Time1A = c(1, 1, 1, 1, 1, 1),
  Score1A = c(20, 17, 20, 16, 20, 11),
  Course2B = c(&quot;B1&quot;, &quot;B1&quot;, &quot;B1&quot;, &quot;B1&quot;, &quot;B1&quot;, &quot;B1&quot;),
  Time2B = c(1, 1, 1, 1, 1, 1),
  Score2B = c(12, 11, 15, 15, 10, 15),
  Course3C = c(&quot;C1&quot;, &quot;C1&quot;, &quot;C1&quot;, &quot;C1&quot;, &quot;C1&quot;, &quot;C1&quot;),
  TimeC3 = c(1, 1, 1, 1, 1, 1),
  ScoreC3 = c(10, 12, 12, 10, 10, 10),
  Course4D = c(&quot;D1&quot;, &quot;D1&quot;, &quot;D1&quot;, &quot;D1&quot;, &quot;D1&quot;, &quot;D1&quot;),
  TimeD4 = c(1, 1, 1, 1, 1, 1),
  ScoreD4 = c(20, 20, 18, 20, 17, 20),
  Course5E = c(&quot;E1&quot;, &quot;E1&quot;, &quot;E1&quot;, &quot;E1&quot;, &quot;E1&quot;, &quot;E1&quot;),
  TimeE5 = c(2, 2, 2, 2, 2, 2),
  Score5E = c(12, 12, 12, 12, 11, 11),
  Course6F = c(&quot;F1&quot;, &quot;F1&quot;, &quot;F1&quot;, &quot;F1&quot;, &quot;F1&quot;, &quot;F1&quot;),
  TimeF6 = c(2, 2, 2, 2, 2, 2),
  ScoreF6 = c(10, 10, 10, 10, 10, 9),
  Course7G = c(&quot;G1&quot;, &quot;G1&quot;, &quot;G1&quot;, &quot;G1&quot;, &quot;G1&quot;, &quot;G1&quot;),
  TimeG7 = c(3, 3, 3, 3, 3, 3),
  Score7G = c(12, 11, 6, 4, 12, 9),
  Course8H = c(&quot;H1&quot;, &quot;H1&quot;, &quot;H1&quot;, &quot;H1&quot;, &quot;H1&quot;, &quot;H1&quot;),
  TimeH8 = c(3, 3, 3, 3, 3, 3),
  Score8H = c(12, 12, 12, 12, 10, 10),
  Gender = c(&quot;F&quot;, &quot;F&quot;, &quot;F&quot;, &quot;F&quot;, &quot;F&quot;, &quot;F&quot;),
  Race = c(&quot;A&quot;, &quot;B&quot;, &quot;C&quot;, &quot;C&quot;, &quot;C&quot;, &quot;C&quot;),
  Health = c(&quot;Yes&quot;, &quot;Yes&quot;, &quot;Yes&quot;, &quot;Yes&quot;, &quot;Yes&quot;, &quot;Yes&quot;),
  stringsAsFactors = FALSE)

To do this, I would first find the variables you want and save the stem (i.e., Time, Course, Score) and the suffix (i.e., the two-character ending).

sfx &lt;- gsub(&quot;(Time|Course|Score)(.*)&quot;, &quot;\\2&quot;, grep(&quot;Time|Score|Course&quot;, names(data), value=TRUE))
stem &lt;- gsub(&quot;(Time|Course|Score)(.*)&quot;, &quot;\\1&quot;, grep(&quot;Time|Score|Course&quot;, names(data), value=TRUE))

Next, you can split the two suffix characters apart, then sort them and paste them back together.

sfx &lt;- str_split(sfx, &quot;&quot;, simplify=TRUE)
sfx &lt;- apply(sfx, 1, function(x)paste(sort(x), collapse=&quot;&quot;))

Then, you can paste the newly rationalized suffixes on to the stems and rename the relevant columns:

names(data)[grep(&quot;Time|Score|Course&quot;, names(data))] &lt;- paste(stem, sfx, sep=&quot;&quot;)

Then, the reshape works as expected.

reshaped_data &lt;- data %&gt;% 
  pivot_longer(
    cols = -c(ID, Gender, Race, Health),
    names_to = c(&quot;.value&quot;, &quot;Group&quot;),
    names_pattern = &quot;(Course|Time|Score)(.{2})$&quot;
  )
reshaped_data
#&gt; # A tibble: 48 &#215; 8
#&gt;       ID Gender Race  Health Group Course  Time Score
#&gt;    &lt;int&gt; &lt;chr&gt;  &lt;chr&gt; &lt;chr&gt;  &lt;chr&gt; &lt;chr&gt;  &lt;dbl&gt; &lt;dbl&gt;
#&gt;  1     1 F      A     Yes    1A    A1         1    20
#&gt;  2     1 F      A     Yes    2B    B1         1    12
#&gt;  3     1 F      A     Yes    3C    C1         1    10
#&gt;  4     1 F      A     Yes    4D    D1         1    20
#&gt;  5     1 F      A     Yes    5E    E1         2    12
#&gt;  6     1 F      A     Yes    6F    F1         2    10
#&gt;  7     1 F      A     Yes    7G    G1         3    12
#&gt;  8     1 F      A     Yes    8H    H1         3    12
#&gt;  9     2 F      B     Yes    1A    A1         1    17
#&gt; 10     2 F      B     Yes    2B    B1         1    11
#&gt; # … with 38 more rows

<sup>Created on 2023-03-31 with reprex v2.0.2</sup>

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

我如何在R中整理这些数据？

问题

答案1

R：验证抛硬币结果

如果模式在gsub中匹配，则仅返回值。

无法编译 ‘sass’，当尝试使用R安装包时。

如何在R中覆盖/替换特定行？

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

发表评论