如何在R中处理具有两行标题的数据框中的数据?

huangapple go评论77阅读模式
英文:

How to melt data in a data frame with a two-row header in R?

问题

以下是已翻译的代码部分:

data <- data.frame(Q1_1 = c("abc",1,2,3,4),
                   Q1_2 = c("def",2,3,6,2),
                   Q2_1 = c("ghi",4,5,3,1),
                   Q2_2 = c("jkl",6,3,8,4),
                   ID = c("ID", "AA", "BB", "CC", "DD"),
                   Order = c("Order", "zz", "ss", "tt", "qq"))
melted <- data.frame(response = c(1,2,3,4,2,3,6,2,4,5,3,1,6,3,8,4),
                    question_code = c("abc","abc","abc","abc",
                                      "def","def","def","def",
                                      "ghi","ghi","ghi","ghi",
                                      "jkl","jkl","jkl","jkl"),
                    question_number = c("Q1_1","Q1_1","Q1_1","Q1_1",
                                        "Q1_2","Q1_2","Q1_2","Q1_2",
                                        "Q2_1","Q2_1","Q2_1","Q2_1",
                                        "Q2_2","Q2_2","Q2_2","Q2_2"),
                    ID = c("AA", "BB", "CC", "DD",
                           "AA", "BB", "CC", "DD",
                           "AA", "BB", "CC", "DD",
                           "AA", "BB", "CC", "DD"),
                    Order = c("zz", "ss", "tt", "qq",
                              "zz", "ss", "tt", "qq",
                              "zz", "ss", "tt", "qq",
                              "zz", "ss", "tt", "qq"))

希望这有助于您完成您的任务。

英文:
data <- data.frame(Q1_1 = c("abc",1,2,3,4),
Q1_2 = c("def",2,3,6,2),
Q2_1 = c("ghi",4,5,3,1),
Q2_2 = c("jkl",6,3,8,4),
ID = c("ID", "AA", "BB", "CC", "DD"),
Order = c("Order", "zz", "ss", "tt", "qq"))

The above resembles my data frame in terms of its layout, which should look like the one below. The actual data are saved as in a CSV file, once you read it into R with read.csv(), it would create a data frame like this.

  Q1_1 Q1_2 Q2_1 Q2_2 ID Order
1  abc  def  ghi  klm ID Order
2    1    2    4    6 AA    zz
3    2    3    5    3 BB    ss
4    3    6    3    8 CC    tt
5    4    2    1    4 DD    qq

The trickiest thing about it is that the data file has the first two rows as headers, and once it is read into R, it produces a data frame that presents the secondary header in the first row.

This is what I want to end up seeing. There are the codes that I manually wrote.

melted <- data.frame(response = c(1,2,3,4,2,3,6,2,4,5,3,1,6,3,8,4),
question_code = c("abc","abc","abc","abc",
"def","def","def","def",
"ghi","ghi","ghi","ghi",
"jkl","jkl","jkl","jkl"),
question_number = c("Q1_1","Q1_1","Q1_1","Q1_1",
"Q1_2","Q1_2","Q1_2","Q1_2",
"Q2_1","Q2_1","Q2_1","Q2_1",
"Q2_2","Q2_2","Q2_2","Q2_2"),
ID = c("AA", "BB", "CC", "DD",
"AA", "BB", "CC", "DD",
"AA", "BB", "CC", "DD",
"AA", "BB", "CC", "DD"),
Order = c("zz", "ss", "tt", "qq",
"zz", "ss", "tt", "qq",
"zz", "ss", "tt", "qq",
"zz", "ss", "tt", "qq"))

And it should look like this...

   response question_code question_number ID Order
1         1           abc            Q1_1 AA    zz
2         2           abc            Q1_1 BB    ss
3         3           abc            Q1_1 CC    tt
4         4           abc            Q1_1 DD    qq
5         2           def            Q1_2 AA    zz
6         3           def            Q1_2 BB    ss
7         6           def            Q1_2 CC    tt
8         2           def            Q1_2 DD    qq
9         4           ghi            Q2_1 AA    zz
10        5           ghi            Q2_1 BB    ss
11        3           ghi            Q2_1 CC    tt
12        1           ghi            Q2_1 DD    qq
13        6           jkl            Q2_2 AA    zz
14        3           jkl            Q2_2 BB    ss
15        8           jkl            Q2_2 CC    tt
16        4           jkl            Q2_2 DD    qq

So basically I want to stack the values ("response") into one column while creating four columns as references. Two of the four columns are question_code and question__name, which are transposed from the original two-header. The rest two column simply presents ID and Order (I do not know if the way I explained is understandable, but all the codes are here).

If the data file has only one row as header, I know how to do it with melt(), but this two-row header seems to largely complicates the situation.

答案1

得分: 0

data %>%
  mutate(across(starts_with('Q'), ~c(str_c(cur_column(), .[1], sep=':'), .[-1]))) %>%
  set_names(.[1,]) %>%
  slice(-1) %>%
  pivot_longer(starts_with('Q'), values_to = 'response', names_sep = ':',
               names_to = c("question_number", "question_code"))

# A tibble: 16 × 5
   ID    Order question_number question_code response
   <chr> <chr> <chr>           <chr>         <chr>   
 1 AA    zz    Q1_1            abc           1       
 2 AA    zz    Q1_2            def           2       
 3 AA    zz    Q2_1            ghi           4       
 4 AA    zz    Q2_2            jkl           6       
 5 BB    ss    Q1_1            abc           2       
 6 BB    ss    Q1_2            def           3       
 7 BB    ss    Q2_1            ghi           5       
 8 BB    ss    Q2_2            jkl           3       
 9 CC    tt    Q1_1            abc           3       
10 CC    tt    Q1_2            def           6       
11 CC    tt    Q2_1            ghi           3       
12 CC    tt    Q2_2            jkl           8       
13 DD    qq    Q1_1            abc           4       
14 DD    qq    Q1_2            def           2       
15 DD    qq    Q2_1            ghi           1       
16 DD    qq    Q2_2            jkl           4 
英文:
data %&gt;%
mutate(across(starts_with(&#39;Q&#39;), ~c(str_c(cur_column(),.[1], sep=&#39;:&#39;), .[-1]))) %&gt;%
set_names(.[1,]) %&gt;%
slice(-1) %&gt;%
pivot_longer(starts_with(&#39;Q&#39;), values_to = &#39;response&#39;, names_sep = &#39;:&#39;, 
names_to = c(&quot;question_number&quot;, &quot;question_code&quot;))
# A tibble: 16 &#215; 5
ID    Order question_number question_code response
&lt;chr&gt; &lt;chr&gt; &lt;chr&gt;           &lt;chr&gt;         &lt;chr&gt;   
1 AA    zz    Q1_1            abc           1       
2 AA    zz    Q1_2            def           2       
3 AA    zz    Q2_1            ghi           4       
4 AA    zz    Q2_2            jkl           6       
5 BB    ss    Q1_1            abc           2       
6 BB    ss    Q1_2            def           3       
7 BB    ss    Q2_1            ghi           5       
8 BB    ss    Q2_2            jkl           3       
9 CC    tt    Q1_1            abc           3       
10 CC    tt    Q1_2            def           6       
11 CC    tt    Q2_1            ghi           3       
12 CC    tt    Q2_2            jkl           8       
13 DD    qq    Q1_1            abc           4       
14 DD    qq    Q1_2            def           2       
15 DD    qq    Q2_1            ghi           1       
16 DD    qq    Q2_2            jkl           4    

答案2

得分: 0

请检查替代方法

# 获取名称的第一行
row <- data[1,]

# 将第一行分配为数据集的名称
names(data) <- ifelse(!names(data) %in% c('ID','Order'), paste0(names(data),'#',row), names(data))

# 删除第一行
data2 <- data[2:nrow(data),]

# 转置数据
data3 <- data2 %>% pivot_longer(cols = starts_with('Q')) %>% 
  separate(name, into = c('question_number','question_code'), sep='#') %>% rename(response=value)

创建于2023年07月11日,使用 reprex v2.0.2;

# 一个 tibble: 16 × 5
   ID    Order question_number question_code response
   <chr> <chr> <chr>           <chr>         <chr>   
 1 AA    zz    Q1_1            abc           1       
 2 AA    zz    Q1_2            def           2       
 3 AA    zz    Q2_1            ghi           4       
 4 AA    zz    Q2_2            jkl           6       
 5 BB    ss    Q1_1            abc           2       
 6 BB    ss    Q1_2            def           3       
 7 BB    ss    Q2_1            ghi           5       
 8 BB    ss    Q2_2            jkl           3       
 9 CC    tt    Q1_1            abc           3       
10 CC    tt    Q1_2            def           6       
11 CC    tt    Q2_1            ghi           3       
12 CC    tt    Q2_2            jkl           8       
13 DD    qq    Q1_1            abc           4       
14 DD    qq    Q1_2            def           2       
15 DD    qq    Q2_1            ghi           1       
16 DD    qq    Q2_2            jkl           4       
英文:

Please check the alternate approach

# get the first row for names
row &lt;- data[1,]

# assign the first row as names of the dataset
names(data) &lt;- ifelse(!names(data) %in% c(&#39;ID&#39;,&#39;Order&#39;) ,paste0(names(data),&#39;#&#39;,row), names(data))

# remove the first row
data2 &lt;- data[2:nrow(data),]

# transpose the data
data3 &lt;- data2 %&gt;% pivot_longer(cols = starts_with(&#39;Q&#39;)) %&gt;% 
  separate(name, into = c(&#39;question_number&#39;,&#39;question_code&#39;), sep=&#39;#&#39;) %&gt;% rename(response=value)

<sup>Created on 2023-07-11 with reprex v2.0.2</sup>

# A tibble: 16 &#215; 5
   ID    Order question_number question_code response
   &lt;chr&gt; &lt;chr&gt; &lt;chr&gt;           &lt;chr&gt;         &lt;chr&gt;   
 1 AA    zz    Q1_1            abc           1       
 2 AA    zz    Q1_2            def           2       
 3 AA    zz    Q2_1            ghi           4       
 4 AA    zz    Q2_2            jkl           6       
 5 BB    ss    Q1_1            abc           2       
 6 BB    ss    Q1_2            def           3       
 7 BB    ss    Q2_1            ghi           5       
 8 BB    ss    Q2_2            jkl           3       
 9 CC    tt    Q1_1            abc           3       
10 CC    tt    Q1_2            def           6       
11 CC    tt    Q2_1            ghi           3       
12 CC    tt    Q2_2            jkl           8       
13 DD    qq    Q1_1            abc           4       
14 DD    qq    Q1_2            def           2       
15 DD    qq    Q2_1            ghi           1       
16 DD    qq    Q2_2            jkl           4       

huangapple
  • 本文由 发表于 2023年7月11日 12:18:11
  • 转载请务必保留本文链接:https://go.coder-hub.com/76658684.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定