问题

gs_name 列的值是重复的，每个值可能对应 hallmark 数据框中的一行或多行。我只想保留那些 gs_name 对应少于 25 行或多于 500 行的数据框行。

for (i in hallmark$gs_name) {
  if (25 <= nrow(hallmark) >= 500) {
    subset.df <- hallmark
  }
}

输入：

> dput(hallmark[c(1:5,300:305),3:4])
structure(list(gs_name = c("adipogenesis", "adipogenesis", "adipogenesis", 
"adipogenesis", "adipogenesis", "bile_acid_metabolism", "bile_acid_metabolism", 
"bile_acid_metabolism", "bile_acid_metabolism", "bile_acid_metabolism", 
"bile_acid_metabolism"), gene_symbol = c("ABCA1", "ABCB8", "ACAA2", 
"ACADL", "ACADM", "HSD17B4", "HSD17B6", "HSD3B1", "HSD3B7", "IDH1", 
"IDH2")), row.names = c(NA, -11L), class = c("tbl_df", "tbl", 
"data.frame"))

英文:

The values in the gs_name column repeat, so each value may correspond to one or more rows of the hallmark data frame. I want to keep only the rows whose gs_name value occurs in fewer than 25 rows or in more than 500 rows.

for (i in hallmark$gs_name) {
  if (25 <= nrow(hallmark) >= 500) {
    subset.df <- hallmark
  }
}

Input:

> dput(hallmark[c(1:5,300:305),3:4])
structure(list(gs_name = c("adipogenesis", "adipogenesis", "adipogenesis", 
"adipogenesis", "adipogenesis", "bile_acid_metabolism", "bile_acid_metabolism", 
"bile_acid_metabolism", "bile_acid_metabolism", "bile_acid_metabolism", 
"bile_acid_metabolism"), gene_symbol = c("ABCA1", "ABCB8", "ACAA2", 
"ACADL", "ACADM", "HSD17B4", "HSD17B6", "HSD3B1", "HSD3B7", "IDH1", 
"IDH2")), row.names = c(NA, -11L), class = c("tbl_df", "tbl", 
"data.frame"))

答案1

得分: 1

**data.table**

    library(data.table)
    setDT(hallmark )[, .SD[.N <= 25 | .N >= 500], by = gs_name]

英文:

An additional solution option:

data.table

library(data.table)
setDT(hallmark )[, .SD[.N <= 25 | .N >= 500], by = gs_name]

答案2

得分: 0

最简单的方法是按照 gs_name 对数据进行分组，并使用 dplyr 函数 n() 来满足你的两个条件，即：

library(dplyr)

hallmark <- structure(list(gs_name = c("adipogenesis", "adipogenesis", "adipogenesis", 
                           "adipogenesis", "adipogenesis", "bile_acid_metabolism", "bile_acid_metabolism", 
                           "bile_acid_metabolism", "bile_acid_metabolism", "bile_acid_metabolism", 
                           "bile_acid_metabolism"), gene_symbol = c("ABCA1", "ABCB8", "ACAA2", 
                                                                    "ACADL", "ACADM", "HSD17B4", "HSD17B6", "HSD3B1", "HSD3B7", "IDH1", 
                                                                    "IDH2")), row.names = c(NA, -11L), class = c("tbl_df", "tbl", 
                                                                                                                 "data.frame"))

hallmark %>%
  group_by(gs_name) %>%
  filter(n() <= 25 | n() >= 500) %>%
  ungroup()
#> # A tibble: 11 × 2
#>    gs_name              gene_symbol
#>    <chr>                <chr>
#>  1 adipogenesis         ABCA1
#>  2 adipogenesis         ABCB8
#>  3 adipogenesis         ACAA2
#>  4 adipogenesis         ACADL
#>  5 adipogenesis         ACADM
#>  6 bile_acid_metabolism HSD17B4
#>  7 bile_acid_metabolism HSD17B6
#>  8 bile_acid_metabolism HSD3B1
#>  9 bile_acid_metabolism HSD3B7
#> 10 bile_acid_metabolism IDH1
#> 11 bile_acid_metabolism IDH2

<sup>创建于2023年07月18日，使用 reprex v2.0.2</sup>

英文:

The easiest way is to group your data by gs_name and use the dplyr function n() to express your two conditions, i.e.:

library(dplyr)

hallmark <- structure(list(gs_name = c("adipogenesis", "adipogenesis", "adipogenesis", 
                           "adipogenesis", "adipogenesis", "bile_acid_metabolism", "bile_acid_metabolism", 
                           "bile_acid_metabolism", "bile_acid_metabolism", "bile_acid_metabolism", 
                           "bile_acid_metabolism"), gene_symbol = c("ABCA1", "ABCB8", "ACAA2", 
                                                                    "ACADL", "ACADM", "HSD17B4", "HSD17B6", "HSD3B1", "HSD3B7", "IDH1", 
                                                                    "IDH2")), row.names = c(NA, -11L), class = c("tbl_df", "tbl", 
                                                                                                                 "data.frame"))

hallmark %>%
  group_by(gs_name) %>%
  filter(n() <= 25 | n() >= 500) %>%
  ungroup()
#> # A tibble: 11 × 2
#>    gs_name              gene_symbol
#>    <chr>                <chr>
#>  1 adipogenesis         ABCA1
#>  2 adipogenesis         ABCB8
#>  3 adipogenesis         ACAA2
#>  4 adipogenesis         ACADL
#>  5 adipogenesis         ACADM
#>  6 bile_acid_metabolism HSD17B4
#>  7 bile_acid_metabolism HSD17B6
#>  8 bile_acid_metabolism HSD3B1
#>  9 bile_acid_metabolism HSD3B7
#> 10 bile_acid_metabolism IDH1
#> 11 bile_acid_metabolism IDH2

<sup>Created on 2023-07-18 with reprex v2.0.2</sup>

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

Subset rows based on number of occurrence in specific column

问题

答案1

答案2

如何将一个字符转换为日期类？

如何在R中有条件地复制和编辑行

只保留R中每个组中的最后一个重复项。

在R Markdown中，将特定的无名称代码块之间的分隔符从换行符更改为空格。

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

发表评论