英文:
Error in `[[<-`(`*tmp*`, i, value = sub("\\_.*", "", i)) : attempt to select more than one element in vectorIndex
问题
The `Sample.ID` column in the `sample.sheet` dataframe corresponds to the one in the `clin.info` dataframe and may contain duplicates. The `File.Name` column of the `sample.sheet` dataframe contains a substring that is extracted as `basename`. Only duplicated `basename` prefixes are retained for further analyses.
First, I find the rows of `sample.sheet` and `clin.info` with common `Sample.ID` information.
# Keep only the sample-sheet rows whose Sample.ID also occurs in clin.info.
in_clin <- sample.sheet$Sample.ID %in% clin.info$Sample.ID
sample.sheet <- sample.sheet[in_clin, ]
# Then keep only the clinical rows that still have a row in the filtered
# sample sheet, so both tables end up covering the same set of IDs.
in_sheet <- clin.info$Sample.ID %in% sample.sheet$Sample.ID
clin.info <- clin.info[in_sheet, ]
Then, I remove the substring and retain only samples with duplicate prefixes.
# Pattern for the last "_"-separated piece of a file name, anchored at the end
# of the string (for "<id>_noid_Red.idat" it matches "_Red.idat").
rgx_undsc <- "_[^_]*$"

# NOTE: the result is stored under the name `basename`, which is also the name
# of a base R function.
basename <- sample.sheet %>%
  # Strip that trailing piece so files sharing a stem share one file_prefix.
  mutate(file_prefix = str_remove(File.Name, rgx_undsc)) %>%
  # Tally the rows per prefix; .groups = "drop" returns an ungrouped result.
  group_by(file_prefix) %>%
  summarize(n = n(), .groups = "drop") %>%
  # A prefix seen more than once is a duplicate; keep only those.
  filter(n > 1) %>%
  select(file_prefix)
If `Sample.ID` has more than one pair, take the first pair.
# NOTE: this loop is the code that raises the error shown in the traceback.
# for() over a data frame walks its columns, not its rows, so on each pass `i`
# is an entire column: a vector with one value per row kept by the filter
# (rows whose Sample.ID occurs more than twice).
for(i in sample.sheet %>% group_by(Sample.ID) %>% filter(n() > 2)){
# Get the pairs
# `[[<-` accepts a single index only; indexing with the multi-element `i`
# fails with "attempt to select more than one element in vectorIndex".
# `pairs` is not created anywhere in the visible code.
pairs[[i]] <- sub("\\_.*", "", i)
# Always binds the first element of `pairs`, regardless of the current `i`.
basename <- cbind(basename, pairs[[1]])
}
Traceback:
Error in `[[<-`(`*tmp*`, i, value = sub("\\_.*", "", i)) :
attempt to select more than one element in vectorIndex
Data:
Example:
> dput(sample.sheet)
structure(list(File.Name = c("9d797850-f4e8-4799-8e36-f9611ce9ef99_noid_Red.idat",
"fdd60aca-ac18-43c0-b191-94fca93fb0b4_noid_Grn.idat", "fdd60aca-ac18-43c0-b191-94fca93fb0b4_noid_Red.idat",
"4086487a-70af-4091-8691-a04adbdefd94_noid_Red.idat", "9514c72b-27c9-4bff-bb8c-4cd809ab99cf_noid_Grn.idat",
"9514c72b-27c9-4bff-bb8c-4cd809ab99cf_noid_Red.idat", "ca04f909-7afa-4feb-b160-ae0e363e2004_noid_Grn.idat",
"295b1e9d-315d-4f87-8a86-38a9c2264f00_noid_Grn.idat", "6cab56c6-d37b-4bb2-b575-c0c924fe312d_noid_Grn.idat",
"71578b77-2494-4d65-8187-809a9277eb2f_noid_Grn.idat", "182cce6f-eb18-4c51-9c8d-47a0770ef79d_noid_Grn.idat",
"19d2e9e5-796e-4df5-96aa-68f5d0fa1855_noid_Grn.idat", "7dc90b83-0171-4600-883a-8de97c0e57cc_noid_Grn.idat",
"a3b85dd8-5621-4665-bafd-7edcf7c05aa4_noid_Red.idat", "b53495eb-ca7a-455b-b45c-338ef3689ca2_noid_Grn.idat",
"69df812d-f5cc-4c1d-ac76-01530e7df677_noid_Red.idat", "69df812d-f5cc-4c1d-ac76-01530e7df677_noid_Grn.idat",
"5723e331-d8e6-4687-ba89-3c0e9a8c9317_noid_Grn.idat", "6445036e-69bf-4f60-a1c1-b2506e32e53c_noid_Red.idat",
"c65756c7-7ef4-4c90-ac3a-15cacf6fa83e_noid_Red.idat", "4d87a92b-998b-4af6-a45b-a1cc95719597_noid_Grn.idat",
"e26a7836-17af-4498-8d69-
<details>
<summary>英文:</summary>
The `Sample.ID` column in the `sample.sheet` dataframe corresponds to that in the `clin.info` dataframe and may be duplicated. The `File.Name` column of the `sample.sheet` dataframe contains a substring which is extracted as `basename`. Only duplicate `basename` prefixes are retained for further analyses.
First, I find the rows of `sample.sheet` and `clin.info` with common `Sample.ID` info.
# Keep only the sample-sheet rows whose Sample.ID also occurs in clin.info.
in_clin <- sample.sheet$Sample.ID %in% clin.info$Sample.ID
sample.sheet <- sample.sheet[in_clin, ]
# Then keep only the clinical rows that still have a row in the filtered
# sample sheet, so both tables end up covering the same set of IDs.
in_sheet <- clin.info$Sample.ID %in% sample.sheet$Sample.ID
clin.info <- clin.info[in_sheet, ]
Then, I remove the substring and retain only samples with duplicate prefix.
# Pattern for the last "_"-separated piece of a file name, anchored at the end
# of the string (for "<id>_noid_Red.idat" it matches "_Red.idat").
rgx_undsc <- "_[^_]*$"

# NOTE: the result is stored under the name `basename`, which is also the name
# of a base R function.
basename <- sample.sheet %>%
  # Strip that trailing piece so files sharing a stem share one file_prefix.
  mutate(file_prefix = str_remove(File.Name, rgx_undsc)) %>%
  # Tally the rows per prefix; .groups = "drop" returns an ungrouped result.
  group_by(file_prefix) %>%
  summarize(n = n(), .groups = "drop") %>%
  # A prefix seen more than once is a duplicate; keep only those.
  filter(n > 1) %>%
  select(file_prefix)
**If `Sample.ID` has more than one pair, take the first pair.**
# NOTE: this loop is the code that raises the error shown in the traceback.
# for() over a data frame walks its columns, not its rows, so on each pass `i`
# is an entire column: a vector with one value per row kept by the filter
# (rows whose Sample.ID occurs more than twice).
for(i in sample.sheet %>% group_by(Sample.ID) %>% filter(n()>2)){
# Get the pairs
# `[[<-` accepts a single index only; indexing with the multi-element `i`
# fails with "attempt to select more than one element in vectorIndex".
# `pairs` is not created anywhere in the visible code.
pairs[[i]] <- sub("\\_.*", "", i)
# Always binds the first element of `pairs`, regardless of the current `i`.
basename <- cbind(basename, pairs[[1]])
}
Traceback:
Error in `[[<-`(`*tmp*`, i, value = sub("\\_.*", "", i)) :
attempt to select more than one element in vectorIndex
Data:
Example:
> dput(sample.sheet)
structure(list(File.Name = c("9d797850-f4e8-4799-8e36-f9611ce9ef99_noid_Red.idat",
"fdd60aca-ac18-43c0-b191-94fca93fb0b4_noid_Grn.idat", "fdd60aca-ac18-43c0-b191-94fca93fb0b4_noid_Red.idat",
"4086487a-70af-4091-8691-a04adbdefd94_noid_Red.idat", "9514c72b-27c9-4bff-bb8c-4cd809ab99cf_noid_Grn.idat",
"9514c72b-27c9-4bff-bb8c-4cd809ab99cf_noid_Red.idat", "ca04f909-7afa-4feb-b160-ae0e363e2004_noid_Grn.idat",
"295b1e9d-315d-4f87-8a86-38a9c2264f00_noid_Grn.idat", "6cab56c6-d37b-4bb2-b575-c0c924fe312d_noid_Grn.idat",
"71578b77-2494-4d65-8187-809a9277eb2f_noid_Grn.idat", "182cce6f-eb18-4c51-9c8d-47a0770ef79d_noid_Grn.idat",
"19d2e9e5-796e-4df5-96aa-68f5d0fa1855_noid_Grn.idat", "7dc90b83-0171-4600-883a-8de97c0e57cc_noid_Grn.idat",
"a3b85dd8-5621-4665-bafd-7edcf7c05aa4_noid_Red.idat", "b53495eb-ca7a-455b-b45c-338ef3689ca2_noid_Grn.idat",
"69df812d-f5cc-4c1d-ac76-01530e7df677_noid_Red.idat", "69df812d-f5cc-4c1d-ac76-01530e7df677_noid_Grn.idat",
"5723e331-d8e6-4687-ba89-3c0e9a8c9317_noid_Grn.idat", "6445036e-69bf-4f60-a1c1-b2506e32e53c_noid_Red.idat",
"c65756c7-7ef4-4c90-ac3a-15cacf6fa83e_noid_Red.idat", "4d87a92b-998b-4af6-a45b-a1cc95719597_noid_Grn.idat",
"e26a7836-17af-4498-8d69-45e86961da43_noid_Red.idat", "f9233201-9029-42f1-94e1-57758062afc4_noid_Grn.idat",
"f9233201-9029-42f1-94e1-57758062afc4_noid_Red.idat", "dccbe187-756d-402e-ba7f-b291f04f62c4_noid_Grn.idat",
"ca04f909-7afa-4feb-b160-ae0e363e2004_noid_Red.idat", "993cba91-83c5-4383-8040-c490bd4ba0cf_noid_Red.idat",
"71578b77-2494-4d65-8187-809a9277eb2f_noid_Red.idat", "913f45bd-4ac2-4411-a108-0bd1dbc16b32_noid_Grn.idat",
"913f45bd-4ac2-4411-a108-0bd1dbc16b32_noid_Red.idat", "7dc90b83-0171-4600-883a-8de97c0e57cc_noid_Red.idat",
"2f29fd48-decc-40a1-8069-47dd47996474_noid_Grn.idat", "d5da562f-54db-4081-b687-81dcf8b4f312_noid_Red.idat",
"13182c26-fe7f-43d3-9e0b-8d1cc980fcb8_noid_Grn.idat", "9d797850-f4e8-4799-8e36-f9611ce9ef99_noid_Grn.idat",
"b53495eb-ca7a-455b-b45c-338ef3689ca2_noid_Red.idat", "06f925c2-a194-4efe-98de-6a6816a64fca_noid_Red.idat",
"06f925c2-a194-4efe-98de-6a6816a64fca_noid_Grn.idat", "13182c26-fe7f-43d3-9e0b-8d1cc980fcb8_noid_Red.idat",
"6cab56c6-d37b-4bb2-b575-c0c924fe312d_noid_Red.idat", "6a544ba3-bb12-4995-a222-b5645505bda2_noid_Grn.idat",
"6a544ba3-bb12-4995-a222-b5645505bda2_noid_Red.idat", "dccbe187-756d-402e-ba7f-b291f04f62c4_noid_Red.idat",
"59e7d51f-1d3c-4033-b8a6-3c8c77e9b535_noid_Red.idat", "2c5c53d7-4812-4c05-8eb6-712ef8a48e50_noid_Grn.idat",
"af119d07-c7aa-4521-864f-d40b18f587fd_noid_Red.idat", "6a0d5d91-8f46-4eb5-a600-a586dc7b9801_noid_Grn.idat",
"33c04c3b-5980-47f9-85dd-d82828553e64_noid_Red.idat", "bdff5207-9d46-4b7f-ba63-cf32704363e7_noid_Grn.idat",
"30728e69-5643-4cb8-825c-0c163c3a9333_noid_Grn.idat", "9217713e-0423-45af-bad1-766d1205da3a_noid_Red.idat",
"48c5eedb-542c-438e-8e92-85ea64e34776_noid_Grn.idat", "48c5eedb-542c-438e-8e92-85ea64e34776_noid_Red.idat",
"e5641b0a-95af-4389-a890-4ed06cfab944_noid_Grn.idat", "e5641b0a-95af-4389-a890-4ed06cfab944_noid_Red.idat",
"c1e9a16d-7244-4dbf-a8a8-36a4bf94862b_noid_Red.idat", "314c0ff5-3c8e-4b01-83f9-019f6e2fcdea_noid_Red.idat",
"eeb65f09-5dc8-49b3-817a-eec283ff5bf9_noid_Grn.idat", "5723e331-d8e6-4687-ba89-3c0e9a8c9317_noid_Red.idat",
"9cd0669a-ede3-4bf4-b83f-80b4990a5edc_noid_Red.idat", "40510fe4-cd15-40fb-8a87-7c27e990c0a9_noid_Red.idat",
"5b856c08-b220-4d0e-8a90-0b5cdbee105e_noid_Grn.idat", "2c5c53d7-4812-4c05-8eb6-712ef8a48e50_noid_Red.idat",
"ccd4e941-753b-4ebf-ac98-f3a989266c50_noid_Grn.idat", "c0d8d4e6-a0cd-43f4-8b1c-c7a695603165_noid_Grn.idat",
"9217713e-0423-45af-bad1-766d1205da3a_noid_Grn.idat", "4c1af618-e0e7-4c2e-b7ff-1928fe3bf77a_noid_Grn.idat",
"4c1af618-e0e7-4c2e-b7ff-1928fe3bf77a_noid_Red.idat", "4fd551c8-c6ca-4068-afb9-610677a68c7b_noid_Grn.idat",
"6f03dfc0-2510-41de-808a-95b3216e1dfa_noid_Grn.idat", "4086487a-70af-4091-8691-a04adbdefd94_noid_Grn.idat",
"6f03dfc0-2510-41de-808a-95b3216e1dfa_noid_Red.idat", "993cba91-83c5-4383-8040-c490bd4ba0cf_noid_Grn.idat",
"7bd14ee3-3981-467c-8fe7-113b4d29fb66_noid_Red.idat", "8c77b9d2-26ba-431b-aecf-7a81fa0455ee_noid_Grn.idat",
"29951bb6-c71c-462c-b620-8cae18eb0962_noid_Red.idat", "0927a4a1-e872-4674-9905-6310754bcc05_noid_Red.idat",
"d3c443db-25e7-46ae-bd0c-9978390a88f8_noid_Grn.idat", "c80cc68d-dde9-4cff-98c5-977098a70eaa_noid_Grn.idat",
"395c0b4f-00ac-4602-a17e-3bdb19aae54d_noid_Grn.idat", "0927a4a1-e872-4674-9905-6310754bcc05_noid_Grn.idat",
"95d1a257-2641-4aa3-a732-b1f5f5732702_noid_Red.idat", "43098746-2944-43cf-b3bf-bf5a92861c32_noid_Red.idat",
"f70274fc-3a63-4e6a-a5f0-be64995a119f_noid_Grn.idat", "f70274fc-3a63-4e6a-a5f0-be64995a119f_noid_Red.idat",
"c1e9a16d-7244-4dbf-a8a8-36a4bf94862b_noid_Grn.idat", "a1882266-9e40-4c99-b162-35a183201eaa_noid_Grn.idat",
"09eb47e3-510a-4491-99c6-d97e2e2e521c_noid_Red.idat", "314c0ff5-3c8e-4b01-83f9-019f6e2fcdea_noid_Grn.idat",
"eeb65f09-5dc8-49b3-817a-eec283ff5bf9_noid_Red.idat", "6445036e-69bf-4f60-a1c1-b2506e32e53c_noid_Grn.idat",
"f995f934-3a82-4c2b-93af-370714103356_noid_Grn.idat", "5b856c08-b220-4d0e-8a90-0b5cdbee105e_noid_Red.idat",
"9fd697da-b3b2-408a-9b6e-c94f36824f43_noid_Grn.idat", "ccd4e941-753b-4ebf-ac98-f3a989266c50_noid_Red.idat",
"29951bb6-c71c-462c-b620-8cae18eb0962_noid_Grn.idat", "73f6dbe1-c74f-4337-a379-78aa0970d9e6_noid_Grn.idat",
"73f6dbe1-c74f-4337-a379-78aa0970d9e6_noid_Red.idat", "40510fe4-cd15-40fb-8a87-7c27e990c0a9_noid_Grn.idat",
"0cd956b6-e061-4439-874e-9f5c09134117_noid_Red.idat", "0cd956b6-e061-4439-874e-9f5c09134117_noid_Grn.idat",
"4fd551c8-c6ca-4068-afb9-610677a68c7b_noid_Red.idat", "f67dbc0a-b969-48f5-86e0-d99bef15c543_noid_Grn.idat",
"1a20d2b3-5c03-4710-8aeb-2002c921a0ed_noid_Grn.idat", "a3b85dd8-5621-4665-bafd-7edcf7c05aa4_noid_Grn.idat",
"b21f822a-6dfd-4146-bd22-11449585ea9d_noid_Red.idat", "4313ff18-adc2-4cbc-a2d9-fe14df29a161_noid_Red.idat",
"da6348bb-d5b1-4dd5-8bd0-008c3b1a992d_noid_Grn.idat", "da6348bb-d5b1-4dd5-8bd0-008c3b1a992d_noid_Red.idat",
"6b022609-0fb2-4147-9dd4-d7d49dcabe0a_noid_Red.idat", "6b022609-0fb2-4147-9dd4-d7d49dcabe0a_noid_Grn.idat",
"f67dbc0a-b969-48f5-86e0-d99bef15c543_noid_Red.idat", "b21f822a-6dfd-4146-bd22-11449585ea9d_noid_Grn.idat",
"06e7989a-094f-4da4-9b90-88b328f3c359_noid_Red.idat", "295b1e9d-315d-4f87-8a86-38a9c2264f00_noid_Red.idat",
"1a20d2b3-5c03-4710-8aeb-2002c921a0ed_noid_Red.idat", "19d2e9e5-796e-4df5-96aa-68f5d0fa1855_noid_Red.idat",
"4313ff18-adc2-4cbc-a2d9-fe14df29a161_noid_Grn.idat", "2f29fd48-decc-40a1-8069-47dd47996474_noid_Red.idat",
"d5da562f-54db-4081-b687-81dcf8b4f312_noid_Grn.idat", "06e7989a-094f-4da4-9b90-88b328f3c359_noid_Grn.idat",
"0f2e4e7c-8907-448f-8efb-39f80e0842e9_noid_Red.idat", "0f2e4e7c-8907-448f-8efb-39f80e0842e9_noid_Grn.idat",
"9fd697da-b3b2-408a-9b6e-c94f36824f43_noid_Red.idat", "af119d07-c7aa-4521-864f-d40b18f587fd_noid_Grn.idat",
"9c3d7d1c-177d-44b6-b864-d9197d0d7e96_noid_Red.idat", "9c3d7d1c-177d-44b6-b864-d9197d0d7e96_noid_Grn.idat",
"d3c443db-25e7-46ae-bd0c-9978390a88f8_noid_Red.idat", "c65756c7-7ef4-4c90-ac3a-15cacf6fa83e_noid_Grn.idat",
"4d87a92b-998b-4af6-a45b-a1cc95719597_noid_Red.idat", "e26a7836-17af-4498-8d69-45e86961da43_noid_Grn.idat",
"23b872d1-b322-48bd-b04c-b53eecc8872c_noid_Grn.idat", "23b872d1-b322-48bd-b04c-b53eecc8872c_noid_Red.idat",
"3e61019c-1c6f-4dd9-94cc-1f0ae354a761_noid_Red.idat", "a1882266-9e40-4c99-b162-35a183201eaa_noid_Red.idat",
"09eb47e3-510a-4491-99c6-d97e2e2e521c_noid_Grn.idat", "7bd14ee3-3981-467c-8fe7-113b4d29fb66_noid_Grn.idat",
"8c77b9d2-26ba-431b-aecf-7a81fa0455ee_noid_Red.idat", "9cd0669a-ede3-4bf4-b83f-80b4990a5edc_noid_Grn.idat",
"af3b88bb-3af5-4782-8ce2-b8d0d6501c31_noid_Red.idat", "af3b88bb-3af5-4782-8ce2-b8d0d6501c31_noid_Grn.idat",
"f995f934-3a82-4c2b-93af-370714103356_noid_Red.idat", "3e61019c-1c6f-4dd9-94cc-1f0ae354a761_noid_Grn.idat",
"182cce6f-eb18-4c51-9c8d-47a0770ef79d_noid_Red.idat", "43098746-2944-43cf-b3bf-bf5a92861c32_noid_Grn.idat",
"c80cc68d-dde9-4cff-98c5-977098a70eaa_noid_Red.idat", "395c0b4f-00ac-4602-a17e-3bdb19aae54d_noid_Red.idat",
"1b8c9552-7a5f-492a-9d68-1f105cfbbe30_noid_Red.idat", "c0d8d4e6-a0cd-43f4-8b1c-c7a695603165_noid_Red.idat",
"1b8c9552-7a5f-492a-9d68-1f105cfbbe30_noid_Grn.idat", "95d1a257-2641-4aa3-a732-b1f5f5732702_noid_Grn.idat",
"0852c194-416f-48f9-9b4f-652dde1b9a7f_noid_Grn.idat", "0852c194-416f-48f9-9b4f-652dde1b9a7f_noid_Red.idat",
"923a6127-4a0c-4319-83e8-fade2e24b691_noid_Red.idat", "9424d4c5-8aee-43e8-90d4-5b9e5c407a8b_noid_Red.idat",
"7ea080af-a729-4adf-b94a-654b4ce864d5_noid_Grn.idat", "7f27082e-e3a5-48e6-9309-c49d1372b6c3_noid_Grn.idat",
"6d987585-1159-48ea-bf6d-631b3b9fa2c2_noid_Grn.idat", "ec3d51f8-d565-41ec-be42-8c0f6ee2ed98_noid_Grn.idat",
"ec3d51f8-d565-41ec-be42-8c0f6ee2ed98_noid_Red.idat", "8e14d127-11c1-4899-934d-3a602d71a5b0_noid_Grn.idat",
"923a6127-4a0c-4319-83e8-fade2e24b691_noid_Grn.idat", "7a82ce66-4f2f-4ff2-b05d-4dbf8b36006a_noid_Grn.idat",
"cbaee7df-f1ba-4fdc-a626-c06b18ccbcef_noid_Red.idat", "2d86f958-19c7-44a0-ad87-898c6dc1821c_noid_Red.idat",
"a86a64e1-c38d-4e64-ac83-5d9280aa94af_noid_Red.idat", "30728e69-5643-4cb8-825c-0c163c3a9333_noid_Red.idat",
"8e14d127-11c1-4899-934d-3a602d71a5b0_noid_Red.idat", "b5f41af9-03ab-4388-997d-e2d631ec9241_noid_Red.idat",
"b5f41af9-03ab-4388-997d-e2d631ec9241_noid_Grn.idat", "59e7d51f-1d3c-4033-b8a6-3c8c77e9b535_noid_Grn.idat",
"78c54354-7e99-4a11-ac13-8d3e6102f537_noid_Red.idat", "7a82ce66-4f2f-4ff2-b05d-4dbf8b36006a_noid_Red.idat",
"b32d74cb-4e02-46ab-92a2-84e7a1327e35_noid_Red.idat", "f16b00bf-c235-4076-b6bc-d540fbbee6bb_noid_Red.idat",
"ed8dca7a-ed1e-48a6-9039-77a8788131d6_noid_Grn.idat", "33c04c3b-5980-47f9-85dd-d82828553e64_noid_Grn.idat",
"6a0d5d91-8f46-4eb5-a600-a586dc7b9801_noid_Red.idat", "20abec25-0742-4967-8b7a-b400a77fedb7_noid_Grn.idat",
"20abec25-0742-4967-8b7a-b400a77fedb7_noid_Red.idat", "7f27082e-e3a5-48e6-9309-c49d1372b6c3_noid_Red.idat",
"b47bd875-8c53-474c-82c3-7c33bb36e33e_noid_Grn.idat", "bdff5207-9d46-4b7f-ba63-cf32704363e7_noid_Red.idat",
"78c54354-7e99-4a11-ac13-8d3e6102f537_noid_Grn.idat", "b32d74cb-4e02-46ab-92a2-84e7a1327e35_noid_Grn.idat",
"cbaee7df-f1ba-4fdc-a626-c06b18ccbcef_noid_Grn.idat", "9424d4c5-8aee-43e8-90d4-5b9e5c407a8b_noid_Grn.idat",
"9eed0834-f7e3-4a9d-bc6c-8da77260cb15_noid_Grn.idat", "aa54459b-64a6-4bc4-a0f2-aa26d962c5e1_noid_Grn.idat",
"2d86f958-19c7-44a0-ad87-898c6dc1821c_noid_Grn.idat", "a86a64e1-c38d-4e64-ac83-5d9280aa94af_noid_Grn.idat",
"b47bd875-8c53-474c-82c3-7c33bb36e33e_noid_Red.idat", "d0995d0b-db6b-4989-b166-b1fc0df3e49d_noid_Grn.idat",
"d0995d0b-db6b-4989-b166-b1fc0df3e49d_noid_Red.idat", "f16b00bf-c235-4076-b6bc-d540fbbee6bb_noid_Grn.idat",
"f5fb1851-4687-4163-9b1c-ae687a27e18a_noid_Grn.idat", "f5fb1851-4687-4163-9b1c-ae687a27e18a_noid_Red.idat",
"9eed0834-f7e3-4a9d-bc6c-8da77260cb15_noid_Red.idat", "ed8dca7a-ed1e-48a6-9039-77a8788131d6_noid_Red.idat",
"aa54459b-64a6-4bc4-a0f2-aa26d962c5e1_noid_Red.idat", "7ea080af-a729-4adf-b94a-654b4ce864d5_noid_Red.idat",
"e697de99-2c53-41f1-8407-30b417df666b_noid_Red.idat", "e697de99-2c53-41f1-8407-30b417df666b_noid_Grn.idat",
"6d987585-1159-48ea-bf6d-631b3b9fa2c2_noid_Red.idat"), Sample.ID = c("TCGA.CZ.5986.01",
"TCGA.BP.5176.01", "TCGA.BP.5176.01", "TCGA.A3.3387.01", "TCGA.B8.4153.01",
"TCGA.B8.4153.01", "TCGA.CZ.5451.01", "TCGA.CZ.5468.01", "TCGA.CW.5580.01",
"TCGA.CW.5590.01", "TCGA.B0.4698.01", "TCGA.CJ.4920.01", "TCGA.CZ.5459.01",
"TCGA.CW.5587.01", "TCGA.B0.5705.01", "TCGA.EU.5906.01", "TCGA.EU.5906.01",
"TCGA.B0.5115.01", "TCGA.CZ.5466.01", "TCGA.B0.4811.01", "TCGA.A3.3357.01",
"TCGA.CZ.5453.01", "TCGA.B2.5635.01", "TCGA.B2.5635.01", "TCGA.BP.4801.01",
"TCGA.CZ.5451.01", "TCGA.CZ.4853.01", "TCGA.CW.5590.01", "TCGA.G6.A8L8.01",
"TCGA.G6.A8L8.01", "TCGA.CZ.5459.01", "TCGA.B0.5690.01", "TCGA.B0.5702.01",
"TCGA.BP.5168.01", "TCGA.CZ.5986.01", "TCGA.B0.5705.01", "TCGA.B8.5164.01",
"TCGA.B8.5164.01", "TCGA.BP.5168.01", "TCGA.CW.5580.01", "TCGA.B0.5692.01",
"TCGA.B0.5692.01", "TCGA.BP.4801.01", "TCGA.CJ.4912.01", "TCGA.BP.5182.01",
"TCGA.B0.5098.01", "TCGA.B0.5092.01", "TCGA.CJ.5677.01", "TCGA.DV.A4VX.01",
"TCGA.DV.5565.01", "TCGA.B0.5094.01", "TCGA.B0.4821.01", "TCGA.B0.4821.01",
"TCGA.B2.3924.01", "TCGA.B2.3924.01", "TCGA.B0.4710.01", "TCGA.CZ.4865.01",
"TCGA.EU.5905.01", "TCGA.B0.5115.01", "TCGA.CJ.6033.01", "TCGA.CZ.5465.01",
"TCGA.G6.A8L6.01", "TCGA.BP.5182.01", "TCGA.DV.A4W0.01", "TCGA.G6.A5PC.01",
"TCGA.B0.5094.01", "TCGA.BP.5198.01", "TCGA.BP.5198.01", "TCGA.A3.3367.01",
"TCGA.B8.A54D.01", "TCGA.A3.3387.01", "TCGA.B8.A54D.01", "TCGA.CZ.4853.01",
"TCGA.B0.4810.01", "TCGA.CJ.5682.01", "TCGA.CJ.4923.01", "TCGA.B4.5835.01",
"TCGA.CJ.4918.01", "TCGA.B0.4827.01", "TCGA.B0.4823.01", "TCGA.B4.5835.01",
"TCGA.CJ.4869.01", "TCGA.B8.A54J.01", "TCGA.CW.6093.01", "TCGA.CW.6093.01",
"TCGA.B0.4710.01", "TCGA.CJ.6027.01", "TCGA.CZ.5456.01", "TCGA.CZ.4865.01",
"TCGA.EU.5905.01", "TCGA.CZ.5466.01", "TCGA.B0.5700.01", "TCGA.G6.A8L6.01",
"TCGA.CW.5581.01", "TCGA.DV.A4W0.01", "TCGA.CJ.4923.01", "TCGA.G6.A8L7.01",
"TCGA.G6.A8L7.01", "TCGA.CZ.5465.01", "TCGA.B0.4818.01", "TCGA.B0.4818.01",
"TCGA.A3.3367.01", "TCGA.CJ.6030.01", "TCGA.MM.A564.01", "TCGA.CW.5587.01",
"TCGA.B0.4852.01", "TCGA.CJ.5683.01", "TCGA.B2.5635.01", "TCGA.B2.5635.01",
"TCGA.B2.5635.01", "TCGA.B2.5635.01", "TCGA.CJ.6030.01", "TCGA.B0.4852.01",
"TCGA.B8.4621.01", "TCGA.CZ.5468.01", "TCGA.MM.A564.01", "TCGA.CJ.4920.01",
"TCGA.CJ.5683.01", "TCGA.B0.5690.01", "TCGA.B0.5702.01", "TCGA.B8.4621.01",
"TCGA.CJ.5676.01", "TCGA.CJ.5676.01", "TCGA.CW.5581.01", "TCGA.B0.5098.01",
"TCGA.B2.3924.01", "TCGA.B2.3924.01", "TCGA.CJ.4918.01", "TCGA.B0.4811.01",
"TCGA.A3.3357.01", "TCGA.CZ.5453.01", "TCGA.CW.6090.01", "TCGA.CW.6090.01",
"TCGA.B8.5550.01", "TCGA.CJ.6027.01", "TCGA.CZ.5456.01", "TCGA.B0.4810.01",
"TCGA.CJ.5682.01", "TCGA.CJ.6033.01", "TCGA.B0.5712.01", "TCGA.B0.5712.01",
"TCGA.B0.5700.01", "TCGA.B8.5550.01", "TCGA.B0.4698.01", "TCGA.B8.A54J.01",
"TCGA.B0.4827.01", "TCGA.B0.4823.01", "TCGA.CZ.5461.01", "TCGA.G6.A5PC.01",
"TCGA.CZ.5461.01", "TCGA.CJ.4869.01", "TCGA.B2.5641.01", "TCGA.B2.5641.01",
"TCGA.B0.5709.01", "TCGA.B0.5096.01", "TCGA.B8.5162.01", "TCGA.CJ.4901.01",
"TCGA.B0.5080.01", "TCGA.B0.5695.01", "TCGA.B0.5695.01", "TCGA.B0.4815.01",
"TCGA.B0.5709.01", "TCGA.B0.5116.01", "TCGA.CJ.5675.01", "TCGA.B0.5698.01",
"TCGA.CZ.5462.01", "TCGA.DV.5565.01", "TCGA.B0.4815.01", "TCGA.CZ.4856.01",
"TCGA.CZ.4856.01", "TCGA.CJ.4912.01", "TCGA.B0.5099.01", "TCGA.B0.5116.01",
"TCGA.BP.5199.01", "TCGA.B0.5713.01", "TCGA.B8.5163.01", "TCGA.CJ.5677.01",
"TCGA.B0.5092.01", "TCGA.B0.5110.01", "TCGA.B0.5110.01", "TCGA.CJ.4901.01",
"TCGA.CJ.5672.01", "TCGA.DV.A4VX.01", "TCGA.B0.5099.01", "TCGA.BP.5199.01",
"TCGA.CJ.5675.01", "TCGA.B0.5096.01", "TCGA.CZ.4859.01", "TCGA.B0.5402.01",
"TCGA.B0.5698.01", "TCGA.CZ.5462.01", "TCGA.CJ.5672.01", "TCGA.B8.4622.01",
"TCGA.B8.4622.01", "TCGA.B0.5713.01", "TCGA.BP.5195.01", "TCGA.BP.5195.01",
"TCGA.CZ.4859.01", "TCGA.B8.5163.01", "TCGA.B0.5402.01", "TCGA.B8.5162.01",
"TCGA.B8.A7U6.01", "TCGA.B8.A7U6.01", "TCGA.B0.5080.01")), class = "data.frame", row.names = c(7L,
9L, 10L, 15L, 18L, 22L, 32L, 38L, 49L, 55L, 60L, 61L, 66L, 68L,
71L, 82L, 83L, 85L, 90L, 94L, 96L, 99L, 100L, 101L, 125L, 126L,
128L, 130L, 134L, 135L, 136L, 141L, 142L, 148L, 150L, 151L, 153L,
154L, 165L, 169L, 171L, 172L, 176L, 185L, 186L, 191L, 202L, 204L,
213L, 215L, 222L, 224L, 225L, 239L, 242L, 244L, 247L, 248L, 252L,
269L, 271L, 275L, 281L, 283L, 285L, 296L, 300L, 301L, 303L, 310L,
311L, 313L, 319L, 329L, 330L, 334L, 350L, 351L, 353L, 354L, 359L,
378L, 380L, 382L, 383L, 390L, 397L, 399L, 400L, 401L, 409L, 414L,
420L, 430L, 433L, 450L, 454L, 455L, 456L, 458L, 460L, 462L, 467L,
468L, 472L, 476L, 478L, 490L, 491L, 492L, 493L, 506L, 512L, 526L,
528L, 541L, 543L, 547L, 548L, 550L, 558L, 563L, 564L, 570L, 576L,
578L, 582L, 600L, 610L, 614L, 615L, 618L, 619L, 624L, 627L, 631L,
632L, 633L, 634L, 644L, 646L, 647L, 651L, 652L, 658L, 664L, 666L,
675L, 678L, 679L, 683L, 688L, 691L, 695L, 715L, 727L, 728L, 741L,
750L, 751L, 755L, 760L, 764L, 776L, 786L, 788L, 796L, 809L, 811L,
812L, 817L, 828L, 833L, 844L, 846L, 855L, 858L, 859L, 870L, 871L,
873L, 876L, 878L, 885L, 887L, 888L, 892L, 899L, 900L, 903L, 906L,
921L, 926L, 927L, 944L, 946L, 947L, 948L, 949L, 950L, 953L, 955L,
956L, 968L))
> dput(clin.info)
structure(list(Sample.ID = c("TCGA.A3.3357.01", "TCGA.A3.3367.01",
"TCGA.A3.3387.01", "TCGA.B0.4698.01", "TCGA.B0.4710.01", "TCGA.B0.4810.01",
"TCGA.B0.4811.01", "TCGA.B0.4815.01", "TCGA.B0.4818.01", "TCGA.B0.4821.01",
"TCGA.B0.4823.01", "TCGA.B0.4827.01", "TCGA.B0.4852.01", "TCGA.B0.5080.01",
"TCGA.B0.5092.01", "TCGA.B0.5094.01", "TCGA.B0.5096.01", "TCGA.B0.5098.01",
"TCGA.B0.5099.01", "TCGA.B0.5110.01", "TCGA.B0.5115.01", "TCGA.B0.5116.01",
"TCGA.B0.5402.01", "TCGA.B0.5690.01", "TCGA.B0.5692.01", "TCGA.B0.5695.01",
"TCGA.B0.5698.01", "TCGA.B0.5700.01", "TCGA.B0.5702.01", "TCGA.B0.5705.01",
"TCGA.B0.5709.01", "TCGA.B0.5712.01", "TCGA.B0.5713.01", "TCGA.B2.3924.01",
"TCGA.B2.5635.01", "TCGA.B2.5641.01", "TCGA.B4.5835.01", "TCGA.B8.4153.01",
"TCGA.B8.4621.01", "TCGA.B8.4622.01", "TCGA.B8.5162.01", "TCGA.B8.5163.01",
"TCGA.B8.5164.01", "TCGA.B8.5550.01", "TCGA.B8.A54D.01", "TCGA.B8.A54J.01",
"TCGA.B8.A7U6.01", "TCGA.BP.4801.01", "TCGA.BP.5168.01", "TCGA.BP.5176.01",
"TCGA.BP.5182.01", "TCGA.BP.5195.01", "TCGA.BP.5198.01", "TCGA.BP.5199.01",
"TCGA.CJ.4869.01", "TCGA.CJ.4901.01", "TCGA.CJ.4912.01", "TCGA.CJ.4918.01",
"TCGA.CJ.4920.01", "TCGA.CJ.4923.01", "TCGA.CJ.5672.01", "TCGA.CJ.5675.01",
"TCGA.CJ.5676.01", "TCGA.CJ.5677.01", "TCGA.CJ.5682.01", "TCGA.CJ.5683.01",
"TCGA.CJ.6027.01", "TCGA.CJ.6030.01", "TCGA.CJ.6033.01", "TCGA.CW.5580.01",
"TCGA.CW.5581.01", "TCGA.CW.5587.01", "TCGA.CW.5590.01", "TCGA.CW.6090.01",
"TCGA.CW.6093.01", "TCGA.CZ.4853.01", "TCGA.CZ.4856.01", "TCGA.CZ.4859.01",
"TCGA.CZ.4865.01", "TCGA.CZ.5451.01", "TCGA.CZ.5453.01", "TCGA.CZ.5456.01",
"TCGA.CZ.5459.01", "TCGA.CZ.5461.01", "TCGA.CZ.5462.01", "TCGA.CZ.5465.01",
"TCGA.CZ.5466.01", "TCGA.CZ.5468.01", "TCGA.CZ.5986.01", "TCGA.DV.5565.01",
"TCGA.DV.A4VX.01", "TCGA.DV.A4W0.01", "TCGA.EU.5905.01", "TCGA.EU.5906.01",
"TCGA.G6.A5PC.01", "TCGA.G6.A8L6.01", "TCGA.G6.A8L7.01", "TCGA.G6.A8L8.01",
"TCGA.MM.A564.01"), survival = c("lts", "lts", "lts", "non-lts",
"lts", "non-lts", "non-lts", "non-lts", "non-lts", "non-lts",
"non-lts", "non-lts", "non-lts", "non-lts", "non-lts", "non-lts",
"non-lts", "non-lts", "non-lts", "lts", "lts", "lts", "lts",
"lts", "lts", "lts", "lts", "lts", "lts", "lts", "lts", "lts",
"lts", "lts", "lts", "lts", "lts", "lts", "lts", "lts", "lts",
"lts", "lts", "lts", "lts", "lts", "lts", "lts", "non-lts", "non-lts",
"lts", "lts", "lts", "lts", "lts", "lts", "lts", "non-lts", "non-lts",
"non-lts", "lts", "lts", "lts", "non-lts", "lts", "lts", "lts",
"lts", "non-lts", "lts", "lts", "lts", "non-lts", "lts", "lts",
"lts", "lts", "lts", "non-lts", "lts", "lts", "lts", "lts", "non-lts",
"non-lts", "lts", "lts", "non-lts", "lts", "lts", "non-lts",
"lts", "lts", "lts", "non-lts", "lts", "lts", "lts", "lts")), class = "data.frame", row.names = c(NA,
99L))
</details>
# 答案1
**得分**: 0
```r
library(dplyr)
library(tidyr)
clin.info %>%
  # Attach each file row to its clinical record; a Sample.ID missing from
  # either table is dropped by the inner join.
  inner_join(sample.sheet, by = "Sample.ID") %>%
  # Replace File.Name with Basename: the text before the first "_"; the
  # remaining pieces are discarded.
  separate_wider_delim(File.Name, delim = "_", names = "Basename", too_many = "drop") %>%
  # Retain a Basename only when it appears on at least two rows.
  filter(n() >= 2, .by = Basename) %>%
  # Keep at most the first two rows of every Sample.ID.
  slice_head(n = 2, by = Sample.ID) %>%
  # Turn Basename into a path under <working directory>/idat.
  mutate(Basename = file.path(getwd(), "idat", Basename))
#> # A tibble: 198 × 3
#> Sample.ID survival Basename
#> <chr> <chr> <chr>
#> 1 TCGA.A3.3357.01 lts c:/tmp_files/idat/4d87a92b-998b-4af6-a45b-a1cc95719…
#> 2 TCGA.A3.3357.01 lts c:/tmp_files/idat/4d87a92b-998b-4af6-a45b-a1cc95719…
#> 3 TCGA.A3.3367.01 lts c:/tmp_files/idat/4fd551c8-c6ca-4068-afb9-610677a68…
#> 4 TCGA.A3.3367.01 lts c:/tmp_files/idat/4fd551c8-c6ca-4068-afb9-610677a68…
#> 5 TCGA.A3.3387.01 lts c:/tmp_files/idat/4086487a-70af-4091-8691-a04adbdef…
#> 6 TCGA.A3.3387.01 lts c:/tmp_files/idat/4086487a-70af-4091-8691-a04adbdef…
#> 7 TCGA.B0.4698.01 non-lts c:/tmp_files/idat/182cce6f-eb18-4c51-9c8d-47a0770ef…
#> 8 TCGA.B0.4698.01 non-lts c:/tmp_files/idat/182cce6f-eb18-4c51-9c8d-47a0770ef…
#> 9 TCGA.B0.4710.01 lts c:/tmp_files/idat/c1e9a16d-7244-4dbf-a8a8-36a4bf948…
#> 10 TCGA.B0.4710.01 lts c:/tmp_files/idat/c1e9a16d-7244-4dbf-a8a8-36a4bf948…
#> # ℹ 188 more rows
创建于 2023-05-28,使用 reprex v2.0.2
<details>
<summary>英文:</summary>
``` r
library(dplyr)
library(tidyr)
clin.info %>%
  # Attach each file row to its clinical record; a Sample.ID missing from
  # either table is dropped by the inner join.
  inner_join(sample.sheet, by = "Sample.ID") %>%
  # Replace File.Name with Basename: the text before the first "_"; the
  # remaining pieces are discarded.
  separate_wider_delim(File.Name, delim = "_", names = "Basename", too_many = "drop") %>%
  # Retain a Basename only when it appears on at least two rows.
  filter(n() >= 2, .by = Basename) %>%
  # Keep at most the first two rows of every Sample.ID.
  slice_head(n = 2, by = Sample.ID) %>%
  # Turn Basename into a path under <working directory>/idat.
  mutate(Basename = file.path(getwd(), "idat", Basename))
#> # A tibble: 198 × 3
#> Sample.ID survival Basename
#> <chr> <chr> <chr>
#> 1 TCGA.A3.3357.01 lts c:/tmp_files/idat/4d87a92b-998b-4af6-a45b-a1cc95719…
#> 2 TCGA.A3.3357.01 lts c:/tmp_files/idat/4d87a92b-998b-4af6-a45b-a1cc95719…
#> 3 TCGA.A3.3367.01 lts c:/tmp_files/idat/4fd551c8-c6ca-4068-afb9-610677a68…
#> 4 TCGA.A3.3367.01 lts c:/tmp_files/idat/4fd551c8-c6ca-4068-afb9-610677a68…
#> 5 TCGA.A3.3387.01 lts c:/tmp_files/idat/4086487a-70af-4091-8691-a04adbdef…
#> 6 TCGA.A3.3387.01 lts c:/tmp_files/idat/4086487a-70af-4091-8691-a04adbdef…
#> 7 TCGA.B0.4698.01 non-lts c:/tmp_files/idat/182cce6f-eb18-4c51-9c8d-47a0770ef…
#> 8 TCGA.B0.4698.01 non-lts c:/tmp_files/idat/182cce6f-eb18-4c51-9c8d-47a0770ef…
#> 9 TCGA.B0.4710.01 lts c:/tmp_files/idat/c1e9a16d-7244-4dbf-a8a8-36a4bf948…
#> 10 TCGA.B0.4710.01 lts c:/tmp_files/idat/c1e9a16d-7244-4dbf-a8a8-36a4bf948…
#> # ℹ 188 more rows
<sup>Created on 2023-05-28 with reprex v2.0.2</sup>
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论