使用geeglm()中的margins()在R中。

huangapple go评论102阅读模式
英文:

Using margins() with geeglm() in R

问题

我已经创建了一系列使用geeglm()函数的GEE模型。我想在exposure_1为正和负时计算主要结果的边际几率比。

我想绘制总体几率和两个边际几率比的森林图,以便进行比较。然而,margins()不接受geeglm()模型。

我一直没有找到另一种在margins中使用geeglm()输出的方法。我将不胜感激任何帮助。

以下是一个创建geeglm模型并将其转化为森林图的代码的通用版本:

  1. library(ggplot2)
  2. library(dplyr)
  3. # 为了可重现性,设置随机种子
  4. set.seed(13)
  5. # 数据框中的行数
  6. n <- 100
  7. # 生成暴露和结果的虚构数据
  8. exposure_1 <- sample(0:1, n, replace = TRUE)
  9. exposure_2 <- sample(0:1, n, replace = TRUE)
  10. exposure_3 <- sample(0:1, n, replace = TRUE)
  11. # 生成主要结果的虚构数据(假设是二元结果)
  12. # 如果需要,可以基于某种逻辑或模型生成结果
  13. primary_outcome <- rbinom(n, 1, 0.3 + 0.2 * exposure_1 + 0.1 * exposure_2 - 0.1 * exposure_3)
  14. # 生成社区/分组变量的虚构数据
  15. enr_community <- sample(letters[1:5], n, replace = TRUE)
  16. # 合并生成的数据以创建数据框
  17. df <- data.frame(
  18. exposure_1 = exposure_1,
  19. exposure_2 = exposure_2,
  20. exposure_3 = exposure_3,
  21. primary_outcome = primary_outcome,
  22. enr_community = enr_community
  23. )
  24. logit_model_1 <- geeglm(formula = primary_outcome ~ exposure_1*exposure_2 + exposure_3 , family = binomial, id = enr_community, corstr = "independence", data = df)
  25. lcf <- logit_model_1 %>%
  26. tidy(exponentiate = TRUE, conf.int = TRUE, conf.level = 0.95) %>%
  27. mutate(model = "Main Result") %>%
  28. mutate(exposure = "Exposure 1") %>%
  29. filter(term == "exposure_1")
  30. lcftable <- lcf |>
  31. # 对估计值和95%置信区间四舍五入到小数点后2位,以满足期刊要求
  32. mutate(across(
  33. c(estimate, conf.low, conf.high, p.value),
  34. ~ str_pad(
  35. round(.x,2),
  36. width = 4,
  37. pad = "0",
  38. side = "right"
  39. ))) %>%
  40. mutate(estimate = case_when(
  41. estimate == "1000" ~ "1.00",
  42. TRUE ~ estimate
  43. )) %>%
  44. mutate(conf.low = case_when(
  45. conf.low == "1000" ~ "1.00",
  46. TRUE ~ conf.low
  47. )) %>%
  48. mutate(conf.high = case_when(
  49. conf.high == "2000" ~ "2.00",
  50. TRUE ~ conf.high
  51. )) %>%
  52. mutate(model = fct_rev(fct_relevel(model, "Model")))
  53. lcftable <- lcftable %>%
  54. # 转换回字符
  55. mutate(estimate = format(estimate, n.small = 3),
  56. conf.low = format(conf.low, n.small = 3),
  57. conf.high = format(conf.high, n.small = 3),
  58. estimate_lab = paste0(estimate, " (", as.character(conf.low), " -", conf.high, ")")) |>
  59. mutate(p.value = as.numeric(p.value)) %>%
  60. # 四舍五入p值到两位小数,除非p < 0.001
  61. mutate(p.value = case_when(
  62. p.value < .001 ~ "<0.001",
  63. round(p.value, 2) == .05 ~ as.character(round(p.value,3)),
  64. p.value < .01 ~ str_pad( # 如果小于0.01,则再多保留一位小数
  65. as.character(round(p.value, 3)),
  66. width = 4,
  67. pad = "0",
  68. side = "right"
  69. ),
  70. p.value > 0.995 ~ "1.00",
  71. TRUE ~ str_pad( # 否则只四舍五入到2位小数,并填充字符串以使0.20读作0.20
  72. as.character(round(p.value, 2)),
  73. width = 4,
  74. pad = "0",
  75. side = "right"
  76. ))) %>%
  77. mutate(estimate = as.numeric(estimate),
  78. conf.low = as.numeric(conf.low),
  79. conf.high = as.numeric(conf.high))
  80. lcfplot1 <- lcftable %>%
  81. ggplot(mapping = aes(x = estimate, xmin = conf.low, xmax = conf.high, y = model)) + #y = model rather than term
  82. geom_pointrange() +
  83. # scale_x_continuous(breaks = c(0, 1.0, 1.5, 2.0)) + #trans = scales::pseudo_log_trans(base = 10)
  84. geom_vline(xintercept = 1) +
  85. geom_point(size = 1.0) +
  86. # 调整分面网格
  87. facet_wrap(exposure~., ncol = 1, scale = "free_y") +
  88. xlab("Incident Odds Ratio, IOR (95% Confidence Interval)") +
  89. ggtitle("Associations between exposures and outcome") +
  90. geom_text(aes(x = 3.5, label = estimate_lab), hjust = -0.5, size = 3) +
  91. geom_text(aes(x = 2.5, label = p.value), hjust = -3.5, size = 3) +
  92. coord_cartesian(xlim = c(-0.1, 7.0)) +
  93. scale_x_continuous(trans = "pseudo_log", breaks = c(-1.0, 0, 1.0, 2.0, 5.0)) +
  94. theme(panel.background = element_blank(),
  95. panel.spacing = unit(1, "lines"),
  96. panel.border = element_rect(fill = NA, color = "white"),
  97. strip.background = element_rect(colour="white", fill="white"),
  98. strip.placement = "outside",
  99. text = element_text(size = 12),
  100. strip.text = element_text(face = "bold.italic", size = 11, hjust = 0.33), #element_blank(),
  101. plot.title = element_text(face = "bold", size = 13, hjust =
  102. <details>
  103. <summary>英文:</summary>
  104. I have created a series of GEE models using geeglm(). I would like to calculate the marginal odds ratios for primary_outcome when exposure_1 is positive and negative.
  105. I would like to plot the overall odds and the two marginal odds ratios sequentially in a forest plot to allow for comparison. However, margins() does not accept a geeglm() model.
  106. I have not been able to figure out another way to use geeglm() output with margins. I would appreciate any help.
  107. Here is a generic version of the code which creates a geeglm model and shows how I am translating it into a forest plot
  108. library(ggplot2)
  109. library(dplyr)
  110. # Set a random seed for reproducibility
  111. set.seed(13)
  112. # Number of rows in the dataframe
  113. n &lt;- 100
  114. # Generate made-up data for exposures and outcome
  115. exposure_1 &lt;- sample(0:1, n, replace = TRUE)
  116. exposure_2 &lt;- sample(0:1, n, replace = TRUE)
  117. exposure_3 &lt;- sample(0:1, n, replace = TRUE)
  118. # Generate made-up data for the primary outcome (assuming a binary outcome)
  119. # The outcome could be generated based on some logic or model if needed
  120. primary_outcome &lt;- rbinom(n, 1, 0.3 + 0.2 * exposure_1 + 0.1 * exposure_2 - 0.1 * exposure_3)
  121. # Generate made-up data for the community/grouping variable
  122. enr_community &lt;- sample(letters[1:5], n, replace = TRUE)
  123. # Combine the generated data to create the dataframe
  124. df &lt;- data.frame(
  125. exposure_1 = exposure_1,
  126. exposure_2 = exposure_2,
  127. exposure_3 = exposure_3,
  128. primary_outcome = primary_outcome,
  129. enr_community = enr_community
  130. )
  131. logit_model_1 &lt;- geeglm(formula = primary_outcome ~ exposure_1*exposure_2 + exposure_3 , family = binomial, id = enr_community, corstr = &quot;independence&quot;, data = df)
  132. lcf &lt;- logit_model_1 %&gt;%
  133. tidy(exponentiate = TRUE, conf.int = TRUE, conf.level = 0.95) %&gt;%
  134. mutate(model = &quot;Main Result&quot;) %&gt;%
  135. mutate(exposure = &quot;Exposure 1&quot;) %&gt;%
  136. filter(term == &quot;exposure_1&quot;)
  137. lcftable &lt;- lcf |&gt;
  138. # round estimates and 95% CIs to 2 decimal places for journal specifications
  139. mutate(across(
  140. c(estimate, conf.low, conf.high, p.value),
  141. ~ str_pad(
  142. round(.x,2),
  143. width = 4,
  144. pad = &quot;0&quot;,
  145. side = &quot;right&quot;
  146. ))) %&gt;%
  147. mutate(estimate = case_when(
  148. estimate == &quot;1000&quot; ~ &quot;1.00&quot;,
  149. TRUE ~ estimate
  150. )) %&gt;%
  151. mutate(conf.low = case_when(
  152. conf.low == &quot;1000&quot; ~ &quot;1.00&quot;,
  153. TRUE ~ conf.low
  154. )) %&gt;%
  155. mutate(conf.high = case_when(
  156. conf.high == &quot;2000&quot; ~ &quot;2.00&quot;,
  157. TRUE ~ conf.high
  158. )) %&gt;%
  159. mutate(model = fct_rev(fct_relevel(model, &quot;Model&quot;)))
  160. lcftable &lt;- lcftable %&gt;%
  161. #convert back to characters
  162. mutate(estimate = format(estimate, n.small = 3),
  163. conf.low = format(conf.low, n.small = 3),
  164. conf.high = format(conf.high, n.small = 3),
  165. estimate_lab = paste0(estimate, &quot; (&quot;, as.character(conf.low), &quot; -&quot;, conf.high, &quot;)&quot;)) |&gt;
  166. mutate(p.value = as.numeric(p.value)) %&gt;%
  167. # round p-values to two decimal places, except in cases where p &lt; .001
  168. mutate(p.value = case_when(
  169. p.value &lt; .001 ~ &quot;&lt;0.001&quot;,
  170. round(p.value, 2) == .05 ~ as.character(round(p.value,3)),
  171. p.value &lt; .01 ~ str_pad( # if less than .01, go one more decimal place
  172. as.character(round(p.value, 3)),
  173. width = 4,
  174. pad = &quot;0&quot;,
  175. side = &quot;right&quot;
  176. ),
  177. p.value &gt; 0.995 ~ &quot;1.00&quot;,
  178. TRUE ~ str_pad( # otherwise just round to 2 decimal places and pad string so that .2 reads as 0.20
  179. as.character(round(p.value, 2)),
  180. width = 4,
  181. pad = &quot;0&quot;,
  182. side = &quot;right&quot;
  183. ))) %&gt;%
  184. mutate(estimate = as.numeric(estimate),
  185. conf.low = as.numeric(conf.low),
  186. conf.high = as.numeric(conf.high))
  187. lcfplot1 &lt;- lcftable %&gt;%
  188. ggplot(mapping = aes(x = estimate, xmin = conf.low, xmax = conf.high, y = model)) + #y = model rather than term
  189. geom_pointrange() +
  190. # scale_x_continuous(breaks = c(0, 1.0, 1.5, 2.0)) + #trans = scales::pseudo_log_trans(base = 10)
  191. geom_vline(xintercept = 1) +
  192. geom_point(size = 1.0) +
  193. #adjust facet grid
  194. facet_wrap(exposure~., ncol = 1, scale = &quot;free_y&quot;) +
  195. xlab(&quot;Incident Odds Ratio, IOR (95% Confidence Interval)&quot;) +
  196. ggtitle(&quot;Associations between exposures and outcome&quot;) +
  197. geom_text(aes(x = 3.5, label = estimate_lab), hjust = -0.5, size = 3) +
  198. geom_text(aes(x = 2.5, label = p.value), hjust = -3.5, size = 3) +
  199. coord_cartesian(xlim = c(-0.1, 7.0)) +
  200. scale_x_continuous(trans = &quot;pseudo_log&quot;, breaks = c(-1.0, 0, 1.0, 2.0, 5.0)) +
  201. theme(panel.background = element_blank(),
  202. panel.spacing = unit(1, &quot;lines&quot;),
  203. panel.border = element_rect(fill = NA, color = &quot;white&quot;),
  204. strip.background = element_rect(colour=&quot;white&quot;, fill=&quot;white&quot;),
  205. strip.placement = &quot;outside&quot;,
  206. text = element_text(size = 12),
  207. strip.text = element_text(face = &quot;bold.italic&quot;, size = 11, hjust = 0.33), #element_blank(),
  208. plot.title = element_text(face = &quot;bold&quot;, size = 13, hjust = -11, vjust = 4),
  209. axis.title.x = element_text(size = 10, vjust = -1, hjust = 0.05),
  210. axis.title.y = element_blank(),
  211. panel.grid.major.x = element_line(color = &quot;#D3D3D3&quot;,
  212. size = 0.3, linetype = 2),
  213. plot.margin = unit(c(1, 3, 0.5, 0.5), &quot;inches&quot;)) +
  214. geom_errorbarh(height = 0)
  215. </details>
  216. # 答案1
  217. **得分**: 2
  218. `margins`软件包已经有一段时间没有更新,基本上处于维护模式。我建议转向更新的、通常更好的`marginaleffects`软件包。Vincent编写了一本出色的指南书:https://vincentarelbundock.github.io/marginaleffects/
  219. 我没有尝试过,但我可以看到`geeglm`模型是受支持的对象类别。
  220. <details>
  221. <summary>英文:</summary>
  222. The `margins` package has not been updated for some time and is basically in maintenance mode. I would suggest moving over to the newer and generally much better `marginaleffects` package. Vincent has put together a phenomenal guidebook: https://vincentarelbundock.github.io/marginaleffects/
  223. I have not tried it, but I can see that geeglm models are a supported object class.
  224. </details>

huangapple
  • 本文由 发表于 2023年7月31日 23:47:14
  • 转载请务必保留本文链接:https://go.coder-hub.com/76805201.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定