改进循环,使用mutate函数。

huangapple go评论63阅读模式
英文:

Improve loop by using mutate function

问题

I have a data frame called result which looks like that.

lat lng Night
41.60701 1.000831 2019-06-19
41.98151 1.973059 2020-04-11
... ... ...

Basically, I would like to add 4 columns: one for the time of sunset, the second for the time of sunrise, the third for the duration of the night in hours, and the fourth for the sampling effort (the duration of the night plus the time buffer). I managed to do this with a loop in the following code (using getSunlightTimes from the suncalc package).

library("plyr")
library("dplyr")
library("reshape")
library("data.table")
library("stringr")
library("tidyr")
library("ineq")
library("suncalc")

library(suncalc)
# Add the sunset of each sampling night, the sunrise of the following morning,
# the night length in hours and the sampling effort to every row of `result`
# (which must provide the columns `lat`, `lng` and `Night`).
time_buff <- 0.30 # buffer in hours; counted twice in `night_effort` below
temp <- result
n_rows <- nrow(temp)

# Preallocate typed result vectors. `ls()` returns the names of the objects in
# the workspace rather than an empty container, so seeding the results with it
# leaves stray names behind whenever the workspace holds more objects than
# `temp` has rows.
posta <- as.POSIXct(rep(NA_real_, n_rows), origin = "1970-01-01", tz = "UTC")
sorti <- posta
night_hours <- numeric(n_rows)

# seq_len() gives an empty sequence for a zero-row data frame, whereas
# 1:dim(temp)[1] would iterate over c(1, 0).
for (i in seq_len(n_rows)) {
  lat <- temp$lat[i]
  long <- temp$lng[i]
  sset <- as.Date(temp$Night[i]) # calendar day on which the night starts
  sris <- sset + 1 # the night ends on the following calendar day
  Tsset <- getSunlightTimes(sset, lat, long,
    keep = "sunset", tz = "UTC"
  )$sunset
  Tsris <- getSunlightTimes(sris, lat, long,
    keep = "sunrise", tz = "UTC"
  )$sunrise
  posta[i] <- Tsset
  sorti[i] <- Tsris
  # Request hours explicitly: a bare `Tsris - Tsset` picks its unit
  # automatically, so the number is not guaranteed to be in hours.
  night_hours[i] <- round(
    as.numeric(difftime(Tsris, Tsset, units = "hours")), 2
  )
}

# fetch results
temp$sun_set <- posta
temp$sun_rise <- sorti
temp$night_hours <- night_hours
temp$night_effort <- temp$night_hours + (time_buff * 2)

result <- temp

But it takes a very long time to run. So, I would like to know if there is another simpler way to do that, using, for example, the mutate function from the dplyr package instead of using a loop?

英文:

I have a data frame called result which looks like that.

lat lng Night
41.60701 1.000831 2019-06-19
41.98151 1.973059 2020-04-11
... ... ...

Basically, I would like to add 4 columns: one for the time of sunset, the second for the time of sunrise, the third for the duration of the night in hours, and finally the fourth for the sampling effort (I just add the time buffer to the duration of the night). I managed to do this by using a loop in the following code (using the suncalc package for getSunlightTimes).

library("plyr")
library("dplyr")
library("reshape")
library("data.table")
library("stringr")
library("tidyr")
library("ineq")
library("suncalc")

library(suncalc)

# Add the sunset of each sampling night, the sunrise of the following morning,
# the night length in hours and the sampling effort to every row of `result`
# (which must provide the columns `lat`, `lng` and `Night`).
time_buff <- 0.30 # buffer in hours; counted twice in `night_effort` below
temp <- result
n_rows <- nrow(temp)

# Preallocate typed result vectors. `ls()` returns the names of the objects in
# the workspace rather than an empty container, so seeding the results with it
# leaves stray names behind whenever the workspace holds more objects than
# `temp` has rows.
posta <- as.POSIXct(rep(NA_real_, n_rows), origin = "1970-01-01", tz = "UTC")
sorti <- posta
night_hours <- numeric(n_rows)

# seq_len() gives an empty sequence for a zero-row data frame, whereas
# 1:dim(temp)[1] would iterate over c(1, 0).
for (i in seq_len(n_rows)) {
  lat <- temp$lat[i]
  long <- temp$lng[i]
  sset <- as.Date(temp$Night[i]) # calendar day on which the night starts
  sris <- sset + 1 # the night ends on the following calendar day
  Tsset <- getSunlightTimes(sset, lat, long,
    keep = "sunset", tz = "UTC"
  )$sunset
  Tsris <- getSunlightTimes(sris, lat, long,
    keep = "sunrise", tz = "UTC"
  )$sunrise
  posta[i] <- Tsset
  sorti[i] <- Tsris
  # Request hours explicitly: a bare `Tsris - Tsset` picks its unit
  # automatically, so the number is not guaranteed to be in hours.
  night_hours[i] <- round(
    as.numeric(difftime(Tsris, Tsset, units = "hours")), 2
  )
}

# fetch results
temp$sun_set <- posta
temp$sun_rise <- sorti
temp$night_hours <- night_hours
temp$night_effort <- temp$night_hours + (time_buff * 2)

result <- temp

But it takes a very long time to run. So, I would like to know if there is a simpler way to do that, for example using the mutate function from the dplyr package instead of a loop?

答案1

得分: 2

基本计算可以使用tidyverse中的rowwise来完成,即getSunlightTimes对于lat、long不是矢量化的,所以我们必须一次只提供单个值。如果'lat'、'long'存在重复值,与其使用rowwise,可能更好的方法是使用group_by(lat, lng),然后在getSunlightTimes调用中使用first(lat)、first(lng)。

library(dplyr)
# Call getSunlightTimes() once per row, so every call receives a single
# date/lat/lng triple: the sunset of the sampling night and the sunrise of the
# following calendar day, both in UTC.
data %>%
  rowwise() %>%
  mutate(
    sset = as.Date(Night),
    sris = sset + 1,
    Tsset = getSunlightTimes(
      date = sset, lat = lat, lon = lng, keep = "sunset", tz = "UTC"
    )$sunset,
    Tsris = getSunlightTimes(
      date = sris, lat = lat, lon = lng, keep = "sunrise", tz = "UTC"
    )$sunrise
  ) %>%
  ungroup()

输出结果:

# A tibble: 2 x 7
    lat   lng Night      sset       sris       Tsset               Tsris              
  <dbl> <dbl> <chr>      <date>     <date>     <dttm>              <dttm>             
1  41.6  1.00 2019-06-19 2019-06-19 2019-06-20 2019-06-19 19:34:19 2019-06-20 04:22:55
2  42.0  1.97 2020-04-11 2020-04-11 2020-04-12 2020-04-11 18:29:30 2020-04-12 05:17:10

数据:

# Two-row sample of the question's data frame; `Night` is kept as character.
data <- data.frame(
  lat = c(41.60701, 41.98151),
  lng = c(1.000831, 1.973059),
  Night = c("2019-06-19", "2020-04-11"),
  stringsAsFactors = FALSE
)
英文:

The basic calculation can be done in tidyverse with rowwise - i.e. getSunlightTimes is not vectorized for the lat, long so we have to provide only a single value at a time. If there are duplicates for 'lat', 'long', instead of rowwise, may be better to do group_by(lat, lng) and then use first(lat), first(lng) in the getSunlightTimes call

library(dplyr)
# Call getSunlightTimes() once per row, so every call receives a single
# date/lat/lng triple: the sunset of the sampling night and the sunrise of the
# following calendar day, both in UTC.
data %>%
  rowwise() %>%
  mutate(
    sset = as.Date(Night),
    sris = sset + 1,
    Tsset = getSunlightTimes(
      date = sset, lat = lat, lon = lng, keep = "sunset", tz = "UTC"
    )$sunset,
    Tsris = getSunlightTimes(
      date = sris, lat = lat, lon = lng, keep = "sunrise", tz = "UTC"
    )$sunrise
  ) %>%
  ungroup()

-output

# A tibble: 2 × 7
    lat   lng Night      sset       sris       Tsset               Tsris              
  <dbl> <dbl> <chr>      <date>     <date>     <dttm>              <dttm>             
1  41.6  1.00 2019-06-19 2019-06-19 2019-06-20 2019-06-19 19:34:19 2019-06-20 04:22:55
2  42.0  1.97 2020-04-11 2020-04-11 2020-04-12 2020-04-11 18:29:30 2020-04-12 05:17:10

data

# Two-row sample of the question's data frame; `Night` is kept as character.
data <- data.frame(
  lat = c(41.60701, 41.98151),
  lng = c(1.000831, 1.973059),
  Night = c("2019-06-19", "2020-04-11"),
  stringsAsFactors = FALSE
)

答案2

得分: 1

以下是要翻译的内容:

Update:

我们不需要使用group_by或rowwise。阅读?getSunlightTimes可以了解到,如果有多个坐标,可以使用data参数作为替代方法:

date:日期。单个或多个日期。YYYY-MM-DD

lat:数字。单个纬度

lon:数字。单个经度

data:数据框。用于传递多个坐标的替代方法

keep:字符。要保留的变量向量。请参阅详细信息

tz:字符。结果的时区

因此,我们可以将整个数据框作为参数传递给函数,但需要为列提供正确的名称。请参见下文:

# getSunlightTimes() accepts many coordinates at once through `data`, which
# must hold columns named `date`, `lat` and `lon`, so neither rowwise() nor
# group_by() is required.
result %>%
  mutate(night = as.Date(night)) %>%
  mutate(
    sunset = getSunlightTimes(
      data = data.frame(date = night, lat = lat, lon = long),
      keep = "sunset"
    )$sunset,
    # the night ends with the sunrise of the following calendar day
    sunrise = getSunlightTimes(
      data = data.frame(date = night + 1, lat = lat, lon = long),
      keep = "sunrise"
    )$sunrise,
    night_hr = round(
      as.numeric(difftime(sunrise, sunset, units = "hours")), 2
    ),
    # the buffer is counted twice
    night_effort = night_hr + (time_buff * 2)
  )

我们可以使用rowwise而不是循环。或者更好的是,使用group_by(lat, long),只传递每个组的第一个纬度和经度。

library(lubridate)
library(dplyr)
library(suncalc)

# Example input: two sites, one sampling night each.
result <- data.frame(
  lat = c(39.9526, 34.0522),
  long = c(-75.1652, -118.243),
  night = mdy(c("4/13/2023", "4/01/2023"))
)
time_buff <- 0.3 # buffer in hours; counted twice in `night_effort`

# One getSunlightTimes() call per distinct site: within a (lat, long) group the
# coordinates are constant, so only the first value of each is passed along
# with all of the group's dates.
result %>%
  group_by(lat, long) %>%
  mutate(
    sunset = getSunlightTimes(
      date = as.Date(night), lat = first(lat), lon = first(long)
    )$sunset,
    # the night ends with the sunrise of the following calendar day
    sunrise = getSunlightTimes(
      date = as.Date(night) + 1, lat = first(lat), lon = first(long)
    )$sunrise,
    night_hr = round(
      as.numeric(difftime(sunrise, sunset, units = "hours")), 2
    ),
    night_effort = night_hr + (time_buff * 2)
  ) %>%
  ungroup()

希望这些翻译对您有所帮助。

英文:

Update:

We don't need to use a group_by or rowwise. Reading ?getSunlightTimes tells us to use data as an alternative if we have multiple coordinates:

> date : Date. Single or multiple Date. YYYY-MM-DD
>
> lat : numeric. Single latitude
>
> lon : numeric. Single longitude
>
> data : data.frame. Alternative to use date, lat, lon for passing multiple coordinates
>
> keep : character. Vector of variables to keep. See Details
>
> tz : character. Timezone of results

So we can pass the dataframe as a whole to the function, but need to have the right names for the columns. See below;

# getSunlightTimes() accepts many coordinates at once through `data`, which
# must hold columns named `date`, `lat` and `lon`, so neither rowwise() nor
# group_by() is required.
result %>%
  mutate(night = as.Date(night)) %>%
  mutate(
    sunset = getSunlightTimes(
      data = data.frame(date = night, lat = lat, lon = long),
      keep = "sunset"
    )$sunset,
    # the night ends with the sunrise of the following calendar day
    sunrise = getSunlightTimes(
      data = data.frame(date = night + 1, lat = lat, lon = long),
      keep = "sunrise"
    )$sunrise,
    night_hr = round(
      as.numeric(difftime(sunrise, sunset, units = "hours")), 2
    ),
    # the buffer is counted twice
    night_effort = night_hr + (time_buff * 2)
  )

#> # A tibble: 2 x 7
#>     lat   long night      sunset              sunrise             night_hr night_effort
#>   <dbl>  <dbl> <date>     <dttm>              <dttm>                 <dbl>        <dbl>
#> 1  40.0  -75.2 2023-04-13 2023-04-13 23:37:13 2023-04-14 10:25:55     10.8         11.4
#> 2  34.1 -118.  2023-04-01 2023-04-02 02:14:19 2023-04-02 13:40:21     11.4         12.0


We can use rowwise instead of a loop. Or better, group_by(lat, long) and only pass the first lat and long for each group.

library(lubridate)
library(dplyr)
library(suncalc)

# Example input: two sites, one sampling night each.
result <- data.frame(
  lat = c(39.9526, 34.0522),
  long = c(-75.1652, -118.243),
  night = mdy(c("4/13/2023", "4/01/2023"))
)
time_buff <- 0.3 # buffer in hours; counted twice in `night_effort`

# One getSunlightTimes() call per distinct site: within a (lat, long) group the
# coordinates are constant, so only the first value of each is passed along
# with all of the group's dates.
result %>%
  group_by(lat, long) %>%
  mutate(
    sunset = getSunlightTimes(
      date = as.Date(night), lat = first(lat), lon = first(long)
    )$sunset,
    # the night ends with the sunrise of the following calendar day
    sunrise = getSunlightTimes(
      date = as.Date(night) + 1, lat = first(lat), lon = first(long)
    )$sunrise,
    night_hr = round(
      as.numeric(difftime(sunrise, sunset, units = "hours")), 2
    ),
    night_effort = night_hr + (time_buff * 2)
  ) %>%
  ungroup()

#> # A tibble: 2 x 7
#>     lat   long night      sunset              sunrise             night_hr night_effort
#>   <dbl>  <dbl> <date>     <dttm>              <dttm>                 <dbl>        <dbl>
#> 1  40.0  -75.2 2023-04-13 2023-04-13 23:37:13 2023-04-14 10:25:55     10.8         11.4
#> 2  34.1 -118.  2023-04-01 2023-04-02 02:14:19 2023-04-02 13:40:21     11.4         12.0

huangapple
  • 本文由 发表于 2023年4月13日 22:26:24
  • 转载请务必保留本文链接:https://go.coder-hub.com/76006613.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定