英文:
Trying to make a data frame from web-scraped data
问题
I am trying to make a data frame from the data at this URL, https://api.finance.naver.com/siseJson.naver?symbol=005930&requestType=1&startTime=20220312&endTime=20230517&timeframe=day, in RStudio.
# Clear every object (including dot-prefixed ones) from the workspace.
# NOTE(review): restarting the R session is a more reliable way to get a
# clean state than rm(list = ls()).
rm(list = ls(all.names = TRUE))
library(jsonlite)
library(dplyr)
# Endpoint queried with symbol=005930 and timeframe=day.
url <- "https://api.finance.naver.com/siseJson.naver?symbol=005930&requestType=1&startTime=20220312&endTime=20230517&timeframe=day"
# Read the raw response as a character vector, one element per line.
data <- readLines(url, warn = FALSE)
print(data)
# Remove the tab characters from every line.
data_t <- gsub("\t", "", data)
print(data_t)
As a result, the data is printed like this:
[1] ""
[2] " [['날짜', '시가', '고가', '저가', '종가', '거래량', '외국인소진율'],"
[3] ""
[4] ""
[5] ""
[6] ""
[7] "[\"20220314\", 70000, 70200, 69600, 70200, 9040993, 51.84],"
[8] ""
[9] "[\"20220315\", 69800, 70100, 69500, 69500, 10258562, 51.8],"
[10] ""
[11] "[\"20220316\", 70200, 70500, 69700, 70400, 10175750, 51.79],"
[12] ""
[13] "[\"20220317\", 71200, 71800, 70900, 71200, 17646315, 51.79],"
[14] ""
[15] "[\"20220318\", 70600, 70900, 70200, 70700, 14410038, 51.79],"
How do I pick out the data rows and combine them correctly?
How should I write the code to complete this?
Please provide code that builds a data frame from the URL.
英文:
I am trying to make a data frame from the data at this URL, https://api.finance.naver.com/siseJson.naver?symbol=005930&requestType=1&startTime=20220312&endTime=20230517&timeframe=day, in RStudio.
# Clear every object (including dot-prefixed ones) from the workspace.
# NOTE(review): restarting the R session is a more reliable way to get a
# clean state than rm(list = ls()).
rm(list = ls(all.names = TRUE))
library(jsonlite)
library(dplyr)
# Endpoint queried with symbol=005930 and timeframe=day.
url <- "https://api.finance.naver.com/siseJson.naver?symbol=005930&requestType=1&startTime=20220312&endTime=20230517&timeframe=day"
# Read the raw response as a character vector, one element per line.
data <- readLines(url, warn = FALSE)
print(data)
# Remove the tab characters from every line.
data_t <- gsub("\t", "", data)
print(data_t)
As a result, the data is printed like this:
[1] ""
[2] " [['날짜', '시가', '고가', '저가', '종가', '거래량', '외국인소진율'],"
[3] ""
[4] ""
[5] ""
[6] ""
[7] "[\"20220314\", 70000, 70200, 69600, 70200, 9040993, 51.84],"
[8] ""
[9] "[\"20220315\", 69800, 70100, 69500, 69500, 10258562, 51.8],"
[10] ""
[11] "[\"20220316\", 70200, 70500, 69700, 70400, 10175750, 51.79],"
[12] ""
[13] "[\"20220317\", 71200, 71800, 70900, 71200, 17646315, 51.79],"
[14] ""
[15] "[\"20220318\", 70600, 70900, 70200, 70700, 14410038, 51.79],"
How do I pick out the data rows and combine them correctly?
How should I write the code to complete this?
Please provide code that builds a data frame from the URL.
答案1
得分: 1
以下是您要翻译的内容:
library(dplyr)
library(stringr)
# Address of the endpoint to read.
url <- "https://api.finance.naver.com/siseJson.naver?symbol=005930&requestType=1&startTime=20220312&endTime=20230517&timeframe=day"
# Fetch the response as a character vector (one element per line),
# with readLines() warnings turned off.
data <- readLines(con = url, warn = FALSE)
当我们移除制表符\t
时,同时也可以去掉括号,而 strsplit()
允许我们将行拆分为向量,其中每个未来的数据框列都是单独的条目。
# Strip tabs and square brackets from every line, then split each line on
# ", " so that each field becomes its own vector element.
data2 <-
  gsub("\t|\\[|\\]", "", data) |>
  strsplit(", ")
让我们从第二行获取列名并从数据对象中删除该行。
# The second element holds the header fields: trim surrounding whitespace
# and drop the single quotes to obtain the column names.
column_names <-
  data2[[2]] |>
  trimws() |>
  str_remove_all("'")
# Remove the header element so only data (and empty) elements remain.
data3 <-
  data2[-2]
删除空行。
# Keep only the elements that contain at least one field.
data4 <- Filter(\(fields) length(fields) != 0, data3)
为每个数据向量添加列名,并使用 dplyr::bind_rows()
绑定到数据帧中。
# Name every row vector with the column names, stack the rows into one
# tibble, then strip the literal double quotes that still surround the
# date values.
lapply(
  data4,
  setNames,
  column_names
) |>
  bind_rows() |>
  mutate(날짜 = str_remove_all(날짜, "\\\""))
#> # A tibble: 294 × 7
#> 날짜 시가 고가 저가 종가 거래량 `외국인소진율,`
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 20220314 70000 70200 69600 70200 9040993 51.84,
#> 2 20220315 69800 70100 69500 69500 10258562 51.8,
#> 3 20220316 70200 70500 69700 70400 10175750 51.79,
#> 4 20220317 71200 71800 70900 71200 17646315 51.79,
#> 5 20220318 70600 70900 70200 70700 14410038 51.79,
#> 6 20220321 70900 71000 69900 69900 11169002 51.75,
#> 7 20220322 69900 70500 69900 70300 9402666 51.75,
#> 8 20220323 70600 71200 70300 70500 12398025 51.74,
#> 9 20220324 69600 70300 69600 69800 37943357 51.97,
#> 10 20220325 70100 70200 69600 69800 12986010 51.91,
#> # ℹ 284 more rows
英文:
library(dplyr)
library(stringr)
# Address of the endpoint to read.
url <- "https://api.finance.naver.com/siseJson.naver?symbol=005930&requestType=1&startTime=20220312&endTime=20230517&timeframe=day"
# Fetch the response as a character vector (one element per line),
# with readLines() warnings turned off.
data <- readLines(con = url, warn = FALSE)
When we remove the tabs (\t),
we can get rid of the brackets at the same time,
and strsplit()
allows us to break up the lines into vectors where each
future data frame column is a separate entry.
# Delete tabs and square brackets from each line, then split the cleaned
# lines on ", " to get one character vector of fields per line.
data2 <- strsplit(gsub("\t|\\[|\\]", "", data), ", ")
Let’s get the column names from the second line and drop the line from the
data object
# Header fields live in the second element: trim whitespace and remove the
# single quotes around each name.
column_names <- str_remove_all(trimws(data2[[2]]), "'")
# Everything except the header element.
data3 <- data2[-2]
Drop empty rows.
# Keep only the elements that contain at least one field.
data4 <- Filter(\(fields) length(fields) != 0, data3)
Add column names to each data vector and bind it into a data.frame with
dplyr::bind_rows()
# Attach the column names to each row vector, combine the rows into a
# tibble, and remove the literal double quotes left around the dates.
data4 |>
  lapply(\(fields) setNames(fields, column_names)) |>
  bind_rows() |>
  mutate(날짜 = str_remove_all(날짜, "\\\""))
#> # A tibble: 294 × 7
#> 날짜 시가 고가 저가 종가 거래량 `외국인소진율,`
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 20220314 70000 70200 69600 70200 9040993 51.84,
#> 2 20220315 69800 70100 69500 69500 10258562 51.8,
#> 3 20220316 70200 70500 69700 70400 10175750 51.79,
#> 4 20220317 71200 71800 70900 71200 17646315 51.79,
#> 5 20220318 70600 70900 70200 70700 14410038 51.79,
#> 6 20220321 70900 71000 69900 69900 11169002 51.75,
#> 7 20220322 69900 70500 69900 70300 9402666 51.75,
#> 8 20220323 70600 71200 70300 70500 12398025 51.74,
#> 9 20220324 69600 70300 69600 69800 37943357 51.97,
#> 10 20220325 70100 70200 69600 69800 12986010 51.91,
#> # ℹ 284 more rows
答案2
得分: 1
library(dplyr)
# Read the response as CSV, supplying the seven column names explicitly.
readr::read_csv(
  "https://api.finance.naver.com/siseJson.naver?symbol=005930&requestType=1&startTime=20220312&endTime=20230517&timeframe=day",
  col_names = c(
    "날짜", "시가", "고가", "저가", "종가", "거래량", "외국인소진율"
  )
) |>
  # Extract the number from every character column.
  mutate(across(where(is.character), readr::parse_number)) |>
  # Discard the last column.
  select(-last_col()) |>
  # Drop the first row (presumably the response's own header line);
  # a negative index stays correct even when fewer than two rows exist.
  slice(-1)
Results:
# A tibble: 295 × 7
날짜 시가 고가 저가 종가 거래량 외국인소진율
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 20220314 70000 70200 69600 70200 9040993 51.8
2 20220315 69800 70100 69500 69500 10258562 51.8
3 20220316 70200 70500 69700 70400 10175750 51.8
4 20220317 71200 71800 70900 71200 17646315 51.8
5 20220318 70600 70900 70200 70700 14410038 51.8
6 20220321 70900 71000 69900 69900 11169002 51.8
7 20220322 69900 70500 69900 70300 9402666 51.8
8 20220323 70600 71200 70300 70500 12398025 51.7
9 20220324 69600 70300 69600 69800 37943357 52.0
10 20220325 70100 70200 69600 69800 12986010 51.9
英文:
library(dplyr)
# Read the response as CSV, supplying the seven column names explicitly.
readr::read_csv(
  "https://api.finance.naver.com/siseJson.naver?symbol=005930&requestType=1&startTime=20220312&endTime=20230517&timeframe=day",
  col_names = c(
    "날짜", "시가", "고가", "저가", "종가", "거래량", "외국인소진율"
  )
) |>
  # Extract the number from every character column.
  mutate(across(where(is.character), readr::parse_number)) |>
  # Discard the last column.
  select(-last_col()) |>
  # Drop the first row (presumably the response's own header line);
  # a negative index stays correct even when fewer than two rows exist.
  slice(-1)
Results:
# A tibble: 295 × 7
날짜 시가 고가 저가 종가 거래량 외국인소진율
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 20220314 70000 70200 69600 70200 9040993 51.8
2 20220315 69800 70100 69500 69500 10258562 51.8
3 20220316 70200 70500 69700 70400 10175750 51.8
4 20220317 71200 71800 70900 71200 17646315 51.8
5 20220318 70600 70900 70200 70700 14410038 51.8
6 20220321 70900 71000 69900 69900 11169002 51.8
7 20220322 69900 70500 69900 70300 9402666 51.8
8 20220323 70600 71200 70300 70500 12398025 51.7
9 20220324 69600 70300 69600 69800 37943357 52.0
10 20220325 70100 70200 69600 69800 12986010 51.9
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论