如何解决R:使用 qplot 绘制数据
movies_df
是一个包含 100 条记录和结构的数据框:
使用qplot
# Plot Runtime from movies_df with fill mapped to Genre, using 30 bins.
qplot(data = movies_df,Runtime,fill = Genre,bins = 30)
从上图可以看出,有 4 部动作片的运行时间最长(160 分钟)。
如何只针对流派为 Action(运行时间最长的流派)的电影,按 Title 绘制图形?
重现数据的代码:
library("rvest")
# Fetch and parse the IMDb search page selected by the query string below
# (count=100, release_date=2016,2016, title_type=feature).
url <- "https://www.imdb.com/search/title?count=100&release_date=2016,2016&title_type=feature"
webpage <- read_html(url)
## ---- PRE-PROCESSING ---- ##

# Rank: text of the ".text-primary" nodes, converted to numeric.
rank_data_html <- html_nodes(webpage, ".text-primary")
rank_data <- as.numeric(html_text(rank_data_html))

# Title: text of the links inside ".lister-item-header".
title_data_html <- html_nodes(webpage, ".lister-item-header a")
title_data <- html_text(title_data_html)

# Description: text of the ".text-muted" nodes that directly follow a
# ".ratings-bar" node, with newline characters removed.
desc_nodes <- html_nodes(webpage, ".ratings-bar+.text-muted")
desc_data <- gsub("\n", "", html_text(desc_nodes))
# Runtime: text of the ".text-muted .runtime" nodes, converted to numeric.
runtime_data_html <- html_nodes(webpage, ".text-muted .runtime")
runtime_data <- html_text(runtime_data_html)
# gsub() takes (pattern, replacement, x); the empty replacement strips the
# " min" suffix so that the remaining text can be parsed as a number.
runtime_data <- gsub(" min", "", runtime_data)
runtime_data <- as.numeric(runtime_data)
# Genre: text of the ".genre" nodes, reduced to the first listed genre and
# stored as a factor.
genre_data_html <- html_nodes(webpage, ".genre")
genre_data <- html_text(genre_data_html)
# gsub() takes (pattern, replacement, x); each call below deletes its match.
genre_data <- gsub("\n", "", genre_data)   # drop newline characters
genre_data <- gsub(" ", "", genre_data)    # drop spaces
genre_data <- gsub(",.*", "", genre_data)  # drop everything from the first comma on
genre_data <- as.factor(genre_data)
# Rating: text of the ".ratings-imdb-rating strong" nodes, converted to numeric.
rating_data_html <- html_nodes(webpage, ".ratings-imdb-rating strong")
rating_data <- as.numeric(html_text(rating_data_html))
# Votes: text of the second child span under ".sort-num_Votes-visible",
# converted to numeric.
Votes_data_html <- html_nodes(webpage, ".sort-num_Votes-visible span:nth-child(2)")
Votes_data <- html_text(Votes_data_html)
# gsub() takes (pattern, replacement, x); the empty replacement removes the
# commas so that as.numeric() can parse the value.
Votes_data <- gsub(",", "", Votes_data)
Votes_data <- as.numeric(Votes_data)
# Director: text of the first-child links matched by the selector below,
# stored as a factor.
directors_data_html <- html_nodes(webpage, ".text-muted+ p a:nth-child(1)")
directors_data <- as.factor(html_text(directors_data_html))

# Actor: text of the link that directly follows a ".ghost" node inside
# ".lister-item-content", stored as a factor.
actors_data_html <- html_nodes(webpage, ".lister-item-content .ghost+ a")
actors_data <- as.factor(html_text(actors_data_html))
# Metascore: text of the ".Metascore" nodes, with NA inserted at fixed
# positions and the result converted to numeric.
Metascore_data_html <- html_nodes(webpage, ".Metascore")
Metascore_data <- html_text(Metascore_data_html)
# gsub() takes (pattern, replacement, x); the empty replacement removes spaces.
Metascore_data <- gsub(" ", "", Metascore_data)
# Insert NA before the element currently at each listed position.
# NOTE(review): presumably these are the entries without a Metascore; the
# positions are hard-coded for one snapshot of the page — verify before reuse.
for (i in c(39, 73, 80)) {
  Metascore_data <- append(Metascore_data, NA, after = i - 1)
}
# Convert once, after all insertions are done.
Metascore_data <- as.numeric(Metascore_data)
# Gross earnings: text of the ".ghost~ .text-muted+ span" nodes, with NA
# inserted at fixed positions and the result converted to numeric.
gross_data_html <- html_nodes(webpage, ".ghost~ .text-muted+ span")
gross_data <- html_text(gross_data_html)
# gsub() takes (pattern, replacement, x); the empty replacement removes "M".
gross_data <- gsub("M", "", gross_data)
# Keep characters 2-6, skipping the first character
# (presumably a currency symbol — TODO confirm against the page).
gross_data <- substring(gross_data, 2, 6)
# Insert NA before the element currently at each listed position.
# append(after = i - 1) is used instead of slicing with 1:(i - 1), because for
# i == 1 that range is c(1, 0) and would duplicate the first element.
# NOTE(review): presumably these are the entries without a gross figure; the
# positions are hard-coded for one snapshot of the page — verify before reuse.
for (i in c(1, 3, 4, 5, 6, 7, 8, 9, 10)) {
  gross_data <- append(gross_data, NA, after = i - 1)
}
gross_data <- as.numeric(gross_data)
# Combine the scraped vectors into one data frame, one column per attribute.
movies_df <- data.frame(
  Rank = rank_data,
  Title = title_data,
  Description = desc_data,
  Runtime = runtime_data,
  Genre = genre_data,
  rating = rating_data,
  Metascore = Metascore_data,
  Votes = Votes_data,
  Gross_Earning_in_Mil = gross_data,
  Director = directors_data,
  Actor = actors_data
)
解决方法
首先需要安装并加载 dplyr 包,用它来过滤数据;然后筛选出所有 Genre 为 Action 的电影,再以 Title 作为填充颜色重新绘图:
library(dplyr)
# Keep only the rows whose Genre is "Action", then redraw the Runtime plot
# with fill mapped to Title.
newDataset <- filter(movies_df, Genre == "Action")
qplot(Runtime, data = newDataset, fill = Title, bins = 30)
如果对您有帮助,请给我的答案打分。我希望,现在一切都清楚了
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。