微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

R:使用 qplot 绘制数据

如何解决R:使用 qplot 绘制数据

movies_df 是一个包含 100 条记录的数据框,其结构如下:

s

使用qplot

绘制具有最高运行时间的流派

# Histogram of Runtime (30 bins), with bars filled by Genre.
qplot(Runtime, data = movies_df, fill = Genre, bins = 30)

enter image description here



从上图可以看出,有 4 部动作片的运行时间最长(160 分钟)

如何针对流派 = Action(运行时间最长的流派)的电影,按 Title 绘制图形?

重现数据的代码

library("rvest")


# Search-results page to scrape (100 feature films released in 2016).
url <- "https://www.imdb.com/search/title?count=100&release_date=2016,2016&title_type=feature"

# Download and parse the page once; every scraper below reads from `webpage`.
webpage <- read_html(url)

## ---- PRE-PROCESSING ---- ##

# Rank scraping: select the ".text-primary" nodes and parse their text
# as numbers.
rank_data_html <- html_nodes(webpage, ".text-primary")
rank_data <- as.numeric(html_text(rank_data_html))

# Title scraping: take the text of every link inside a list-item header.
title_data_html <- html_nodes(webpage, ".lister-item-header a")
title_data <- html_text(title_data_html)

# Description scraping: take the muted text that follows the ratings bar
# and drop embedded newlines.
desc_nodes <- html_nodes(webpage, ".ratings-bar+.text-muted")
desc_data <- gsub("\n", "", html_text(desc_nodes))


# Runtime scraping: strip the " min" suffix from each runtime string and
# convert what is left to a number.
runtime_data_html <- html_nodes(webpage, ".text-muted .runtime")
runtime_data <- html_text(runtime_data_html)
# gsub() needs an explicit replacement; the empty string deletes the match.
runtime_data <- gsub(" min", "", runtime_data, fixed = TRUE)
runtime_data <- as.numeric(runtime_data)

# Genre scraping: clean the genre text and keep only the first genre
# listed for each movie, stored as a factor.
genre_data_html <- html_nodes(webpage, ".genre")
genre_data <- html_text(genre_data_html)
# Each gsub() needs an explicit replacement; "" deletes the match.
genre_data <- gsub("\n", "", genre_data, fixed = TRUE)  # drop newlines
genre_data <- gsub(" ", "", genre_data, fixed = TRUE)   # drop spaces
genre_data <- gsub(",.*", "", genre_data)               # cut after first comma
genre_data <- as.factor(genre_data)

# Rating scraping: read the <strong> text inside each IMDb rating widget
# and parse it as a number.
rating_data_html <- html_nodes(webpage, ".ratings-imdb-rating strong")
rating_data <- as.numeric(html_text(rating_data_html))

# Votes scraping: remove the commas from each vote count, then convert it
# to a number.
Votes_data_html <- html_nodes(
  webpage,
  ".sort-num_Votes-visible span:nth-child(2)"
)
Votes_data <- html_text(Votes_data_html)
# gsub() needs an explicit replacement; "" deletes the commas.
Votes_data <- gsub(",", "", Votes_data, fixed = TRUE)
Votes_data <- as.numeric(Votes_data)

# Director scraping: take the first link of the paragraph that follows the
# muted description text, stored as a factor.
directors_data_html <- html_nodes(webpage, ".text-muted+ p a:nth-child(1)")
directors_data <- as.factor(html_text(directors_data_html))

# Actor scraping: take the link right after the ".ghost" separator in each
# list item, stored as a factor.
actors_data_html <- html_nodes(webpage, ".lister-item-content .ghost+ a")
actors_data <- as.factor(html_text(actors_data_html))

# Metascore scraping: read the score text, strip spaces, pad the entries
# that are missing on the page with NA, and convert to numeric.
Metascore_data_html <- html_nodes(webpage, ".Metascore")
Metascore_data <- html_text(Metascore_data_html)
# gsub() needs an explicit replacement; "" deletes the spaces.
Metascore_data <- gsub(" ", "", Metascore_data, fixed = TRUE)

# Insert NA so that it lands at each listed position. The positions are
# hard-coded; presumably they are the movies without a Metascore on the
# scraped page -- verify against the live page before reuse.
for (i in c(39, 73, 80)) {
  Metascore_data <- append(Metascore_data, NA, after = i - 1)
}

# Convert once, after all placeholders are in place.
Metascore_data <- as.numeric(Metascore_data)


# Gross-earnings scraping: remove the "M" suffix, drop the first character
# (presumably a currency symbol -- verify against the page), pad missing
# entries with NA, and convert to numeric.
gross_data_html <- html_nodes(webpage, ".ghost~ .text-muted+ span")
gross_data <- html_text(gross_data_html)
# gsub() needs an explicit replacement; "" deletes the "M".
gross_data <- gsub("M", "", gross_data, fixed = TRUE)
gross_data <- substring(gross_data, 2, 6)

# Insert NA so that it lands at each listed position. append(after = i - 1)
# also handles i = 1 correctly: the manual slice x[1:(i - 1)] would become
# x[c(1, 0)] and duplicate the first element instead of prepending NA.
# The positions are hard-coded; presumably they are the movies without a
# gross figure on the scraped page -- verify against the live page.
for (i in c(1, 3, 4, 5, 6, 7, 8, 9, 10)) {
  gross_data <- append(gross_data, NA, after = i - 1)
}

gross_data <- as.numeric(gross_data)



# Combine the scraped vectors into a single data frame, one column per
# attribute. All vectors must have the same length for this to succeed.
movies_df <- data.frame(
  Rank = rank_data,
  Title = title_data,
  Description = desc_data,
  Runtime = runtime_data,
  Genre = genre_data,
  rating = rating_data,
  Metascore = Metascore_data,
  Votes = Votes_data,
  Gross_Earning_in_Mil = gross_data,
  Director = directors_data,
  Actor = actors_data
)

解决方法

首先,您需要安装 dplyr 包并加载它,以便过滤数据。然后,筛选出所有类型(Genre)为 Action 的电影,如下所示:

library(dplyr)
# Keep only the Action movies, then draw the runtime histogram (30 bins)
# with bars filled by movie title.
newDataset <- movies_df %>%
  filter(Genre == "Action")
qplot(Runtime, data = newDataset, fill = Title, bins = 30)

如果对您有帮助,请给我的答案打分。我希望,现在一切都清楚了

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。