微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

R 使用 naniar 包或 colSums 无法检测到所有缺失值

如何解决:R 使用 naniar 包或 colSums 无法检测到所有缺失值

我使用的数据是来自 kaggle (https://www.kaggle.com/harshitshankhdhar/imdb-dataset-of-top-1000-movies-and-tv-shows/download) 的前 1000 名 IMDB 电影

为了演示这个问题,这里只取了去掉部分变量之后的前 100 条观测值。(数据有点长,请见谅)

structure(list(Released_Year = structure(c(73L,51L,87L,53L,36L,82L,73L,72L,89L,78L,80L,45L,81L,69L,59L,54L,99L,98L,93L,77L,76L,74L,70L,56L,41L,33L,25L,90L,85L,79L,67L,64L,47L,39L,21L,15L,10L,97L,96L,95L,91L,88L,86L,63L,60L,58L,50L,42L,43L,29L,19L,92L,84L,83L,76L),.Label = c("1920","1921","1922","1924","1925","1926","1927","1928","1930","1931","1932","1933","1934","1935","1936","1937","1938","1939","1940","1941","1942","1943","1944","1945","1946","1947","1948","1949","1950","1951","1952","1953","1954","1955","1956","1957","1958","1959","1960","1961","1962","1963","1964","1965","1966","1967","1968","1969","1970","1971","1972","1973","1974","1975","1976","1977","1978","1979","1980","1981","1982","1983","1984","1985","1986","1987","1988","1989","1990","1991","1992","1993","1994","1995","1996","1997","1998","1999","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","PG"),class = "factor"),Certificate = structure(c(3L,3L,16L,14L,9L,1L,8L,5L,7L,11L,14L),.Label = c("","16","A","Approved","G","GP","Passed","PG","PG-13","R","TV-14","TV-MA","TV-PG","U","U/A","UA","Unrated"),Runtime = structure(c(43L,75L,137L,94L,55L,49L,40L,62L,37L,34L,61L,31L,26L,17L,28L,22L,23L,13L,52L,20L,129L,38L,130L,66L,128L,27L,48L,18L,6L,65L,71L,139L,2L,35L,105L,44L,136L,24L,32L,27L),.Label = c("100 min","101 min","102 min","103 min","104 min","105 min","106 min","107 min","108 min","109 min","110 min","111 min","112 min","113 min","114 min","115 min","116 min","117 min","118 min","119 min","120 min","121 min","122 min","123 min","124 min","125 min","126 min","127 min","128 min","129 min","130 min","131 min","132 min","133 min","134 min","135 min","136 min","137 min","138 min","139 min","140 min","141 min","142 min","143 min","144 min","145 min","146 min","147 min","148 min","149 min","150 min","151 min","152 min","153 min","154 min","155 min","156 min","157 min","158 min","159 min","160 min","161 min","162 
min","163 min","164 min","165 min","166 min","167 min","168 min","169 min","170 min","171 min","172 min","174 min","175 min","177 min","178 min","179 min","180 min","181 min","183 min","184 min","185 min","186 min","188 min","189 min","191 min","192 min","193 min","194 min","195 min","196 min","197 min","201 min","202 min","204 min","205 min","207 min","209 min","210 min","212 min","220 min","224 min","228 min","229 min","238 min","242 min","321 min","45 min","64 min","67 min","68 min","69 min","70 min","71 min","72 min","75 min","76 min","78 min","79 min","80 min","81 min","82 min","83 min","84 min","85 min","86 min","87 min","88 min","89 min","90 min","91 min","92 min","93 min","94 min","95 min","96 min","97 min","98 min","99 min"),Genre = structure(c(138L,123L,12L,138L,173L,202L,107L,183L,124L,106L,133L,140L,163L,170L,195L,176L,102L,185L,101L,171L,200L,159L,172L,196L,167L,150L,108L,181L,145L,174L,120L,173L),.Label = c("Action,Adventure","Action,Adventure,Biography",Comedy",Crime",Drama",Family",Fantasy",History",Horror",Mystery",Romance",Sci-Fi",Thriller",War",Western",Biography,Comedy,Crime,Drama,Sport",Mystery,Sci-Fi,"Adventure,Film-Noir",Musical",Family,History,Horror,"Animation,Action,"Biography,Music","Comedy","Comedy,Fantasy,Music,Musical,"Crime,Film-Noir,"Drama","Drama,Romance,Thriller,War,"Family,"Fantasy,"Film-Noir,"Horror","Horror,"Mystery,"Thriller","Western"),IMDB_rating = c(9.3,9.2,9,8.9,8.8,8.7,8.6,8.5,8.4,8.3,8.3),Meta_score = c(80L,100L,NA,57L,68L,70L),No_of_Votes = 
c(2343110L,1620367L,2303232L,1129952L,689845L,1642758L,1826188L,1213505L,2067042L,1854740L,1661481L,1809221L,688390L,1485555L,1676426L,1020727L,1159315L,918088L,55291L,552778L,54995L,1512360L,699256L,651376L,1235804L,1147794L,623629L,1445096L,1270197L,1231473L,42004L,315744L,405801L,939252L,717585L,760360L,1190259L,1189773L,729603L,1341460L,1034705L,991208L,1035236L,942045L,995506L,230763L,235231L,1058081L,302844L,604211L,522093L,217881L,167839L,62635L,34112L,28401L,194838L,156479L,375110L,809955L,834477L,384171L,1357682L,1516346L,344445L,168895L,999790L,358685L,515451L,1125712L,343171L,311365L,884112L,898237L,606398L,787806L,30273L,34357L,450474L,108862L,178092L,444074L,201632L,203150L,425844L,27793L,71875L,30722L,281623L,220002L,150023L,33935L,78925L,1267869L,911664L,703810L,782001L,766870L,1069738L,861606L),Gross = structure(c(386L,152L,636L,664L,518L,493L,820L,399L,489L,442L,461L,673L,472L,219L,571L,398L,632L,240L,723L,308L,154L,667L,146L,454L,378L,464L,132L,134L,630L,452L,239L,683L,330L,246L,544L,293L,304L,593L,445L,244L,586L,115L,248L,789L,709L,298L,193L,558L,681L,323L,735L,357L,271L,349L,551L,781L,764L,383L,769L,485L,392L,711L,718L,688L,119L,467L,458L,429L,421L,144L,155L),"1,000,045",008,098",010,414",024,560",033,895",035,953",037,847",054,361",059,830",079,369",082,715",092,800",105,564",111,061",113,541",122,527",185,783",221,261",223,240",869",229,197",236,166",241,223",305",324,974",330,596",339,152",373,943",378,435",429,534",436,000",464,625",480,006",498,210",506,975",526,530,386",544,889",585,634",626,289",647,780",661,096",670,773",742,348",752,214",769,782,795",787,378",794,187",924,733",999,955","10,019,307",055,859",095,170",177",301,706",550,600,616,104",631,333",680,275",725,228",824,921",900,950","100,012,499",119",125,643",206,256",492,203",546,139","101,157,447","102,021",272,727",308,515,793","104,454,762",945,"106,260,662",954,678","107,100,855",509,799",825,862",928,"108,101,638,745","109,767,581","11,286,112",403,529",487,676",798,616",9
90,401","111,110,575",543,479",722,"112,"115,646,235",654,751","116,694","117,235,247",624,028","118,500,"119,285,432",519,402","12,064,472",281,500",633",391,761",465,371",535,562","120,540,719",620,254","121,683","124,872,350",987,023","125,618,201","127,326","128,934",078,872",261,724",392",985","13,060,843",642",182,281",275,417,292",474,588",542,874",622,657,115",756,082",780,024",838","130,096,601",164,645",922","132,072,926",088,635",958",384,315",422,809","134,966,411","136,025,503",801,374","138,433,565",730",797,449","14,131",331",677,674",743,391","141,072",319,928",340,178",843,612","142,502,728","144,501","145,989","146,408,"148,302",478,011","15,070,285",090,400",280",322,539,266",630,710","151,086",803","154,058,340","156,452,370","159,227,644","16,056,255",217,290,476",501,785",372","161,197,"162,805,434","163,214,286",245",566,459","164,615,351","165,359,520","167,445,960",189","169,209",607,287",659",708,"17,219",108,591",114,882",266,971",804",741",570,324",605,861",912",738,570","170,341","171,243,005",479,930","172,885","173,837,933","175,058","176,040,665",941","177,002,924",345","178,800,"18,701",254,702",051",354,356",593,156","181,655","183,150",637,894",875,760","184,208,848","187,705,427","188,020,017",161","189,"19,181",202,743",238",516,"190,310","191,796,233","193,817","197,171,806","198,676,"2,006,788",015,810",076,020",084,637",086,060",065",150,181,987",199,675",201,126",561",222,647",237,280,375,308",380,402,067",537,603,625,650,734,044",804,807,390",832,029",852,892,921,738","20,045,186,300,218","200,821,936","203,300","204,"206,654","208,545,589","209,028,679",726,015","21,919",848,932",877",995,263","210,609,614,939","215,288,866","216,428,042",909","217,350,581,231","218,967,620","22,238,696",244,207",245,276,455,976",490,039",494,487",858,968","220,159,"222,527,828","223,808,164","226,277,068","227,471,070","228,663",778,661","23,089,341,568",383,892",265","232,906,145","233,632,142",986","234,723","238,507",124","24,149,632",3
79,978",475,416",611,633,212","245,179","248,757,"249,358,"25,410",442,514,517",867",568,251",812","251,513,"255,959,475","257,730,019",760,692","258,168",366,"259,127",766,572","26,957",603",400,640",830,862,450",947,624","260,"261,441,092","267,665,"269,"27,298,445","274,705","275,902","277,"28,262,574",469",965,"280,"285,761,243","288,"289,916,"29,133,"290,013,036","292,576,195","293,004,"295,983,"296,"3,029,081",081,925",107,485",151,130",200,118",270,296",313,513",333,969",518",754",600",635,482",759,854",699",897,569",969,893",981,"30,177,511",328,857,814",933,"300,"301,"303,003,"304,360,277","305,413,918","309,409",811","31,"315,750","317,575,550","318,412,101","32,381,416,586",481,825",534,850",572,577",868,"322,740,140","324,591,735","327,"33,080,084",225,395,426","330,252,182","332,"333,176,"335,451,311",609","336,"34,301",700,291","341,268,248","342,551,365","348,660","349,555","35,014,192",061,552,383",739,802",811,509",893,537","356,461,711","36,491",764,313",948,322","363,709","368,234","37,030,102",634,615",707,823,"377,845,905","38,405,088",938","380,"381,011,"389,813,"39,481",567","4,018,695",691",043,686",050,200",065,116",123",135,184,231,398,414,535",420,756",496,583",711,890,878",905,971,"40,041",514",311,852",052",903,593","402,453,"408,349","41,909,"410,"412,544","415,880","42,598",438,765,"422,783,777","43,776",984,230","434,038,008","435,554","44,017,453",671,682",785,053",144",908,"448,139,099","449,191","45,915",512,466",598,982",171","453,"46,357,836,394",889,293","47,212,904",695,120","48,023,071,303",169,908",323,648",979,328","49,"495,770","5,009,677",246",128,582",209,580",216,888",321,508",834",450,040",405",595,428",617,720,376",820,649",887,457",904,366",923,075","50,668,906",690",866,927,970","502,"51,062",401,758",613",495",973,"515,"52,287,364,010",929,775","53,891",267,367,844",606,916",710,"532,"534,444","536,"539,540","54,117,234,740",580,"541,940","547,"548,707","55,"553,"56,116,183",362,352",505,993",816,992","566,"57,141,2
26",890",262",504,069",938,693","59,318",735,548",891,"6,013",979",153,167,203,207,725",436",460",532,719,864",492","600,"608,744","61,001",503,649,911","623,279,547","63,895,607","64,"65,"654,"659,325,379","66,257,002",666,"67,818","678,815,"686,"687,185","69,951,824","696,"697,"7,098,220,585",461",563,397",993,"70,099,136,147",259,870",498",511,035",973","707,"71,617","72,"733,094","739,478","74,103,820",283,"741,283","75,668",229",331,856",590,"752,"76,454",097",907","760,507,"765,"77,422",911,774","776,"78,912,963","8,627",175,178,264,530",284,819,"80,"81,001,787","82,418,"83,"845,464","85,160,433","857,524","858,"86,"871,"88,"881,"89,"898,"9,170,439,923",460,135","90,"901,610","91,"92,159","923,221","93,009","933,"936,662,225","95,860,"959,"96,522,687",898,962,"977,375","98,467,863","985,912"),class = "factor")),row.names = c(NA,100L),class = "data.frame")

现在,如果我使用 colSums 或 naniar 来统计或可视化缺失值,结果只显示 Meta_score 变量存在缺失值,其他变量中的缺失值都没有被检测出来。问题出在哪里?我该如何解决?

解决方法

既然问题带有 tidyverse 标签:

这是用 dplyr 将空字符串替换为 NA 的方法。请将 df 替换为您的数据集名称。请注意,我加入了 summarise 语句来统计每列的 NA 个数,以说明这些 NA 现在确实存在,并且其他变量中的缺失值也已被计入。


library(dplyr)

# Count missing values per column, treating empty strings as missing.
# na_if() casts its second argument to the type of the column
# (dplyr >= 1.1.0), so comparing a numeric column, or a factor that has no
# "" level, against "" raises an error. Only text-like columns can hold "",
# so the replacement is restricted to character/factor columns, which are
# compared as character.
df %>%
  mutate(
    across(
      where(~ is.character(.x) || is.factor(.x)),
      ~ na_if(as.character(.x), "")
    )
  ) %>%
  # Every column is summarised, so genuine NAs in other columns still count.
  summarise(across(everything(), ~ sum(is.na(.x))))

#>   Released_Year Certificate Runtime Genre IMDB_Rating Meta_score No_of_Votes
#> 1             0           4       0     0           0         16           0
#>   Gross
#> 1    15

由 reprex 包 (v0.3.0) 于 2021 年 3 月 23 日创建

另一种解答:

在您用 dput 给出的数据中,除了 Meta_score 列之外,其他列都没有缺失值(NA)。

Gross 列中有些值看起来像缺失值,但实际上是空字符串 "",因为该列被具有误导性地存成了字符列(应当先清理)。

下面的代码会把该列中的空字符串变成真正的 NA:

# Mark empty-string entries of Gross as missing; which() yields the
# positions where the comparison is TRUE, so existing NAs are left alone.
empty_rows <- which(your.data$Gross == "")
your.data$Gross[empty_rows] <- NA

但您可能还应该去掉其中的逗号,并把该列转换为数值型。

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。