如何在 PySimpleGUI 程序中用 Pandas 找出最高值和最低值,并将其汇总为一个字符串
我有一个 Pandas 数据框(DataFrame)。
此代码是 GUI 中某个函数的一部分。我正在尝试生成一行字符串,用来说明某个大洲(该大洲由用户选择)内 COVID 病例数最高的国家及其病例数。
这是我正在使用的数据集:https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv
我的图表将显示亚洲所有国家/地区的病例总数。我正在尝试在 plt.xlabel 处再添加一行文字,用来总结病例数最多的国家和病例数最少的国家。
预期输出: 在亚洲,X国的病例数最高,为xx,xxx例,X国的病例数最少,为x,xxx例。
这是我的代码:
import PySimpleGUI as sg
import matplotlib.pyplot as plt
def graphUI(dfIn):
    """Show the search/filter window and plot charts for the user's selection.

    The window offers a continent combo, a location combo, a combo choosing
    which series the line plot shows, and buttons for a bar plot, a line plot
    and cancelling. Choosing a continent refreshes the location combo with
    the countries of that continent.

    Args:
        dfIn: DataFrame with integer column labels; column 1 is read as the
            continent and column 2 as the location.

    Returns:
        ``dfIn`` unchanged, once the user presses Cancel or closes the window.
    """
    # Unique continents / locations feed the two drop-downs.
    df_continent = dfIn[1].drop_duplicates()
    df_location = dfIn[2].drop_duplicates()
    # Series the user can pick for the line plot.
    linePlotList = ['New Case', 'Total Death', 'New Death']

    # A selection only counts when it is neither the combo's placeholder text
    # nor the first entry of the column.
    # NOTE(review): the first entry is presumably the CSV header row (the
    # combos below skip it with [1:]) -- confirm against how dfIn is loaded.
    noContinent = ('Continent', df_continent.values[0])
    noLocation = ('Location', df_location.values[0])

    layout = [
        [sg.Text('display Graph')],
        [
            sg.Text('Continent:'),
            sg.Combo(
                df_continent.values[1:-1].tolist(),
                default_value="Continent",
                key="-continent-",
                enable_events=True,
            ),
        ],
        [
            sg.Text('Location: '),
            sg.Combo(
                df_location.values[1:].tolist(),
                default_value="Location",
                key="-location-",
            ),
        ],
        [
            sg.Text('Only For Line Plot: '),
            sg.Combo(linePlotList, default_value="New Case", key="-linePlot-"),
        ],
        [
            sg.Button('Bar plot', key='bar', tooltip='View Graph'),
            sg.Button('Line plot', key='line'),
            sg.Button('Cancel'),
        ],
    ]
    window = sg.Window('Search and Filter', layout)

    while True:
        event, values = window.read()

        # Handle close first: when the window is closed ``event`` is None and
        # ``values`` must not be indexed.
        if event is None or event == "Cancel":
            window.close()
            return dfIn

        if event == "-continent-":
            # The continent changed: refresh the location list.
            if values['-continent-'] not in noContinent:
                formUpdate = checkBoxupdate(dfIn, values['-continent-'])
                # The filtered result holds only matching countries, so every
                # entry is offered (slicing off the first one would hide a
                # real country).
                window['-location-'].update(values=formUpdate.tolist())
        elif event == "bar":
            if values['-continent-'] not in noContinent:
                barchart(dfIn, values)
        elif event == "line":
            if values['-location-'] not in noLocation:
                selectedLineChoice = values['-linePlot-']
                linePlot(dfIn, values, selectedLineChoice)
def _summarize_cases(continent, countries, counts):
    """Build the one-line highest/lowest summary shown under the bar chart."""
    if not counts:
        return 'No case data available for ' + continent + '.'
    ranked = list(zip(counts, countries))
    top_count, top_country = max(ranked, key=lambda pair: pair[0])
    low_count, low_country = min(ranked, key=lambda pair: pair[0])
    return (
        'In {}, {} has the highest number of cases with {:,} '
        'and {} has the lowest number of cases with {:,}.'
    ).format(continent, top_country, top_count, low_country, low_count)


def barchart(searchedDf, values):
    """Draw a horizontal bar chart of total cases per country in a continent.

    The x-axis label names the countries with the highest and the lowest
    case count of the selected continent.

    Args:
        searchedDf: DataFrame with integer column labels; column 1 is read as
            the continent, column 2 as the location and column 4 as the case
            count.
        values: Mapping of window values; ``values['-continent-']`` is the
            selected continent.
    """
    continent = values['-continent-']

    # Match on the continent column only. Searching every column would also
    # pick up rows whose *location* happens to equal the continent name.
    in_continent = searchedDf[searchedDf[1] == continent]
    # One row per country: keep the last (latest) row of each location.
    latest = in_continent.drop_duplicates(subset=[2], keep='last')

    allcountry = list(latest[2])
    highestInfected = [int(count) for count in latest[4]]

    plt.figure(figsize=(10, 5))
    plt.barh(allcountry, highestInfected)
    # Many countries share the y axis, so use a small tick font.
    plt.tick_params(axis='y', which='major', labelsize=6)
    plt.suptitle('Total Case of ' + continent)
    plt.xlabel(_summarize_cases(continent, allcountry, highestInfected))
    plt.show()
def linePlot(searchedDf, values, selectedLineChoice):
    """Plot one time series for the selected location as a line chart.

    The signature takes ``values`` because the body reads
    ``values['-location-']`` and the caller passes three arguments; the
    previous two-parameter form could not be called successfully.

    Args:
        searchedDf: DataFrame with integer column labels; column 2 is read as
            the location, column 3 as the date and columns 5, 6 and 7 as the
            series selectable below.
        values: Mapping of window values; ``values['-location-']`` is the
            selected location.
        selectedLineChoice: One of ``'New Case'``, ``'Total Death'`` or
            ``'New Death'``.

    Raises:
        ValueError: If ``selectedLineChoice`` is not one of the known series.
    """
    # Column holding each selectable series.
    columnByChoice = {'New Case': 5, 'Total Death': 6, 'New Death': 7}
    if selectedLineChoice not in columnByChoice:
        raise ValueError('Unknown line plot choice: ' + repr(selectedLineChoice))

    location = values['-location-']
    # Match on the location column only rather than on any column.
    locationDf = searchedDf[searchedDf[2] == location]

    eachDate = list(locationDf[3])
    selectedLineChoiceValues = [
        int(value) for value in locationDf[columnByChoice[selectedLineChoice]]
    ]

    # Label only every 50th date so the x axis stays readable.
    frequency = 50
    plt.plot(eachDate, selectedLineChoiceValues)
    plt.xticks(eachDate[::frequency], rotation=45)
    plt.tick_params(axis='x', labelsize=6)
    # Title follows the plotted series instead of always saying "New Case".
    plt.suptitle(selectedLineChoice + ' of ' + location)
    plt.ylabel(selectedLineChoice, fontsize=10)
    plt.show()
def checkBoxupdate(dfIn, input):
    """Return the distinct locations that belong to the given continent.

    Args:
        dfIn: DataFrame with integer column labels; column 1 is read as the
            continent and column 2 as the location.
        input: Continent name to look up.

    Returns:
        A Series of unique location values (column 2), in first-seen order;
        empty when no row has that continent.
    """
    # Compare against the continent column only. Matching any column would
    # also return rows whose location equals the continent name, so the
    # continent itself would show up in the list of countries.
    searchedDf = dfIn[dfIn[1] == input]
    # One entry per country of the selected continent.
    df_location = searchedDf[2].drop_duplicates()
    return df_location
解决方法
我理解这个问题的要点是:在所选大洲的图表的 x 轴标签上显示最大值和最小值,因此我编写了以下代码。
import matplotlib.pyplot as plt
import pandas as pd
import requests
import datetime  # kept from the original script; nothing below uses it directly

# Script: download the OWID COVID-19 CSV, find the countries with the highest
# and lowest latest total case count in one continent, show them in the
# x-axis label of a bar chart, then aggregate one country's cases per month.
url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'
df = pd.read_csv(url, sep=',')

continent = 'Asia'
country = 'Indonesia'

# total_cases -> max, min
# Select by the ``continent`` column itself; matching the name in any column
# would also pick up rows whose location equals the continent name.
# This runs before missing values are replaced by 0, so a country with
# missing figures cannot show up as the minimum with a count of 0.
casesDf = df[df['continent'] == continent].dropna(subset=['total_cases'])
# Last remaining row per country = its latest reported total.
cases_ = casesDf.groupby(['location'])[['date', 'total_cases']].last().reset_index()
# Double brackets keep one-row DataFrames, so ``.values[0]`` below still works.
cases_max_df = cases_.loc[[cases_['total_cases'].idxmax()]]
cases_min_df = cases_.loc[[cases_['total_cases'].idxmin()]]

# Missing values become 0 so the int conversion for the chart cannot fail.
df.fillna(0, inplace=True)

# Full history of the selected country inside the selected continent.
countryDf = df[(df['continent'] == continent) & (df['location'] == country)].copy()
# Only one continent value remains, so this keeps just the latest row.
searchedDf = countryDf.drop_duplicates(subset=['continent'], keep='last')
allcountry = list(searchedDf['location'])
highestInfected = list(map(int, searchedDf['total_cases']))

plt.figure(figsize=(10, 5))
plt.barh(allcountry, highestInfected)
# Tick label size for the country names on the y axis.
plt.tick_params(axis='y', which='major', labelsize=16)
plt.suptitle('Total Case of ' + continent)
# Three-line x label: heading, then "country:count" for the max and the min.
labels = ('In ' + continent + ' has the most number of cases.\n'
          + str(cases_max_df['location'].values[0]) + ':' + str(cases_max_df['total_cases'].values[0]) + '\n'
          + str(cases_min_df['location'].values[0]) + ':' + str(cases_min_df['total_cases'].values[0]))
plt.xlabel(labels, fontsize=18)
plt.show()

# Monthly aggregation over the country's full history (grouping the single
# latest row would be pointless).
countryDf['date'] = pd.to_datetime(countryDf['date'])
# Format each date individually; str() on the whole Series would produce the
# Series' printed representation instead of a per-row "yyyy-mm" key.
countryDf['yyyy-mm'] = countryDf['date'].dt.strftime('%Y-%m')
# NOTE(review): total_cases looks like a running total, so a monthly sum adds
# up daily running totals -- confirm whether new_cases was intended.
month_gb = countryDf.groupby('yyyy-mm')['total_cases'].sum()
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。