





import pandas as pd
# Sample wide-format data: one row per hospital, one column per medicine.
# Values (including the 'Effecive' spelling) are kept exactly as in the
# printed table so the reshaped output can be compared against it.
df = pd.DataFrame({
    'Record': {0: 1, 1: 2, 2: 3},
    'Hospital': {0: 'Red Cross', 1: 'Alberta Hospital', 2: 'General Hospital'},
    'Hospital Address': {0: '1234 Street 429', 1: '553 Alberta Road 441', 2: '994 Random Street 923'},
    'Medicine_1': {0: 'Effective', 1: 'Effecive', 2: 'normal'},
    'Medicine_2': {0: 'Effective', 1: 'normal', 2: 'Effective'},
    # Every hospital reports 'normal' for Medicine_3; all three rows must be
    # present and the inner dict closed, otherwise the literal does not parse.
    'Medicine_3': {0: 'normal', 1: 'normal', 2: 'normal'},
    'Medicine_4': {0: 'Effective', 1: 'Effective', 2: 'Effective'},
})

Record          Hospital       Hospital Address Medicine_1 Medicine_2 Medicine_3 Medicine_4  
     1         Red Cross        1234 Street 429  Effective  Effective     normal  Effective    
     2  Alberta Hospital   553 Alberta Road 441   Effecive     normal     normal  Effective
     3  General Hospital  994 Random Street 923     normal  Effective     normal  Effective


    Record          Hospital       Hospital Address        Name      Value
0        1         Red Cross        1234 Street 429  Medicine_1  Effective
1        2         Red Cross        1234 Street 429  Medicine_2  Effective
2        3         Red Cross        1234 Street 429  Medicine_3     normal
3        4         Red Cross        1234 Street 429  Medicine_4  Effective
4        5  Alberta Hospital   553 Alberta Road 441  Medicine_1   Effecive
5        6  Alberta Hospital   553 Alberta Road 441  Medicine_2     normal
6        7  Alberta Hospital   553 Alberta Road 441  Medicine_3     normal
7        8  Alberta Hospital   553 Alberta Road 441  Medicine_4  Effective
8        9  General Hospital  994 Random Street 923  Medicine_1     normal
9       10  General Hospital  994 Random Street 923  Medicine_2  Effective
10      11  General Hospital  994 Random Street 923  Medicine_3     normal
11      12  General Hospital  994 Random Street 923  Medicine_4  Effective

在查看PySpark示例时,情况很复杂:PySpark Dataframe melt columns into rows

再看看 pandas 的例子,它看起来要容易得多。但是 Stack Overflow 上有很多不同的答案,有人说使用 pivot、melt、stack、unstack,还有更多其他方法,结果让人迷惑。





# Reshape wide -> long with stack():
#   1. set_index keeps the identifying columns out of the reshape,
#   2. stack() turns every remaining (medicine) column into its own row,
#   3. reset_index() brings the identifiers back as columns; the unnamed
#      stacked level becomes 'level_3' and the values land in column 0,
#   4. rename gives those two columns their final names,
#   5. Record is renumbered 1..N so each long-format row has its own id.
df_final = (df.set_index(['Record', 'Hospital', 'Hospital Address'])
              .stack()
              .reset_index()
              .rename({'level_3': 'Name', 0: 'Value'}, axis=1)
              .assign(Record=lambda x: x.index + 1))

    Record          Hospital       Hospital Address       Name       Value
0        1         Red Cross        1234 Street 429  Medicine_1  Effective
1        2         Red Cross        1234 Street 429  Medicine_2  Effective
2        3         Red Cross        1234 Street 429  Medicine_3     Normal
3        4         Red Cross        1234 Street 429  Medicine_4  Effective
4        5  Alberta Hospital   553 Alberta Road 441  Medicine_1   Effecive
5        6  Alberta Hospital   553 Alberta Road 441  Medicine_2     Normal
6        7  Alberta Hospital   553 Alberta Road 441  Medicine_3     Normal
7        8  Alberta Hospital   553 Alberta Road 441  Medicine_4  Effective
8        9  General Hospital  994 Random Street 923  Medicine_1     Normal
9       10  General Hospital  994 Random Street 923  Medicine_2  Effective
10      11  General Hospital  994 Random Street 923  Medicine_3     Normal
11      12  General Hospital  994 Random Street 923  Medicine_4  Effective



import pandas as pd
# Sample wide-format data: one row per hospital, one column per medicine.
df = pd.DataFrame({
    'Record': {0: 1, 1: 2, 2: 3},
    'Hospital': {0: 'Red Cross', 1: 'Alberta Hospital', 2: 'General Hospital'},
    'Hospital Address': {0: '1234 Street 429', 1: '553 Alberta Road 441', 2: '994 Random Street 923'},
    'Medicine_1': {0: 'Effective', 1: 'Effecive', 2: 'Normal'},
    'Medicine_2': {0: 'Effective', 1: 'Normal', 2: 'Effective'},
    # All three rows are needed here and the inner dict must be closed,
    # otherwise the literal is a syntax error.
    'Medicine_3': {0: 'Normal', 1: 'Normal', 2: 'Normal'},
    'Medicine_4': {0: 'Effective', 1: 'Effective', 2: 'Effective'},
})

# Reshape wide -> long with melt().
# 'Hospital' must be listed in id_vars; otherwise it is treated as a value
# column and the hospital names are melted into 'Value'.
# melt() emits rows grouped by medicine column, so a stable sort on Record
# regroups them per hospital while keeping Medicine_1..Medicine_4 in order.
df = (df.melt(id_vars=['Record', 'Hospital', 'Hospital Address'],
              var_name='Name', value_name='Value')
        .sort_values('Record', kind='mergesort')
        .reset_index(drop=True))
# Renumber so every long-format row gets its own 1-based Record id.
df['Record'] = df.index + 1
    Record          Hospital       Hospital Address        Name      Value
0        1         Red Cross        1234 Street 429  Medicine_1  Effective
1        2         Red Cross        1234 Street 429  Medicine_2  Effective
2        3         Red Cross        1234 Street 429  Medicine_3     Normal
3        4         Red Cross        1234 Street 429  Medicine_4  Effective
4        5  Alberta Hospital   553 Alberta Road 441  Medicine_1   Effecive
5        6  Alberta Hospital   553 Alberta Road 441  Medicine_2     Normal
6        7  Alberta Hospital   553 Alberta Road 441  Medicine_3     Normal
7        8  Alberta Hospital   553 Alberta Road 441  Medicine_4  Effective
8        9  General Hospital  994 Random Street 923  Medicine_1     Normal
9       10  General Hospital  994 Random Street 923  Medicine_2  Effective
10      11  General Hospital  994 Random Street 923  Medicine_3     Normal
11      12  General Hospital  994 Random Street 923  Medicine_4  Effective


# create sample data 
import pandas as pd
from pyspark.sql.functions import expr
# Wide-format sample data: one row per hospital, one column per medicine.
# The stack() query below references Hospital, Hospital Address and
# Medicine_1..Medicine_4, so all of those columns must exist here.
panda_df = pd.DataFrame({
    'Record': {0: 1, 1: 2, 2: 3},
    'Hospital': {0: 'Red Cross', 1: 'Alberta Hospital', 2: 'General Hospital'},
    'Hospital Address': {0: '1234 Street 429', 1: '553 Alberta Road 441', 2: '994 Random Street 923'},
    'Medicine_1': {0: 'Effective', 1: 'Effecive', 2: 'Normal'},
    'Medicine_2': {0: 'Effective', 1: 'Normal', 2: 'Effective'},
    'Medicine_3': {0: 'Normal', 1: 'Normal', 2: 'Normal'},
    'Medicine_4': {0: 'Effective', 1: 'Effective', 2: 'Effective'},
})
# NOTE(review): `spark` is not created in this snippet; it is assumed to be an
# existing SparkSession (as in a notebook or the pyspark shell) - confirm.
df = spark.createDataFrame(panda_df)

# calculate
# stack(4, 'label', column, ...) emits 4 rows per input row, each holding a
# medicine label and that medicine's value; the alias names the two outputs.
df.select(
    "Hospital",
    "Hospital Address",
    expr("stack(4,'Medicine_1',Medicine_1,'Medicine_2',Medicine_2,"
         "'Medicine_3',Medicine_3,'Medicine_4',Medicine_4) as (MedicinName,Effectiveness)"),
).where("Effectiveness is not null").show()



# Identifier columns that are repeated on every output row.
index_cols = ["Hospital", "Hospital Address"]
# Columns that are neither identifiers nor values to unpivot.
drop_cols = ['Record']
# Select all columns which need to be pivoted down.
pivot_cols = [c for c in df.columns if c not in index_cols + drop_cols]

# Build the stack() call dynamically: stack(N, 'col1', `col1`, 'col2', `col2`, ...)
# Each pivot column contributes its name as a string label plus a reference to
# its value. The reference is backtick-quoted so names with spaces or other
# special characters are still parsed as a single identifier.
stack_args = ",".join("'{0}',`{0}`".format(c) for c in pivot_cols)
stackexpr = "stack({0},{1})".format(len(pivot_cols), stack_args)

# selectExpr parses every argument as a SQL expression, so an unquoted
# "Hospital Address" would be read as `Hospital AS Address`; quote the
# identifier columns as well.
# No alias is given, so Spark applies its default names (col0, col1) to the
# two generated columns.
quoted_index_cols = ["`{0}`".format(c) for c in index_cols]
df.selectExpr(*quoted_index_cols, stackexpr).show()


|        Hospital|    Hospital Address|MedicinName|Effectiveness|
|       Red Cross|     1234 Street 429| Medicine_1|    Effective|
|       Red Cross|     1234 Street 429| Medicine_2|    Effective|
|       Red Cross|     1234 Street 429| Medicine_3|       Normal|
|       Red Cross|     1234 Street 429| Medicine_4|    Effective|
|Alberta Hospital|553 Alberta Road 441| Medicine_1|     Effecive|
|Alberta Hospital|553 Alberta Road 441| Medicine_2|       Normal|
|Alberta Hospital|553 Alberta Road 441| Medicine_3|       Normal|
|Alberta Hospital|553 Alberta Road 441| Medicine_4|    Effective|
|General Hospital|994 Random Street...| Medicine_1|       Normal|
|General Hospital|994 Random Street...| Medicine_2|    Effective|
|General Hospital|994 Random Street...| Medicine_3|       Normal|
|General Hospital|994 Random Street...| Medicine_4|    Effective|

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。