在pandas计算中出现错误。

huangapple go评论114阅读模式
英文:

Getting error in the calculation in pandas

问题

I am getting calculation errors when I run a groupby with aggregate functions inside a loop. Outside the loop, however, everything is fine and I get the correct results.

import pandas as pd
import numpy as np

# Example DataFrame: two categorical key columns (GroupA, GroupB) and three
# numeric value columns (POP, LF, WRK), six rows in total.
df = pd.DataFrame({
    'GroupA': ['A', 'A', 'B', 'B', 'B', 'C'],
    'GroupB': ['X', 'Y', 'Z', 'X', 'Y', 'X'],
    'POP': [10, 20, 30, 40, 50, 60],
    'LF': [1, 2, 3, 4, 5, 6],
    'WRK': [100, 200, 300, 400, 500, 600]
})

# Grouping configurations, selected by list index.  The empty list at index 0
# is handled separately by the driver loop as whole-frame totals (no groupby).
groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]

def test(df, gby):
    """Group ``df`` by ``groupby_cols[gby]``, aggregate, and print the result.

    Within each group the POP, LF and WRK values are multiplied by
    ``df['CMULT']`` and summed into ``pophat``, ``lfhat`` and ``wrkhat``.
    A ``CMULT`` column is then added to the grouped frame, which is printed
    together with the grouping columns.  Nothing is returned.

    Args:
        df: Frame holding the grouping columns plus POP, LF, WRK and CMULT.
        gby: Index into the module-level ``groupby_cols`` list.
    """
    # Perform groupby and aggregation
    groupby_columns = groupby_cols[gby]
    # NOTE(review): every lambda multiplies by df['CMULT'] again.  The driver
    # loop in this snippet has already overwritten POP/LF/WRK with their
    # CMULT-scaled values on its first pass, so when called from that loop the
    # weight is applied twice.
    w2 = df.groupby(groupby_columns).agg(
        pophat=('POP', lambda x: np.sum(x * df['CMULT'])),
        lfhat=('LF', lambda x: np.sum(x * df['CMULT'])),
        wrkhat=('WRK', lambda x: np.sum(x * df['CMULT']))
    ).reset_index()

    # Calculate CMULT column based on the current groupby configuration
    # With a single grouping column other than GroupA (e.g. GroupB) the values
    # are not keys of this dict, so map() fills CMULT with NaN.
    if len(groupby_columns) == 1:
        w2['CMULT'] = w2[groupby_columns[0]].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    else:
        w2['CMULT'] = w2['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})

    print(w2, groupby_columns)

# Run every grouping configuration in turn; index 0 (no grouping) is handled
# inline, the others are delegated to test().
for i in range(len(groupby_cols)):
    if i == 0:
        # Build the weight column and scale the value columns.  These
        # assignments overwrite POP/LF/WRK in df itself, so every later pass
        # (i >= 1) receives the already-weighted values.
        df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
        df['POP'] = pd.to_numeric(df['POP']) * df['CMULT']
        df['LF'] = pd.to_numeric(df['LF']) * df['CMULT']
        df['WRK'] = pd.to_numeric(df['WRK']) * df['CMULT']
        df['no_sam'] = df.shape[0]

        agg_dict = {'POP': 'sum', 'LF': 'sum', 'WRK': 'sum', 'no_sam': 'count'}
        # No grouping here: aggregate over the whole frame and turn the
        # resulting Series into a one-row DataFrame.
        w2 = df.agg(agg_dict).to_frame().T

        print(w2, groupby_cols[i])

    else:
        test(df, i)

This is the code that gives me the wrong calculations. The results are:

POP   LF    WRK  no_sam
0  63.0  6.3  630.0     6.0 []
  GroupA  pophat  lfhat  wrkhat  CMULT
0      A     7.5   0.75    75.0    0.5
1      B    10.8   1.08   108.0    0.3
2      C     2.4   0.24    24.0    0.2 ['GroupA']
  GroupB  pophat  lfhat  wrkhat  CMULT
0      X     8.5   0.85    85.0    NaN
1      Y     9.5   0.95    95.0    NaN
2      Z     2.7   0.27    27.0    NaN ['GroupB']
  GroupA GroupB  pophat  lfhat  wrkhat  CMULT
0      A      X     2.5   0.25    25.0    0.5
1      A      Y     5.0   0.50    50.0    0.5
2      B      X     3.6   0.36    36.0    0.3
3      B      Y     4.5   0.45    45.0    0.3
4      B      Z     2.7   0.27    27.0    0.3
5      C      X     2.4   0.24    24.0    0.2 ['GroupA', 'GroupB']

But outside the loop the results are different (you can verify this by changing the index used in `groupby_cols[...]`):

import pandas as pd
import numpy as np

# Example DataFrame
df = pd.DataFrame({
    'GroupA': ['A', 'A', 'B', 'B', 'B', 'C'],
    'GroupB': ['X', 'Y', 'Z', 'X', 'Y', 'X'],
    'POP': [10, 20, 30, 40, 50, 60],
    'LF': [1, 2, 3, 4, 5, 6],
    'WRK': [100, 200, 300, 400, 500, 600]
})

groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]

# NOTE(review): `i` is never assigned in this standalone snippet; presumably
# it is meant to be the same index as the one used in the groupby below.
df['CMULT'] = df.groupby(groupby_cols[i])['GroupA'].transform(lambda x: x.map({'A': 0.5, 'B': 0.3, 'C': 0.2}))

# Perform groupby and aggregation based on the current groupby configuration
# Here POP/LF/WRK still hold their raw values (nothing above rescales them),
# so multiplying by CMULT inside the lambdas applies the weight exactly once.

w2 = df.groupby(groupby_cols[3]).agg(
    pophat=('POP', lambda x: np.sum(x * df['CMULT'])),
    lfhat=('LF', lambda x: np.sum(x * df['CMULT'])),
    wrkhat=('WRK', lambda x: np.sum(x * df['CMULT']))
).reset_index()

print(w2)

The results outside the loop are -

     POP   LF    WRK  no_sam
0  63.0  6.3  630.0     6.0
  GroupA  pophat  lfhat  wrkhat
0      A    15.0    1.5   150.0
1      B    36.0    3.6   360.0
2      C    12.0    1.2   120.0
  GroupB  pophat  lfhat  wrkhat
0      X    29.0    2.9   290.0
1      Y    25.0    2.5   250.0
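2      Z     9.0    0.9    90.0
  GroupA GroupB  pophat  lfhat  wrkhat
0      A      X     5.0    0.5    50.0
1      A      Y    10.0    1.0   100.0
2      B      X    12.0    1.2   120.0
3      B      Y    15.0    1.5   150.0
4      B      Z     9.0    0.9    90.0
5      C      X    12.0    1.2   120.0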

<details>
<summary>英文:</summary>

I am getting calculation errors when I run a groupby with aggregate functions inside a loop. Outside the loop, however, everything is fine and I get the correct results.

    import pandas as pd
    import numpy as np
    
    # Example DataFrame: two categorical key columns (GroupA, GroupB) and
    # three numeric value columns (POP, LF, WRK), six rows in total.
    df = pd.DataFrame({
        'GroupA': ['A', 'A', 'B', 'B', 'B', 'C'],
        'GroupB': ['X', 'Y', 'Z', 'X', 'Y', 'X'],
        'POP': [10, 20, 30, 40, 50, 60],
        'LF': [1, 2, 3, 4, 5, 6],
        'WRK': [100, 200, 300, 400, 500, 600]
    })

    # Grouping configurations, selected by list index; the empty list at
    # index 0 means "no grouping".
    groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]
    
    def test(df, gby):
        """Group ``df`` by ``groupby_cols[gby]``, aggregate, and print the result.

        Within each group the POP, LF and WRK values are multiplied by
        ``df['CMULT']`` and summed into ``pophat``, ``lfhat`` and ``wrkhat``.
        A ``CMULT`` column is then added to the grouped frame, which is
        printed together with the grouping columns.  Nothing is returned.

        Args:
            df: Frame holding the grouping columns plus POP, LF, WRK and CMULT.
            gby: Index into the module-level ``groupby_cols`` list.
        """
        # Perform groupby and aggregation
        groupby_columns = groupby_cols[gby]
        # NOTE(review): every lambda multiplies by df['CMULT'] again.  The
        # driver loop in this snippet has already overwritten POP/LF/WRK with
        # their CMULT-scaled values on its first pass, so when called from
        # that loop the weight is applied twice.
        w2 = df.groupby(groupby_columns).agg(
            pophat=('POP', lambda x: np.sum(x * df['CMULT'])),
            lfhat=('LF', lambda x: np.sum(x * df['CMULT'])),
            wrkhat=('WRK', lambda x: np.sum(x * df['CMULT']))
        ).reset_index()

        # Calculate CMULT column based on the current groupby configuration
        # With a single grouping column other than GroupA (e.g. GroupB) the
        # values are not keys of this dict, so map() fills CMULT with NaN.
        if len(groupby_columns) == 1:
            w2['CMULT'] = w2[groupby_columns[0]].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
        else:
            w2['CMULT'] = w2['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})

        print(w2, groupby_columns)
    
    # Run every grouping configuration in turn; index 0 (no grouping) is
    # handled inline, the others are delegated to test().
    for i in range(len(groupby_cols)):
        if i == 0:
            # Build the weight column and scale the value columns.  These
            # assignments overwrite POP/LF/WRK in df itself, so every later
            # pass (i >= 1) receives the already-weighted values.
            df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
            df['POP'] = pd.to_numeric(df['POP']) * df['CMULT']
            df['LF'] = pd.to_numeric(df['LF']) * df['CMULT']
            df['WRK'] = pd.to_numeric(df['WRK']) * df['CMULT']
            df['no_sam'] = df.shape[0]

            agg_dict = {'POP': 'sum', 'LF': 'sum', 'WRK': 'sum', 'no_sam': 'count'}
            # No grouping here: aggregate over the whole frame and turn the
            # resulting Series into a one-row DataFrame.
            w2 = df.agg(agg_dict).to_frame().T

            print(w2, groupby_cols[i])

        else:
            test(df, i)


This is the code that gives me the wrong calculations. The results are:

    POP   LF    WRK  no_sam
    0  63.0  6.3  630.0     6.0 []
      GroupA  pophat  lfhat  wrkhat  CMULT
    0      A     7.5   0.75    75.0    0.5
    1      B    10.8   1.08   108.0    0.3
    2      C     2.4   0.24    24.0    0.2 ['GroupA']
      GroupB  pophat  lfhat  wrkhat  CMULT
    0      X     8.5   0.85    85.0    NaN
    1      Y     9.5   0.95    95.0    NaN
    2      Z     2.7   0.27    27.0    NaN ['GroupB']
      GroupA GroupB  pophat  lfhat  wrkhat  CMULT
    0      A      X     2.5   0.25    25.0    0.5
    1      A      Y     5.0   0.50    50.0    0.5
    2      B      X     3.6   0.36    36.0    0.3
    3      B      Y     4.5   0.45    45.0    0.3
    4      B      Z     2.7   0.27    27.0    0.3
    5      C      X     2.4   0.24    24.0    0.2 ['GroupA', 'GroupB']

But outside the loop the results are different (you can verify this by changing the index used in `groupby_cols[...]`):

    import pandas as pd
    import numpy as np
    
    # Example DataFrame
    df = pd.DataFrame({
        'GroupA': ['A', 'A', 'B', 'B', 'B', 'C'],
        'GroupB': ['X', 'Y', 'Z', 'X', 'Y', 'X'],
        'POP': [10, 20, 30, 40, 50, 60],
        'LF': [1, 2, 3, 4, 5, 6],
        'WRK': [100, 200, 300, 400, 500, 600]
    })

    groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]

    # NOTE(review): `i` is never assigned in this standalone snippet;
    # presumably it is meant to be the same index as the one used in the
    # groupby below.
    df['CMULT'] = df.groupby(groupby_cols[i])['GroupA'].transform(lambda x: x.map({'A': 0.5, 'B': 0.3, 'C': 0.2}))

    # Perform groupby and aggregation based on the current groupby configuration
    # Here POP/LF/WRK still hold their raw values (nothing above rescales
    # them), so multiplying by CMULT inside the lambdas applies the weight
    # exactly once.

    w2 = df.groupby(groupby_cols[3]).agg(
        pophat=('POP', lambda x: np.sum(x * df['CMULT'])),
        lfhat=('LF', lambda x: np.sum(x * df['CMULT'])),
        wrkhat=('WRK', lambda x: np.sum(x * df['CMULT']))
    ).reset_index()

    print(w2)

             POP       LF       WRK         no_sam
     0       63.0      6.3      630.0       6.0 
    
      GroupA  pophat  lfhat  wrkhat
    0      A    15.0    1.5   150.0
    1      B    36.0    3.6   360.0
    2      C    12.0    1.2   120.0
    
      GroupB  pophat  lfhat  wrkhat
    0      X    29.0    2.9   290.0
    1      Y    25.0    2.5   250.0
    2      Z     9.0    0.9    90.0
    
      GroupA GroupB  pophat  lfhat  wrkhat
    0      A      X     5.0    0.5    50.0
    1      A      Y    10.0    1.0   100.0
    2      B      X    12.0    1.2   120.0
    3      B      Y    15.0    1.5   150.0
    4      B      Z     9.0    0.9    90.0
    5      C      X    12.0    1.2   120.0

So, am I misunderstanding how groupby and aggregation work, given that the same code gives different results inside the loop? Or do these functions behave differently inside a loop? I don't see how that is possible.

</details>


# 答案1
**得分**: 1

Here are the translated parts of the code you provided:

IIUC, multiply the columns by `CMULT` once, before the loop, and then aggregate with `sum` only:

```python

groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]

def test(df, gby):
    """Print per-group sums of POP, LF and WRK for one grouping configuration.

    The grouping columns are looked up in the module-level ``groupby_cols``
    list by ``gby``.  The summed columns are named ``pophat``, ``lfhat`` and
    ``wrkhat``; a ``CMULT`` column is attached afterwards and the grouped
    frame is printed together with the grouping columns.  Returns nothing.
    """
    keys = groupby_cols[gby]
    named_sums = {
        'pophat': ('POP', 'sum'),
        'lfhat': ('LF', 'sum'),
        'wrkhat': ('WRK', 'sum'),
    }
    grouped = df.groupby(keys).agg(**named_sums).reset_index()

    # A lone grouping column supplies the lookup values; otherwise GroupA does.
    source = keys[0] if len(keys) == 1 else 'GroupA'
    grouped['CMULT'] = grouped[source].map({'A': 0.5, 'B': 0.3, 'C': 0.2})

    print(grouped, keys)

# Apply the per-GroupA weight to the value columns once, up front, so that
# every aggregation afterwards is a plain sum.
df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
for column in ('POP', 'LF', 'WRK'):
    df[column] = pd.to_numeric(df[column]) * df['CMULT']
df['no_sam'] = len(df)

for i, columns in enumerate(groupby_cols):
    if i != 0:
        # Every non-empty configuration goes through the grouped path.
        test(df, i)
        continue

    # Index 0 is the ungrouped case: whole-frame totals as a one-row frame.
    agg_dict = {'POP': 'sum', 'LF': 'sum', 'WRK': 'sum', 'no_sam': 'count'}
    w2 = df.agg(agg_dict).to_frame().T

    print(w2, columns)

```

Please note that I've only translated the code, and there are no additional comments or explanations.

英文:

IIUC, multiply the columns by `CMULT` once, before the loop, and then aggregate with `sum` only:

# Grouping configurations, selected by list index; the empty list at index 0
# means "no grouping" and is handled inline by the loop at the bottom.
groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]


def test(df, gby):
    """Print per-group sums of POP, LF and WRK for one grouping configuration.

    The value columns are expected to be weighted already, so a plain sum is
    all that is needed here.  A ``CMULT`` column is attached to the grouped
    frame, which is then printed together with the grouping columns.
    Returns nothing.

    Args:
        df: Frame holding the grouping columns plus POP, LF and WRK.
        gby: Index into the module-level ``groupby_cols`` list.
    """
    # print (df)
    # Perform groupby and aggregation
    groupby_columns = groupby_cols[gby]
    w2 = df.groupby(groupby_columns).agg(
        pophat=('POP', 'sum'),
        lfhat=('LF', 'sum'),
        wrkhat=('WRK', 'sum')
    ).reset_index()
    # print (w2)
    # Calculate CMULT column based on the current groupby configuration
    if len(groupby_columns) == 1:
        w2['CMULT'] = w2[groupby_columns[0]].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    else:
        w2['CMULT'] = w2['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    print(w2, groupby_columns)


# Weight the value columns once, before the loop, so that no pass through the
# loop can apply CMULT a second time.
df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
df['POP'] = pd.to_numeric(df['POP']) * df['CMULT']
df['LF'] = pd.to_numeric(df['LF']) * df['CMULT']
df['WRK'] = pd.to_numeric(df['WRK']) * df['CMULT']
df['no_sam'] = df.shape[0]
for i in range(len(groupby_cols)):
    if i == 0:
        agg_dict = {'POP': 'sum', 'LF': 'sum', 'WRK': 'sum', 'no_sam': 'count'}
        # No grouping for index 0: aggregate over the whole frame and turn the
        # resulting Series into a one-row DataFrame.
        w2 = df.agg(agg_dict).to_frame().T
        print(w2, groupby_cols[i])
    else:
        test(df, i)

huangapple
  • 本文由 发表于 2023年6月1日 13:33:55
  • 转载请务必保留本文链接:https://go.coder-hub.com/76378911.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定