问题

I am getting calculation errors while writing the groupby function with aggregate function in a loop. But, outside the loop everything is okay. Getting the results correctly...!

import pandas as pd
import numpy as np
# Example DataFrame
df = pd.DataFrame({
    'GroupA': ['A', 'A', 'B', 'B', 'B', 'C'],
    'GroupB': ['X', 'Y', 'Z', 'X', 'Y', 'X'],
    'POP': [10, 20, 30, 40, 50, 60],
    'LF': [1, 2, 3, 4, 5, 6],
    'WRK': [100, 200, 300, 400, 500, 600]
})
groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]
def test(df, gby):
    # Perform groupby and aggregation
    groupby_columns = groupby_cols[gby]
    w2 = df.groupby(groupby_columns).agg(
        pophat=('POP', lambda x: np.sum(x * df['CMULT'])),
        lfhat=('LF', lambda x: np.sum(x * df['CMULT'])),
        wrkhat=('WRK', lambda x: np.sum(x * df['CMULT']))
    ).reset_index()
    # Calculate CMULT column based on the current groupby configuration
    if len(groupby_columns) == 1:
        w2['CMULT'] = w2[groupby_columns[0]].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    else:
        w2['CMULT'] = w2['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    print(w2, groupby_columns)
for i in range(len(groupby_cols)):
    if i == 0:
        df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
        df['POP'] = pd.to_numeric(df['POP']) * df['CMULT']
        df['LF'] = pd.to_numeric(df['LF']) * df['CMULT']
        df['WRK'] = pd.to_numeric(df['WRK']) * df['CMULT']
        df['no_sam'] = df.shape[0]
        agg_dict = {'POP': 'sum', 'LF': 'sum', 'WRK': 'sum', 'no_sam': 'count'}
        # Group the data by the current groupby configuration and calculate the aggregates
        w2 = df.agg(agg_dict).to_frame().T
        print(w2, groupby_cols[i])
    else:
        test(df, i)

This is the code, in which I am getting calculation errors. The results are -

POP   LF    WRK  no_sam
0  63.0  6.3  630.0     6.0 []
  GroupA  pophat  lfhat  wrkhat  CMULT
0      A     7.5   0.75    75.0    0.5
1      B    10.8   1.08   108.0    0.3
2      C     2.4   0.24    24.0    0.2 ['GroupA']
  GroupB  pophat  lfhat  wrkhat  CMULT
0      X     8.5   0.85    85.0    NaN
1      Y     9.5   0.95    95.0    NaN
2      Z     2.7   0.27    27.0    NaN ['GroupB']
  GroupA GroupB  pophat  lfhat  wrkhat  CMULT
0      A      X     2.5   0.25    25.0    0.5
1      A      Y     5.0   0.50    50.0    0.5
2      B      X     3.6   0.36    36.0    0.3
3      B      Y     4.5   0.45    45.0    0.3
4      B      Z     2.7   0.27    27.0    0.3
5      C      X     2.4   0.24    24.0    0.2 ['GroupA', 'GroupB']

But, outside the loop the results are - (You can verify it by changing the index of groupby_cols[NNNNNNNNN])

import pandas as pd
import numpy as np
# Example DataFrame
df = pd.DataFrame({
    'GroupA': ['A', 'A', 'B', 'B', 'B', 'C'],
    'GroupB': ['X', 'Y', 'Z', 'X', 'Y', 'X'],
    'POP': [10, 20, 30, 40, 50, 60],
    'LF': [1, 2, 3, 4, 5, 6],
    'WRK': [100, 200, 300, 400, 500, 600]
})
groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]
df['CMULT'] = df.groupby(groupby_cols[i])['GroupA'].transform(lambda x: x.map({'A': 0.5, 'B': 0.3, 'C': 0.2}))
# Perform groupby and aggregation based on the current groupby configuration
w2 = df.groupby(groupby_cols[3]).agg(
    pophat=('POP', lambda x: np.sum(x * df['CMULT'])),
    lfhat=('LF', lambda x: np.sum(x * df['CMULT'])),
    wrkhat=('WRK', lambda x: np.sum(x * df['CMULT']))
).reset_index()
print(w2)

The results outside the loop are -

     POP   LF    WRK  no_sam
0  63.0  6.3  630.0     6.0
  GroupA  pophat  lfhat  wrkhat
0      A    15.0    1.5   150.0
1      B    36.0    3.6   360.0
2      C    12.0    1.2   120.0
  GroupB  pophat  lfhat  wrkhat
0      X    29.0    2.9   290.0
1      Y    25.0    2.5   250.0
2      Z     9.0
<details>
<summary>英文:</summary>
I am getting calculation errors while writing the groupby function with aggregate function in a loop.But, outside the loop everything is okay. Getting the results correctly...!
    import pandas as pd
    import numpy as np
    
    # Example DataFrame
    df = pd.DataFrame({
        &#39;GroupA&#39;: [&#39;A&#39;, &#39;A&#39;, &#39;B&#39;, &#39;B&#39;, &#39;B&#39;, &#39;C&#39;],
        &#39;GroupB&#39;: [&#39;X&#39;, &#39;Y&#39;, &#39;Z&#39;, &#39;X&#39;, &#39;Y&#39;, &#39;X&#39;],
        &#39;POP&#39;: [10, 20, 30, 40, 50, 60],
        &#39;LF&#39;: [1, 2, 3, 4, 5, 6],
        &#39;WRK&#39;: [100, 200, 300, 400, 500, 600]
    })
    
    groupby_cols = [[], [&#39;GroupA&#39;], [&#39;GroupB&#39;], [&#39;GroupA&#39;, &#39;GroupB&#39;]]
    
    def test(df, gby):
        # Perform groupby and aggregation
        groupby_columns = groupby_cols[gby]
        w2 = df.groupby(groupby_columns).agg(
            pophat=(&#39;POP&#39;, lambda x: np.sum(x * df[&#39;CMULT&#39;])),
            lfhat=(&#39;LF&#39;, lambda x: np.sum(x * df[&#39;CMULT&#39;])),
            wrkhat=(&#39;WRK&#39;, lambda x: np.sum(x * df[&#39;CMULT&#39;]))
        ).reset_index()
    
        # Calculate CMULT column based on the current groupby configuration
        if len(groupby_columns) == 1:
            w2[&#39;CMULT&#39;] = w2[groupby_columns[0]].map({&#39;A&#39;: 0.5, &#39;B&#39;: 0.3, &#39;C&#39;: 0.2})
        else:
            w2[&#39;CMULT&#39;] = w2[&#39;GroupA&#39;].map({&#39;A&#39;: 0.5, &#39;B&#39;: 0.3, &#39;C&#39;: 0.2})
    
    
        print(w2, groupby_columns)
    
    for i in range(len(groupby_cols)):
        if i == 0:
            df[&#39;CMULT&#39;] = df[&#39;GroupA&#39;].map({&#39;A&#39;: 0.5, &#39;B&#39;: 0.3, &#39;C&#39;: 0.2})
            df[&#39;POP&#39;] = pd.to_numeric(df[&#39;POP&#39;]) * df[&#39;CMULT&#39;]
            df[&#39;LF&#39;] = pd.to_numeric(df[&#39;LF&#39;]) * df[&#39;CMULT&#39;]
            df[&#39;WRK&#39;] = pd.to_numeric(df[&#39;WRK&#39;]) * df[&#39;CMULT&#39;]
            df[&#39;no_sam&#39;] = df.shape[0]
    
            agg_dict = {&#39;POP&#39;: &#39;sum&#39;, &#39;LF&#39;: &#39;sum&#39;, &#39;WRK&#39;: &#39;sum&#39;, &#39;no_sam&#39;: &#39;count&#39;}
            # Group the data by the current groupby configuration and calculate the aggregates
            w2 = df.agg(agg_dict).to_frame().T
    
            print(w2, groupby_cols[i])
    
        else:
            test(df, i)
This is the code , in which I am getting calculation errors. The results are -     
    POP   LF    WRK  no_sam
    0  63.0  6.3  630.0     6.0 []
      GroupA  pophat  lfhat  wrkhat  CMULT
    0      A     7.5   0.75    75.0    0.5
    1      B    10.8   1.08   108.0    0.3
    2      C     2.4   0.24    24.0    0.2 [&#39;GroupA&#39;]
      GroupB  pophat  lfhat  wrkhat  CMULT
    0      X     8.5   0.85    85.0    NaN
    1      Y     9.5   0.95    95.0    NaN
    2      Z     2.7   0.27    27.0    NaN [&#39;GroupB&#39;]
      GroupA GroupB  pophat  lfhat  wrkhat  CMULT
    0      A      X     2.5   0.25    25.0    0.5
    1      A      Y     5.0   0.50    50.0    0.5
    2      B      X     3.6   0.36    36.0    0.3
    3      B      Y     4.5   0.45    45.0    0.3
    4      B      Z     2.7   0.27    27.0    0.3
    5      C      X     2.4   0.24    24.0    0.2 [&#39;GroupA&#39;, &#39;GroupB&#39;]
But, outside the loop the results are - (You can verify it by changing the index of groupby_cols[NNNNNNNNN])
    import pandas as pd
    import numpy as np
    
    # Example DataFrame
    df = pd.DataFrame({
        &#39;GroupA&#39;: [&#39;A&#39;, &#39;A&#39;, &#39;B&#39;, &#39;B&#39;, &#39;B&#39;, &#39;C&#39;],
        &#39;GroupB&#39;: [&#39;X&#39;, &#39;Y&#39;, &#39;Z&#39;, &#39;X&#39;, &#39;Y&#39;, &#39;X&#39;],
        &#39;POP&#39;: [10, 20, 30, 40, 50, 60],
        &#39;LF&#39;: [1, 2, 3, 4, 5, 6],
        &#39;WRK&#39;: [100, 200, 300, 400, 500, 600]
    })
    
    groupby_cols = [[], [&#39;GroupA&#39;], [&#39;GroupB&#39;], [&#39;GroupA&#39;, &#39;GroupB&#39;]]
    
    df[&#39;CMULT&#39;] = df.groupby(groupby_cols[i])[&#39;GroupA&#39;].transform(lambda x: x.map({&#39;A&#39;: 0.5, &#39;B&#39;: 0.3, &#39;C&#39;: 0.2}))
    
    # Perform groupby and aggregation based on the current groupby configuration
    w2 = df.groupby(groupby_cols[3]).agg(
        pophat=(&#39;POP&#39;, lambda x: np.sum(x * df[&#39;CMULT&#39;])),
        lfhat=(&#39;LF&#39;, lambda x: np.sum(x * df[&#39;CMULT&#39;])),
        wrkhat=(&#39;WRK&#39;, lambda x: np.sum(x * df[&#39;CMULT&#39;]))
    ).reset_index()
    
    print(w2)
             POP       LF       WRK         no_sam
     0       63.0      6.3      630.0       6.0 
    
      GroupA  pophat  lfhat  wrkhat
    0      A    15.0    1.5   150.0
    1      B    36.0    3.6   360.0
    2      C    12.0    1.2   120.0
    
      GroupB  pophat  lfhat  wrkhat
    0      X    29.0    2.9   290.0
    1      Y    25.0    2.5   250.0
    2      Z     9.0    0.9    90.0
    
      GroupA GroupB  pophat  lfhat  wrkhat
    0      A      X     5.0    0.5    50.0
    1      A      Y    10.0    1.0   100.0
    2      B      X    12.0    1.2   120.0
    3      B      Y    15.0    1.5   150.0
    4      B      Z     9.0    0.9    90.0
    5      C      X    12.0    1.2   120.0
So, am I not understanding the groupby and aggregation properly because It is not working in the loop or The functions work differently in the loop. I have doubt about it, how can It be possible???
</details>
# 答案1
**得分**: 1
Here are the translated parts of the code you provided:
```python
IIUC multiple columns before loop and then aggregate `sum` only:
groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]
def test(df, gby):
    groupby_columns = groupby_cols[gby]
    w2 = df.groupby(groupby_columns).agg(
        pophat=('POP', 'sum'),
        lfhat=('LF', 'sum'),
        wrkhat=('WRK', 'sum')
    ).reset_index()
    if len(groupby_columns) == 1:
        w2['CMULT'] = w2[groupby_columns[0]].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    else:
        w2['CMULT'] = w2['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    print(w2, groupby_columns)
df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
df['POP'] = pd.to_numeric(df['POP']) * df['CMULT']
df['LF'] = pd.to_numeric(df['LF']) * df['CMULT']
df['WRK'] = pd.to_numeric(df['WRK']) * df['CMULT']
df['no_sam'] = df.shape[0]
for i in range(len(groupby_cols)):
    if i == 0:
        agg_dict = {'POP': 'sum', 'LF': 'sum', 'WRK': 'sum', 'no_sam': 'count'}
        w2 = df.agg(agg_dict).to_frame().T
        print(w2, groupby_cols[i])
    else:
        test(df, i)

Please note that I've only translated the code, and there are no additional comments or explanations.

英文:

IIUC multiple columns before loop and then aggregate sum only:

groupby_cols = [[], [&#39;GroupA&#39;], [&#39;GroupB&#39;], [&#39;GroupA&#39;, &#39;GroupB&#39;]]
def test(df, gby):
# print (df)
# Perform groupby and aggregation
groupby_columns = groupby_cols[gby]
w2 = df.groupby(groupby_columns).agg(
pophat=(&#39;POP&#39;, &#39;sum&#39;),
lfhat=(&#39;LF&#39;, &#39;sum&#39;),
wrkhat=(&#39;WRK&#39;, &#39;sum&#39;)
).reset_index()
# print (w2)
# Calculate CMULT column based on the current groupby configuration
if len(groupby_columns) == 1:
w2[&#39;CMULT&#39;] = w2[groupby_columns[0]].map({&#39;A&#39;: 0.5, &#39;B&#39;: 0.3, &#39;C&#39;: 0.2})
else:
w2[&#39;CMULT&#39;] = w2[&#39;GroupA&#39;].map({&#39;A&#39;: 0.5, &#39;B&#39;: 0.3, &#39;C&#39;: 0.2})
print(w2, groupby_columns)

df[&#39;CMULT&#39;] = df[&#39;GroupA&#39;].map({&#39;A&#39;: 0.5, &#39;B&#39;: 0.3, &#39;C&#39;: 0.2})
df[&#39;POP&#39;] = pd.to_numeric(df[&#39;POP&#39;]) * df[&#39;CMULT&#39;]
df[&#39;LF&#39;] = pd.to_numeric(df[&#39;LF&#39;]) * df[&#39;CMULT&#39;]
df[&#39;WRK&#39;] = pd.to_numeric(df[&#39;WRK&#39;]) * df[&#39;CMULT&#39;]
df[&#39;no_sam&#39;] = df.shape[0]
for i in range(len(groupby_cols)):
if i == 0:
agg_dict = {&#39;POP&#39;: &#39;sum&#39;, &#39;LF&#39;: &#39;sum&#39;, &#39;WRK&#39;: &#39;sum&#39;, &#39;no_sam&#39;: &#39;count&#39;}
# Group the data by the current groupby configuration and calculate the aggregates
w2 = df.agg(agg_dict).to_frame().T
print(w2, groupby_cols[i])
else:
test(df, i)

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

在pandas计算中出现错误。

问题

分割Pandas列表列

JSONDecodeError在尝试读取和格式化Python目录中的多个JSON文件时发生。

build文件夹和spec文件是使用pyinstaller创建可执行文件时的一部分。

尝试根据排名行的值重新排列数据框中的多个列

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。