英文:
Getting error in the calculation in pandas
问题
I am getting incorrect results when I run a groupby with an aggregate function inside a loop, but outside the loop everything works and the results are correct.
import pandas as pd
import numpy as np
# Example DataFrame
df = pd.DataFrame({
    'GroupA': ['A', 'A', 'B', 'B', 'B', 'C'],
    'GroupB': ['X', 'Y', 'Z', 'X', 'Y', 'X'],
    'POP': [10, 20, 30, 40, 50, 60],
    'LF': [1, 2, 3, 4, 5, 6],
    'WRK': [100, 200, 300, 400, 500, 600]
})

groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]


def test(df, gby):
    """Print CMULT-weighted sums of POP, LF and WRK for one grouping.

    Args:
        df: Frame holding the grouping columns plus ``POP``, ``LF``,
            ``WRK`` and ``CMULT``.
        gby: Index into the module-level ``groupby_cols`` list that
            selects the columns to group by.

    Returns:
        None. The aggregated frame is printed together with the list of
        grouping columns.
    """
    # Perform groupby and aggregation
    groupby_columns = groupby_cols[gby]
    # NOTE: every lambda multiplies the group's values by df['CMULT'].
    # The driver loop below has already scaled POP/LF/WRK by CMULT before
    # it calls this function, so the weight is applied a second time here;
    # that is why these sums differ from the stand-alone computation.
    w2 = df.groupby(groupby_columns).agg(
        pophat=('POP', lambda x: np.sum(x * df['CMULT'])),
        lfhat=('LF', lambda x: np.sum(x * df['CMULT'])),
        wrkhat=('WRK', lambda x: np.sum(x * df['CMULT']))
    ).reset_index()
    # Calculate CMULT column based on the current groupby configuration.
    # A single grouping column is mapped directly, so values that are not
    # 'A', 'B' or 'C' (e.g. when grouping by GroupB) come out as NaN;
    # otherwise GroupA is mapped.
    if len(groupby_columns) == 1:
        w2['CMULT'] = w2[groupby_columns[0]].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    else:
        w2['CMULT'] = w2['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    print(w2, groupby_columns)


# `test` is a helper, not a unit test: keep pytest from collecting it if this
# module is ever star-imported into a test file.
test.__test__ = False


for i, cols in enumerate(groupby_cols):
    if i == 0:
        # Index 0 is the empty grouping: weight the value columns in place
        # and aggregate over the whole frame without groupby.
        df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
        df['POP'] = pd.to_numeric(df['POP']) * df['CMULT']
        df['LF'] = pd.to_numeric(df['LF']) * df['CMULT']
        df['WRK'] = pd.to_numeric(df['WRK']) * df['CMULT']
        df['no_sam'] = df.shape[0]
        agg_dict = {'POP': 'sum', 'LF': 'sum', 'WRK': 'sum', 'no_sam': 'count'}
        # Group the data by the current groupby configuration and calculate the aggregates
        w2 = df.agg(agg_dict).to_frame().T
        print(w2, cols)
    else:
        test(df, i)
This is the code that produces the incorrect results. Its output is:
POP LF WRK no_sam
0 63.0 6.3 630.0 6.0 []
GroupA pophat lfhat wrkhat CMULT
0 A 7.5 0.75 75.0 0.5
1 B 10.8 1.08 108.0 0.3
2 C 2.4 0.24 24.0 0.2 ['GroupA']
GroupB pophat lfhat wrkhat CMULT
0 X 8.5 0.85 85.0 NaN
1 Y 9.5 0.95 95.0 NaN
2 Z 2.7 0.27 27.0 NaN ['GroupB']
GroupA GroupB pophat lfhat wrkhat CMULT
0 A X 2.5 0.25 25.0 0.5
1 A Y 5.0 0.50 50.0 0.5
2 B X 3.6 0.36 36.0 0.3
3 B Y 4.5 0.45 45.0 0.3
4 B Z 2.7 0.27 27.0 0.3
5 C X 2.4 0.24 24.0 0.2 ['GroupA', 'GroupB']
But outside the loop the results are as follows (you can verify this by changing the index used in `groupby_cols[...]`):
import pandas as pd
import numpy as np
# Example DataFrame
df = pd.DataFrame({
    'GroupA': ['A', 'A', 'B', 'B', 'B', 'C'],
    'GroupB': ['X', 'Y', 'Z', 'X', 'Y', 'X'],
    'POP': [10, 20, 30, 40, 50, 60],
    'LF': [1, 2, 3, 4, 5, 6],
    'WRK': [100, 200, 300, 400, 500, 600]
})

groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]

# CMULT depends only on each row's GroupA value, so a plain element-wise map
# is enough; no groupby/transform (and no loop index) is needed.
df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})

# Perform groupby and aggregation based on the current groupby configuration.
# Change the index 3 to 1 or 2 to reproduce the other tables (index 0 is the
# empty grouping, which groupby does not accept).
# POP/LF/WRK are still the raw values here, so CMULT is applied exactly once.
w2 = df.groupby(groupby_cols[3]).agg(
    pophat=('POP', lambda x: np.sum(x * df['CMULT'])),
    lfhat=('LF', lambda x: np.sum(x * df['CMULT'])),
    wrkhat=('WRK', lambda x: np.sum(x * df['CMULT']))
).reset_index()
print(w2)
The results outside the loop are -
POP LF WRK no_sam
0 63.0 6.3 630.0 6.0
GroupA pophat lfhat wrkhat
0 A 15.0 1.5 150.0
1 B 36.0 3.6 360.0
2 C 12.0 1.2 120.0
GroupB pophat lfhat wrkhat
0 X 29.0 2.9 290.0
1 Y 25.0 2.5 250.0
2 Z 9.0 0.9 90.0
<details>
<summary>英文:</summary>
I am getting incorrect results when I run a groupby with an aggregate function inside a loop, but outside the loop everything works and the results are correct.
import pandas as pd
import numpy as np
# Example DataFrame
df = pd.DataFrame({
    'GroupA': ['A', 'A', 'B', 'B', 'B', 'C'],
    'GroupB': ['X', 'Y', 'Z', 'X', 'Y', 'X'],
    'POP': [10, 20, 30, 40, 50, 60],
    'LF': [1, 2, 3, 4, 5, 6],
    'WRK': [100, 200, 300, 400, 500, 600]
})

groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]


def test(df, gby):
    """Print CMULT-weighted sums of POP, LF and WRK for one grouping.

    Args:
        df: Frame holding the grouping columns plus ``POP``, ``LF``,
            ``WRK`` and ``CMULT``.
        gby: Index into the module-level ``groupby_cols`` list that
            selects the columns to group by.

    Returns:
        None. The aggregated frame is printed together with the list of
        grouping columns.
    """
    # Perform groupby and aggregation
    groupby_columns = groupby_cols[gby]
    # NOTE: every lambda multiplies the group's values by df['CMULT'].
    # The driver loop below has already scaled POP/LF/WRK by CMULT before
    # it calls this function, so the weight is applied a second time here;
    # that is why these sums differ from the stand-alone computation.
    w2 = df.groupby(groupby_columns).agg(
        pophat=('POP', lambda x: np.sum(x * df['CMULT'])),
        lfhat=('LF', lambda x: np.sum(x * df['CMULT'])),
        wrkhat=('WRK', lambda x: np.sum(x * df['CMULT']))
    ).reset_index()
    # Calculate CMULT column based on the current groupby configuration.
    # A single grouping column is mapped directly, so values that are not
    # 'A', 'B' or 'C' (e.g. when grouping by GroupB) come out as NaN;
    # otherwise GroupA is mapped.
    if len(groupby_columns) == 1:
        w2['CMULT'] = w2[groupby_columns[0]].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    else:
        w2['CMULT'] = w2['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    print(w2, groupby_columns)


# `test` is a helper, not a unit test: keep pytest from collecting it if this
# module is ever star-imported into a test file.
test.__test__ = False


for i, cols in enumerate(groupby_cols):
    if i == 0:
        # Index 0 is the empty grouping: weight the value columns in place
        # and aggregate over the whole frame without groupby.
        df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
        df['POP'] = pd.to_numeric(df['POP']) * df['CMULT']
        df['LF'] = pd.to_numeric(df['LF']) * df['CMULT']
        df['WRK'] = pd.to_numeric(df['WRK']) * df['CMULT']
        df['no_sam'] = df.shape[0]
        agg_dict = {'POP': 'sum', 'LF': 'sum', 'WRK': 'sum', 'no_sam': 'count'}
        # Group the data by the current groupby configuration and calculate the aggregates
        w2 = df.agg(agg_dict).to_frame().T
        print(w2, cols)
    else:
        test(df, i)
This is the code that produces the incorrect results. Its output is:
POP LF WRK no_sam
0 63.0 6.3 630.0 6.0 []
GroupA pophat lfhat wrkhat CMULT
0 A 7.5 0.75 75.0 0.5
1 B 10.8 1.08 108.0 0.3
2 C 2.4 0.24 24.0 0.2 ['GroupA']
GroupB pophat lfhat wrkhat CMULT
0 X 8.5 0.85 85.0 NaN
1 Y 9.5 0.95 95.0 NaN
2 Z 2.7 0.27 27.0 NaN ['GroupB']
GroupA GroupB pophat lfhat wrkhat CMULT
0 A X 2.5 0.25 25.0 0.5
1 A Y 5.0 0.50 50.0 0.5
2 B X 3.6 0.36 36.0 0.3
3 B Y 4.5 0.45 45.0 0.3
4 B Z 2.7 0.27 27.0 0.3
5 C X 2.4 0.24 24.0 0.2 ['GroupA', 'GroupB']
But outside the loop the results are as follows (you can verify this by changing the index used in `groupby_cols[...]`):
import pandas as pd
import numpy as np
# Example DataFrame
df = pd.DataFrame({
    'GroupA': ['A', 'A', 'B', 'B', 'B', 'C'],
    'GroupB': ['X', 'Y', 'Z', 'X', 'Y', 'X'],
    'POP': [10, 20, 30, 40, 50, 60],
    'LF': [1, 2, 3, 4, 5, 6],
    'WRK': [100, 200, 300, 400, 500, 600]
})

groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]

# CMULT depends only on each row's GroupA value, so a plain element-wise map
# is enough; no groupby/transform (and no loop index) is needed.
df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})

# Perform groupby and aggregation based on the current groupby configuration.
# Change the index 3 to 1 or 2 to reproduce the other tables (index 0 is the
# empty grouping, which groupby does not accept).
# POP/LF/WRK are still the raw values here, so CMULT is applied exactly once.
w2 = df.groupby(groupby_cols[3]).agg(
    pophat=('POP', lambda x: np.sum(x * df['CMULT'])),
    lfhat=('LF', lambda x: np.sum(x * df['CMULT'])),
    wrkhat=('WRK', lambda x: np.sum(x * df['CMULT']))
).reset_index()
print(w2)
POP LF WRK no_sam
0 63.0 6.3 630.0 6.0
GroupA pophat lfhat wrkhat
0 A 15.0 1.5 150.0
1 B 36.0 3.6 360.0
2 C 12.0 1.2 120.0
GroupB pophat lfhat wrkhat
0 X 29.0 2.9 290.0
1 Y 25.0 2.5 250.0
2 Z 9.0 0.9 90.0
GroupA GroupB pophat lfhat wrkhat
0 A X 5.0 0.5 50.0
1 A Y 10.0 1.0 100.0
2 B X 12.0 1.2 120.0
3 B Y 15.0 1.5 150.0
4 B Z 9.0 0.9 90.0
5 C X 12.0 1.2 120.0
So am I misunderstanding how groupby and aggregation work, given that this does not work inside the loop? Or do these functions behave differently inside a loop? I am puzzled: how is this possible?
</details>
# 答案1
**得分**: 1
Here are the translated parts of the code you provided:
IIUC, multiply the columns by `CMULT` before the loop and then aggregate with `sum` only:
```python
groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]


def test(df, gby):
    """Sum POP, LF and WRK for one grouping, print the result and return it.

    The value columns are summed as they are; no weighting happens inside
    this function.

    Args:
        df: Frame holding the grouping columns plus ``POP``, ``LF`` and
            ``WRK``.
        gby: Index into the module-level ``groupby_cols`` list that
            selects the columns to group by.

    Returns:
        The aggregated frame with columns ``pophat``, ``lfhat``,
        ``wrkhat`` and ``CMULT`` (the same frame that is printed).
    """
    groupby_columns = groupby_cols[gby]
    w2 = df.groupby(groupby_columns).agg(
        pophat=('POP', 'sum'),
        lfhat=('LF', 'sum'),
        wrkhat=('WRK', 'sum')
    ).reset_index()
    # A single grouping column is mapped directly, so values that are not
    # 'A', 'B' or 'C' come out as NaN; otherwise GroupA is mapped.
    if len(groupby_columns) == 1:
        w2['CMULT'] = w2[groupby_columns[0]].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    else:
        w2['CMULT'] = w2['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    print(w2, groupby_columns)
    return w2


# `test` is a helper, not a unit test: keep pytest from collecting it if this
# module is ever star-imported into a test file.
test.__test__ = False
# Weight the value columns once, before the loop, so that every aggregation
# afterwards is a plain sum of already-weighted values.
df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
df['POP'] = pd.to_numeric(df['POP']) * df['CMULT']
df['LF'] = pd.to_numeric(df['LF']) * df['CMULT']
df['WRK'] = pd.to_numeric(df['WRK']) * df['CMULT']
df['no_sam'] = df.shape[0]

for i, cols in enumerate(groupby_cols):
    if i == 0:
        # Index 0 is aggregated over the whole frame, without groupby.
        agg_dict = {'POP': 'sum', 'LF': 'sum', 'WRK': 'sum', 'no_sam': 'count'}
        w2 = df.agg(agg_dict).to_frame().T
        print(w2, cols)
    else:
        test(df, i)
```
Please note that I've only translated the code, and there are no additional comments or explanations.
英文:
IIUC, multiply the columns by `CMULT` before the loop and then aggregate with `sum` only:
groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]


def test(df, gby):
    """Sum POP, LF and WRK for one grouping, print the result and return it.

    The value columns are summed as they are; no weighting happens inside
    this function.

    Args:
        df: Frame holding the grouping columns plus ``POP``, ``LF`` and
            ``WRK``.
        gby: Index into the module-level ``groupby_cols`` list that
            selects the columns to group by.

    Returns:
        The aggregated frame with columns ``pophat``, ``lfhat``,
        ``wrkhat`` and ``CMULT`` (the same frame that is printed).
    """
    # Perform groupby and aggregation
    groupby_columns = groupby_cols[gby]
    w2 = df.groupby(groupby_columns).agg(
        pophat=('POP', 'sum'),
        lfhat=('LF', 'sum'),
        wrkhat=('WRK', 'sum')
    ).reset_index()
    # Calculate CMULT column based on the current groupby configuration.
    # A single grouping column is mapped directly, so values that are not
    # 'A', 'B' or 'C' come out as NaN; otherwise GroupA is mapped.
    if len(groupby_columns) == 1:
        w2['CMULT'] = w2[groupby_columns[0]].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    else:
        w2['CMULT'] = w2['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    print(w2, groupby_columns)
    return w2


# `test` is a helper, not a unit test: keep pytest from collecting it if this
# module is ever star-imported into a test file.
test.__test__ = False
# Weight the value columns once, before the loop, so that every aggregation
# afterwards is a plain sum of already-weighted values.
df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
df['POP'] = pd.to_numeric(df['POP']) * df['CMULT']
df['LF'] = pd.to_numeric(df['LF']) * df['CMULT']
df['WRK'] = pd.to_numeric(df['WRK']) * df['CMULT']
df['no_sam'] = df.shape[0]

for i, cols in enumerate(groupby_cols):
    if i == 0:
        agg_dict = {'POP': 'sum', 'LF': 'sum', 'WRK': 'sum', 'no_sam': 'count'}
        # Index 0 is aggregated over the whole frame, without groupby.
        w2 = df.agg(agg_dict).to_frame().T
        print(w2, cols)
    else:
        test(df, i)
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论