英文:
how to merge tuple and dataframe data
问题
以下是代码部分的翻译:
import pandas as pd
import numpy as np
data = [
[1, 'Main', 'V15', 40, 'GROUP 1', 'dNumber', 'U220059090(C)'],
[2, 'Main', 'V15', 40, 'GROUP 1', 'tDate', '44901'],
[3, 'Main', 'V15', 40, 'GROUP 2', 'dNumber', 'U220059090(C)'],
[4, 'Main', 'V15', 40, 'GROUP 2', 'tDate', '44901'],
[5, 'Main', 'V15', 40, None, 'sCompany', 'bp'],
[6, 'Main', 'V15', 42, 'GROUP 1', 'dNumber', 'U220059090(C)'],
[7, 'Main', 'V15', 42, 'GROUP 1', 'tDate', '44901'],
[8, 'Main', 'V15', 42, 'GROUP 2', 'dNumber', 'U220059090(C)'],
[9, 'Main', 'V15', 42, 'GROUP 2', 'tDate', '44901'],
[10, 'Main', 'V15', 42, None, 'sCompany', 'bp'],
[11, 'Main', 'V15', 44, None, 'Sender', 'sDummy'],
[12, 'Main', 'V15', 44, None, 'TradeDate', 'Tdummy'],
[13, 'Main', 'V15', 44, None, 'Product', 'Pdummy'],
[14, 'Main', 'V15', 44, None, 'seller', 'seDummy'],
[15, 'Delivery', 'V15', 40, None, 'delIncoTerm', 'FIP'],
[16, 'Delivery', 'V15', 40, None, 'delWindow', '44562'],
]
df = pd.DataFrame(data, columns=['ID', 'Model', 'MVersion', 'dId', 'sGroup', 'eName', 'eValue'])
print(df)
print('\n')
nullSectionGroup = df[df['sGroup'].isnull()]
print('null sGroup')
print('----------------')
print(nullSectionGroup)
print('\n')
grpModel = df.groupby('Model') # 1) 按 Model 进行分组
for model in grpModel:
grpModelVersion = model[1].groupby('MVersion') # 2) 按 MVersion 进行分组
for modelVersion in grpModelVersion:
grpDocId = modelVersion[1].groupby('dId') # 3) 按 dId 进行分组
for docId in grpDocId:
#print('docId', docId)
grpSG = docId[1].groupby('sGroup') # 4) 按 sGroup 进行分组
for x in grpSG:
# 变量声明
model = x[1].Model.iloc[0]
modelVersion = x[1].MVersion.iloc[0]
docId = x[1].dId.iloc[0]
sectionGroup = x[1].sGroup.iloc[0]
# 根据 x[1] 的值筛选空的部分组数据框
#print('****model :', model, '**mVersion :', mVersion, '**Doc_Id :', dId, '**sGroup :', sGroup)
filtered_value = nullSectionGroup.loc[(nullSectionGroup['Model']==model)&(nullSectionGroup['MVersion']==modelVersion)&(nullSectionGroup['dId']==docId)]
print('filtered_value => pandas.core.frame.DataFrame')
print(filtered_value)
print('grouped values => tuple')
print(x)
print('\n')
如果您需要更多的帮助或有其他问题,请随时提出。
英文:
may be my questions is too basic but I am learning python. Let me know if you need more information.
I have dataframe as below.
ID Model MVersion dId sGroup eName eValue
0 1 Main V15 40 GROUP 1 dNumber U220059090(C)
1 2 Main V15 40 GROUP 1 tDate 44901
2 3 Main V15 40 GROUP 2 dNumber U220059090(C)
3 4 Main V15 40 GROUP 2 tDate 44901
4 5 Main V15 40 None sCompany bp
5 6 Main V15 42 GROUP 1 dNumber U220059090(C)
6 7 Main V15 42 GROUP 1 tDate 44901
7 8 Main V15 42 GROUP 2 dNumber U220059090(C)
8 9 Main V15 42 GROUP 2 tDate 44901
9 10 Main V15 42 None sCompany bp
10 11 Main V15 44 None Sender sDummy
11 12 Main V15 44 None TradeDate Tdummy
12 13 Main V15 44 None Product Pdummy
13 14 Main V15 44 None seller seDummy
I needed to apply grouping on Model, MVersion, dId & sGroup columns which I have done below.
I am trying to get result as below into separate group the None sGroup should be part of Group1 and Group2 for each dId. some dId might have all sGroup as None. Also is that possible to add new column as Group_Id with Incremental values.
ID Model MVersion dId sGroup eName eValue Group_Id
0 1 Main V15 40 GROUP 1 dNumber U220059090(C) 1
1 2 Main V15 40 GROUP 1 tDate 44901 1
4 5 Main V15 40 None sCompany bp 1
ID Model MVersion dId sGroup eName eValue Group_Id
2 3 Main V15 40 GROUP 2 dNumber U220059090(C) 2
3 4 Main V15 40 GROUP 2 tDate 44901 2
4 5 Main V15 40 None sCompany bp 2
ID Model MVersion dId sGroup eName eValue Group_Id
5 6 Main V15 42 GROUP 1 dNumber U220059090(C) 3
6 7 Main V15 42 GROUP 1 tDate 44901 3
9 10 Main V15 42 None sCompany bp 3
ID Model MVersion dId sGroup eName eValue Group_Id
7 8 Main V15 42 GROUP 2 dNumber U220059090(C) 4
8 9 Main V15 42 GROUP 2 tDate 44901 4
9 10 Main V15 42 None sCompany bp 4
ID Model MVersion dId sGroup eName eValue Group_Id
10 11 Main V15 44 None Sender sDummy 5
11 12 Main V15 44 None TradeDate Tdummy 5
12 13 Main V15 44 None Product Pdummy 5
13 14 Main V15 44 None seller seDummy 5
what I have tried is to filtered out all None to one dataframe and applied grouping on Model, MVersion, dId and SGroup. I am not sure how can I combined these two result into one. I don't know what is correct and efficient way to do this. any help is really appreciated.
import pandas as pd
import numpy as np
data = [
[1,'Main','V15', 40,'GROUP 1','dNumber','U220059090(C)'],
[2,'Main','V15', 40,'GROUP 1','tDate','44901'],
[3,'Main','V15', 40,'GROUP 2','dNumber','U220059090(C)'],
[4,'Main','V15', 40,'GROUP 2','tDate','44901'],
[5,'Main','V15', 40,None, 'sCompany','bp'],
[6,'Main','V15', 42,'GROUP 1','dNumber','U220059090(C)'],
[7,'Main','V15', 42,'GROUP 1','tDate','44901'],
[8,'Main','V15', 42,'GROUP 2','dNumber','U220059090(C)'],
[9,'Main','V15', 42,'GROUP 2','tDate','44901'],
[10,'Main','V15', 42,None,'sCompany','bp'],
[11,'Main','V15', 44,None,'Sender','sDummy'],
[12,'Main','V15', 44,None,'TradeDate','Tdummy'],
[13,'Main','V15', 44,None,'Product','Pdummy'],
[14,'Main','V15', 44,None,'seller','seDummy'],
[15,'Delivery','V15', 40,None,'delIncoTerm','FIP'],
[16,'Delivery','V15', 40,None,'delWindow','44562'],
]
df = pd.DataFrame(data, columns=['ID','Model','MVersion','dId','sGroup','eName','eValue'])
print(df)
print('\n')
nullSectionGroup = df[df['sGroup'].isnull()]
print('null sGroup')
print('----------------')
print(nullSectionGroup)
print('\n')
grpModel = df.groupby('Model') # 1) group by Model
for model in grpModel:
grpModelVersion = model[1].groupby('MVersion') # 2) group by MVersion
for modelVersion in grpModelVersion:
grpDocId = modelVersion[1].groupby('dId') # 3) group by dId
for docId in grpDocId:
#print('docId', docId)
grpSG = docId[1].groupby('sGroup') # 4) group by sGroup
for x in grpSG:
#variable declarition
model = x[1].Model.iloc[0]
modelVersion = x[1].MVersion.iloc[0]
docId = x[1].dId.iloc[0]
sectionGroup = x[1].sGroup.iloc[0]
#filtering dataframe of null section group based on x[1] values
#print('****model :', model, '**mVersion :', mVersion, '**Doc_Id :', dId, '**sGroup :', sGroup)
filtered_value = nullSectionGroup.loc[(nullSectionGroup['Model']==model)&(nullSectionGroup['MVersion']==modelVersion)&(nullSectionGroup['dId']==docId)]
print('filtered_value => pandas.core.frame.DataFrame')
print(filtered_value)
print('grouped values => tuple')
print(x)
print('\n')
答案1
得分: 1
尝试:
# 将 'None' 字符串转换为实际的 None(如果需要)
df.loc[df['sGroup'].eq('None'), 'sGroup'] = None
grp_num = 1
for _, g1 in df.groupby(['Model', 'MVersion', 'dId']):
mask = g1['sGroup'].isna()
if mask.all():
g1['Group_Id'] = grp_num
grp_num += 1
print(g1)
print()
else:
for _, g2 in g1[~mask].groupby('sGroup'):
g2 = pd.concat([g2, g1[mask]])
g2['Group_Id'] = grp_num
grp_num += 1
print(g2)
print()
打印结果:
ID Model MVersion dId sGroup eName eValue Group_Id
0 1 Main V15 40 GROUP 1 dNumber U220059090(C) 1
1 2 Main V15 40 GROUP 1 tDate 44901 1
4 5 Main V15 40 None sCompany bp 1
ID Model MVersion dId sGroup eName eValue Group_Id
2 3 Main V15 40 GROUP 2 dNumber U220059090(C) 2
3 4 Main V15 40 GROUP 2 tDate 44901 2
4 5 Main V15 40 None sCompany bp 2
ID Model MVersion dId sGroup eName eValue Group_Id
5 6 Main V15 42 GROUP 1 dNumber U220059090(C) 3
6 7 Main V15 42 GROUP 1 tDate 44901 3
9 10 Main V15 42 None sCompany bp 3
ID Model MVersion dId sGroup eName eValue Group_Id
7 8 Main V15 42 GROUP 2 dNumber U220059090(C) 4
8 9 Main V15 42 GROUP 2 tDate 44901 4
9 10 Main V15 42 None sCompany bp 4
ID Model MVersion dId sGroup eName eValue Group_Id
10 11 Main V15 44 None Sender sDummy 5
11 12 Main V15 44 None TradeDate Tdummy 5
12 13 Main V15 44 None Product Pdummy 5
13 14 Main V15 44 None seller seDummy 5
英文:
Try:
# conver the `None` string to actual None (if necessary)
df.loc[df['sGroup'].eq('None'), 'sGroup'] = None
grp_num = 1
for _, g1 in df.groupby(['Model', 'MVersion', 'dId']):
mask = g1['sGroup'].isna()
if mask.all():
g1['Group_Id'] = grp_num
grp_num += 1
print(g1)
print()
else:
for _, g2 in g1[~mask].groupby('sGroup'):
g2 = pd.concat([g2, g1[mask]])
g2['Group_Id'] = grp_num
grp_num += 1
print(g2)
print()
Prints:
ID Model MVersion dId sGroup eName eValue Group_Id
0 1 Main V15 40 GROUP 1 dNumber U220059090(C) 1
1 2 Main V15 40 GROUP 1 tDate 44901 1
4 5 Main V15 40 None sCompany bp 1
ID Model MVersion dId sGroup eName eValue Group_Id
2 3 Main V15 40 GROUP 2 dNumber U220059090(C) 2
3 4 Main V15 40 GROUP 2 tDate 44901 2
4 5 Main V15 40 None sCompany bp 2
ID Model MVersion dId sGroup eName eValue Group_Id
5 6 Main V15 42 GROUP 1 dNumber U220059090(C) 3
6 7 Main V15 42 GROUP 1 tDate 44901 3
9 10 Main V15 42 None sCompany bp 3
ID Model MVersion dId sGroup eName eValue Group_Id
7 8 Main V15 42 GROUP 2 dNumber U220059090(C) 4
8 9 Main V15 42 GROUP 2 tDate 44901 4
9 10 Main V15 42 None sCompany bp 4
ID Model MVersion dId sGroup eName eValue Group_Id
10 11 Main V15 44 None Sender sDummy 5
11 12 Main V15 44 None TradeDate Tdummy 5
12 13 Main V15 44 None Product Pdummy 5
13 14 Main V15 44 None seller seDummy 5
答案2
得分: 1
以下是代码的翻译部分:
cols = ['Model', 'MVersion', 'dId']
m = df['sGroup'].isnull()
# 为所有空值设置多个分组
xgrp = (df['sGroup'].fillna(df.groupby(cols)['sGroup']
.transform(lambda x: ','.join(set(x.dropna())))
.str.split(',')))
# 按照常规列 + xgrp 进行分组
dfs = [subdf.drop(columns='xgrp').assign(Group_Id=gid)
for gid, (_, subdf) in enumerate(df.assign(xgrp=xgrp).explode('xgrp')
.groupby(cols + ['xgrp'], sort=False))]
输出:
>>> dfs[0]
ID Model MVersion dId sGroup eName eValue Group_Id
0 1 Main V15 40 GROUP 1 dNumber U220059090(C) 0
1 2 Main V15 40 GROUP 1 tDate 44901 0
4 5 Main V15 40 None sCompany bp 0
>>> dfs[1]
ID Model MVersion dId sGroup eName eValue Group_Id
2 3 Main V15 40 GROUP 2 dNumber U220059090(C) 1
3 4 Main V15 40 GROUP 2 tDate 44901 1
4 5 Main V15 40 None sCompany bp 1
>>> dfs[2]
ID Model MVersion dId sGroup eName eValue Group_Id
5 6 Main V15 42 GROUP 1 dNumber U220059090(C) 2
6 7 Main V15 42 GROUP 1 tDate 44901 2
9 10 Main V15 42 None sCompany bp 2
>>> dfs[3]
ID Model MVersion dId sGroup eName eValue Group_Id
7 8 Main V15 42 GROUP 2 dNumber U220059090(C) 3
8 9 Main V15 42 GROUP 2 tDate 44901 3
9 10 Main V15 42 None sCompany bp 3
>>> dfs[4]
ID Model MVersion dId sGroup eName eValue Group_Id
10 11 Main V15 44 None Sender sDummy 4
11 12 Main V15 44 None TradeDate Tdummy 4
12 13 Main V15 44 None Product Pdummy 4
13 14 Main V15 44 None seller seDummy 4
>>> dfs[5]
ID Model MVersion dId sGroup eName eValue Group_Id
14 15 Delivery V15 40 None delIncoTerm FIP 5
15 16 Delivery V15 40 None delWindow 44562 5
希望这有所帮助!
英文:
You can use:
cols = ['Model', 'MVersion', 'dId']
m = df['sGroup'].isnull()
# Set multiple groups for all None values
xgrp = (df['sGroup'].fillna(df.groupby(cols)['sGroup']
.transform(lambda x: ','.join(set(x.dropna())))
.str.split(',')))
# Group by usual columns + xgrp
dfs = [subdf.drop(columns='xgrp').assign(Group_Id=gid)
for gid, (_, subdf) in enumerate(df.assign(xgrp=xgrp).explode('xgrp')
.groupby(cols + ['xgrp'], sort=False))]
Output:
>>> dfs[0]
ID Model MVersion dId sGroup eName eValue Group_Id
0 1 Main V15 40 GROUP 1 dNumber U220059090(C) 0
1 2 Main V15 40 GROUP 1 tDate 44901 0
4 5 Main V15 40 None sCompany bp 0
>>> dfs[1]
ID Model MVersion dId sGroup eName eValue Group_Id
2 3 Main V15 40 GROUP 2 dNumber U220059090(C) 1
3 4 Main V15 40 GROUP 2 tDate 44901 1
4 5 Main V15 40 None sCompany bp 1
>>> dfs[2]
ID Model MVersion dId sGroup eName eValue Group_Id
5 6 Main V15 42 GROUP 1 dNumber U220059090(C) 2
6 7 Main V15 42 GROUP 1 tDate 44901 2
9 10 Main V15 42 None sCompany bp 2
>>> dfs[3]
ID Model MVersion dId sGroup eName eValue Group_Id
7 8 Main V15 42 GROUP 2 dNumber U220059090(C) 3
8 9 Main V15 42 GROUP 2 tDate 44901 3
9 10 Main V15 42 None sCompany bp 3
>>> dfs[4]
ID Model MVersion dId sGroup eName eValue Group_Id
10 11 Main V15 44 None Sender sDummy 4
11 12 Main V15 44 None TradeDate Tdummy 4
12 13 Main V15 44 None Product Pdummy 4
13 14 Main V15 44 None seller seDummy 4
>>> dfs[5]
ID Model MVersion dId sGroup eName eValue Group_Id
14 15 Delivery V15 40 None delIncoTerm FIP 5
15 16 Delivery V15 40 None delWindow 44562 5
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论