英文:
Would re-writing this code in PL/SQL be faster than performing 3 for-loops in Python & how would I transform this?
问题
我有一个查询结果如下:
- 位置编号:lct_nbr
- 财务周结束日期:fsc_wk_end_dt
- 商品编号:itm_nbr
- 产品编号:pg_nbr
- SUM(quantity):qty
- 本地时间戳:refresh_date
我正在尝试确定商品渗透率或权重。这些循环可以使用PL/SQL完成吗?我对这个概念不太熟悉。这样做比在Python中执行要快吗?它需要很多小时...
item_penetration_by_units = pd.read_sql_query(query, trino_conn)
item_penetration_by_units['total'] = math.nan
item_penetration_by_units['weight'] = math.nan
LCT_NBRs = item_penetration_by_units['lct_nbr']
LCT_NBRs = LCT_NBRs.drop_duplicates()
LCT_NBRs = LCT_NBRs.reset_index(drop=True)
LCT_NBRs = np.array(LCT_NBRs)
i = 0
for i in range(len(LCT_NBRs)):
x = item_penetration_by_units.loc[item_penetration_by_units['lct_nbr'] == LCT_NBRs[i]]
print("i: " + str(i) + " and LCT_NBR: " + str(LCT_NBRs[i]))
PG_NBRs = x['pg_nbr']
PG_NBRs = PG_NBRs.drop_duplicates()
PG_NBRs = PG_NBRs.reset_index(drop=True)
PG_NBRs = np.array(PG_NBRs)
n = len(PG_NBRs)
j = 0
for j in range(n):
y = x.loc[x['pg_nbr'] == PG_NBRs[j]]
print("j: " + str(j) + " and PG_NBR: " + str(PG_NBRs[j]))
WK_End_DTs = y['fsc_wk_end_dt']
WK_End_DTs = WK_End_DTs.drop_duplicates()
WK_End_DTs = WK_End_DTs.reset_index(drop=True)
WK_End_DTs = np.array(WK_End_DTs)
m = len(WK_End_DTs)
k = 0
for k in range(m):
z = y.loc[y['fsc_wk_end_dt'] == WK_End_DTs[k]]
index = z.index
indx = index.values
z = z.reset_index(drop=True)
total_k = float(z.qty.sum())
l = 0
for l in range(len(z)):
item_penetration_by_units.loc[indx[l], 'total'] = total_k
if (float(z.loc[l, 'qty']) == 0 and total_k == 0):
item_penetration_by_units.loc[indx[l], 'weight'] = 0.0
else:
item_penetration_by_units.loc[indx[l], 'weight'] = float(z.loc[l, 'qty']) / total_k
每个 itm_nbr 都有唯一的 qty、lct_nbr、pg_nbr 和 fsc_wk_end_dt。权重是通过将特定商品编号的 qty 除以 总 qty(即特定位置、特定周的特定产品组中所有商品编号的 qty 总和)来计算的。
英文:
I have a query that outputs:
- location number: lct_nbr
- fiscal week end date: fsc_wk_end_dt
- item number: itm_nbr
- product number: pg_nbr
- SUM(quantity): qty
- localtimestamp: refresh_date
and I am trying to determine item penetration, or weight. Can these for loops be done with PL/SQL? I am not very familiar with the concept. Would it be faster than doing this in Python? It takes many many hours...
# Pull the pre-aggregated rows: one row per
# (lct_nbr, fsc_wk_end_dt, pg_nbr, itm_nbr) with its summed qty.
item_penetration_by_units = pd.read_sql_query(query, trino_conn)

# Per-group total quantity over (location, week, product group).
# groupby(...).transform('sum') broadcasts each group's sum back onto its
# member rows in one vectorized pass, replacing the original four nested
# Python loops with per-cell .loc writes (which is what took hours).
group_cols = ['lct_nbr', 'fsc_wk_end_dt', 'pg_nbr']
item_penetration_by_units['total'] = (
    item_penetration_by_units.groupby(group_cols)['qty'].transform('sum')
)

# weight = qty / group total. A 0/0 group produces NaN, which the original
# loop explicitly mapped to 0.0, so coerce NaN back to 0.0 here to match.
# NOTE(review): if a group's total is 0 while a row's qty is non-zero
# (only possible with negative quantities), this yields +/-inf where the
# original loop would have raised ZeroDivisionError — confirm qty >= 0.
item_penetration_by_units['weight'] = (
    item_penetration_by_units['qty'] / item_penetration_by_units['total']
).fillna(0.0)
Each itm_nbr has a unique qty, lct_nbr, pg_nbr and fsc_wk_end_dt. Weight is calculated by dividing the qty a particular item number has by the total qty (which is the qty sum for all the item numbers in that particular product group at that particular location for that particular week).
答案1
得分: 1
好的,以下是代码部分的中文翻译:
import pandas as pd

# Sample data: one row per (location, week, product group, item).
#        lct  fsc_wk_end_dt  pg_nbr  itm_nbr  qty
data = [
    [1, '2023-08-01', 1, 1, 5],
    [1, '2023-08-01', 1, 2, 6],
    [1, '2023-08-01', 1, 3, 7],
    [1, '2023-08-01', 2, 1, 2],
    [1, '2023-08-01', 2, 2, 3],
    [1, '2023-08-01', 2, 3, 4],
    [1, '2023-09-01', 1, 1, 1],
    [1, '2023-09-01', 1, 2, 2],
    [1, '2023-09-01', 1, 3, 3],
    [1, '2023-09-01', 2, 1, 9],
    [1, '2023-09-01', 2, 2, 8],
    [1, '2023-09-01', 2, 3, 7],
    [2, '2023-08-01', 1, 1, 3],
    [2, '2023-08-01', 1, 2, 6],
    [2, '2023-08-01', 1, 3, 9],
    [2, '2023-08-01', 2, 1, 7],
    [2, '2023-08-01', 2, 2, 4],
    [2, '2023-08-01', 2, 3, 1],
    [2, '2023-09-01', 1, 1, 2],
    [2, '2023-09-01', 1, 2, 4],
    [2, '2023-09-01', 1, 3, 2],
    [2, '2023-09-01', 2, 1, 9],
    [2, '2023-09-01', 2, 2, 8],
    [2, '2023-09-01', 2, 3, 5],
]

item_penetration_by_units = pd.DataFrame(
    data, columns=['lct', 'fsc_wk_end_dt', 'pg_nbr', 'item_nbr', 'qty']
)
print(item_penetration_by_units)

# Group total per (location, week, product group). transform('sum')
# broadcasts each group's sum back onto its rows directly, so no
# set_index / groupby / join / reset_index round-trip is needed (and
# item_nbr is no longer pointlessly summed along with qty).
item_penetration_by_units['total'] = (
    item_penetration_by_units
    .groupby(['lct', 'fsc_wk_end_dt', 'pg_nbr'])['qty']
    .transform('sum')
)
# Item penetration: each row's share of its group's total quantity.
item_penetration_by_units['weight'] = (
    item_penetration_by_units['qty'] / item_penetration_by_units['total']
)
print(item_penetration_by_units)
希望这有所帮助!
英文:
OK, I think you can do all of this with a `.groupby`, a `.sum`, and a division. The key to that is making a multi-field index using location, date, and product group. And given this, yes, I believe you could do the same thing in SQL, although this seems pretty slick.
import pandas as pd

# Sample data: one row per (location, week, product group, item).
#        lct  fsc_wk_end_dt  pg_nbr  itm_nbr  qty
data = [
    [1, '2023-08-01', 1, 1, 5],
    [1, '2023-08-01', 1, 2, 6],
    [1, '2023-08-01', 1, 3, 7],
    [1, '2023-08-01', 2, 1, 2],
    [1, '2023-08-01', 2, 2, 3],
    [1, '2023-08-01', 2, 3, 4],
    [1, '2023-09-01', 1, 1, 1],
    [1, '2023-09-01', 1, 2, 2],
    [1, '2023-09-01', 1, 3, 3],
    [1, '2023-09-01', 2, 1, 9],
    [1, '2023-09-01', 2, 2, 8],
    [1, '2023-09-01', 2, 3, 7],
    [2, '2023-08-01', 1, 1, 3],
    [2, '2023-08-01', 1, 2, 6],
    [2, '2023-08-01', 1, 3, 9],
    [2, '2023-08-01', 2, 1, 7],
    [2, '2023-08-01', 2, 2, 4],
    [2, '2023-08-01', 2, 3, 1],
    [2, '2023-09-01', 1, 1, 2],
    [2, '2023-09-01', 1, 2, 4],
    [2, '2023-09-01', 1, 3, 2],
    [2, '2023-09-01', 2, 1, 9],
    [2, '2023-09-01', 2, 2, 8],
    [2, '2023-09-01', 2, 3, 5],
]

item_penetration_by_units = pd.DataFrame(
    data, columns=['lct', 'fsc_wk_end_dt', 'pg_nbr', 'item_nbr', 'qty']
)
print(item_penetration_by_units)

# Group total per (location, week, product group). transform('sum')
# broadcasts each group's sum back onto its rows directly, so no
# set_index / groupby / join / reset_index round-trip is needed (and
# item_nbr is no longer pointlessly summed along with qty).
item_penetration_by_units['total'] = (
    item_penetration_by_units
    .groupby(['lct', 'fsc_wk_end_dt', 'pg_nbr'])['qty']
    .transform('sum')
)
# Item penetration: each row's share of its group's total quantity.
item_penetration_by_units['weight'] = (
    item_penetration_by_units['qty'] / item_penetration_by_units['total']
)
print(item_penetration_by_units)
Output:
lct fsc_wk_end_dt pg_nbr item_nbr qty
0 1 2023-08-01 1 1 5
1 1 2023-08-01 1 2 6
2 1 2023-08-01 1 3 7
3 1 2023-08-01 2 1 2
4 1 2023-08-01 2 2 3
5 1 2023-08-01 2 3 4
6 1 2023-09-01 1 1 1
7 1 2023-09-01 1 2 2
8 1 2023-09-01 1 3 3
9 1 2023-09-01 2 1 9
10 1 2023-09-01 2 2 8
11 1 2023-09-01 2 3 7
12 2 2023-08-01 1 1 3
13 2 2023-08-01 1 2 6
14 2 2023-08-01 1 3 9
15 2 2023-08-01 2 1 7
16 2 2023-08-01 2 2 4
17 2 2023-08-01 2 3 1
18 2 2023-09-01 1 1 2
19 2 2023-09-01 1 2 4
20 2 2023-09-01 1 3 2
21 2 2023-09-01 2 1 9
22 2 2023-09-01 2 2 8
23 2 2023-09-01 2 3 5
lct fsc_wk_end_dt pg_nbr item_nbr qty total weight
0 1 2023-08-01 1 1 5 18 0.277778
1 1 2023-08-01 1 2 6 18 0.333333
2 1 2023-08-01 1 3 7 18 0.388889
3 1 2023-08-01 2 1 2 9 0.222222
4 1 2023-08-01 2 2 3 9 0.333333
5 1 2023-08-01 2 3 4 9 0.444444
6 1 2023-09-01 1 1 1 6 0.166667
7 1 2023-09-01 1 2 2 6 0.333333
8 1 2023-09-01 1 3 3 6 0.500000
9 1 2023-09-01 2 1 9 24 0.375000
10 1 2023-09-01 2 2 8 24 0.333333
11 1 2023-09-01 2 3 7 24 0.291667
12 2 2023-08-01 1 1 3 18 0.166667
13 2 2023-08-01 1 2 6 18 0.333333
14 2 2023-08-01 1 3 9 18 0.500000
15 2 2023-08-01 2 1 7 12 0.583333
16 2 2023-08-01 2 2 4 12 0.333333
17 2 2023-08-01 2 3 1 12 0.083333
18 2 2023-09-01 1 1 2 8 0.250000
19 2 2023-09-01 1 2 4 8 0.500000
20 2 2023-09-01 1 3 2 8 0.250000
21 2 2023-09-01 2 1 9 22 0.409091
22 2 2023-09-01 2 2 8 22 0.363636
23 2 2023-09-01 2 3 5 22 0.227273
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论