英文:
Is there a function that returns all rows and excludes the rows that don't match a condition in a pandas DataFrame?
问题
我想排除 'Total' 列为 '-' 的行,只返回 'Total' 列有值的行。以下是我的代码:
# Load the officers/shareholders CSV, then try to split it at a row chosen
# from the 'Total' column and print one of the two halves per loop pass.
# NOTE(review): the original indentation was lost in this paste; the nesting
# of the for/if/else below must be restored before the script will run.
import pandas as pd
# NOTE(review): re is imported but not used anywhere in this snippet.
import re
input_csv_file = "./CSV/Officers_and_Shareholders.csv"
# Skip the first 10 lines of the file and skip malformed lines instead of raising.
df = pd.read_csv(input_csv_file, skiprows=10, on_bad_lines='skip')
# Replace missing values with empty strings, in place.
df.fillna('', inplace=True)
df.columns = ['Nama', 'Jabatan', 'Alamat', 'Klasifikasi Saham', 'Jumlah Lembar Saham', 'Total']
# Overwrites every value in 'Total' with the row index (or an empty range for
# an empty frame), so any '-' read from the CSV is gone after this line.
df['Total'] = df.index if df.index.tolist() else range(len(df))
# eq('-') yields a boolean Series; idxmin() returns the label of its first
# minimum (the first False, or simply the first row when all values are equal)
# and get_loc() converts that label to a position. Because 'Total' was just
# overwritten with index values, nothing equals '-' and the first row is chosen.
idx = df.index.get_loc(df['Total'].eq('-').idxmin())
# Label-based slices: .loc[:idx] includes row idx, and .loc[idx:] starts at it.
officer = df.loc[:idx]
shareholder = df.loc[idx:]
# Runs once per column (len(df.columns)), yet i is then used to look up a row
# of 'Total'.
for i in range(len(df.columns)):
if df["Total"][i] == '-':
shareholders = shareholder
print(i, shareholders)
else:
# The same `officer` slice is printed on every pass through this branch.
officers = officer
print(i, officers)
# Same overwrite as above: 'Total' becomes the row index; no rows are filtered.
df['Total'] = df.index if df.index.tolist() else range(len(df))
希望这有所帮助。
英文:
I want to exclude the rows whose 'Total' column is '-', and return only the rows that have a value in the 'Total' column. Here is my code:
# Load the officers/shareholders CSV, then try to split it at a row chosen
# from the 'Total' column and print one of the two halves per loop pass.
# NOTE(review): the original indentation was lost in this paste; the nesting
# of the for/if/else below must be restored before the script will run.
import pandas as pd
# NOTE(review): re is imported but not used anywhere in this snippet.
import re
input_csv_file = "./CSV/Officers_and_Shareholders.csv"
# Skip the first 10 lines of the file and skip malformed lines instead of raising.
df = pd.read_csv(input_csv_file, skiprows=10, on_bad_lines='skip')
# Replace missing values with empty strings, in place.
df.fillna('', inplace=True)
df.columns = ['Nama', 'Jabatan', 'Alamat', 'Klasifikasi Saham', 'Jumlah Lembar Saham', 'Total']
# Overwrites every value in 'Total' with the row index (or an empty range for
# an empty frame), so any '-' read from the CSV is gone after this line.
df['Total'] = df.index if df.index.tolist() else range(len(df))
# eq('-') yields a boolean Series; idxmin() returns the label of its first
# minimum (the first False, or simply the first row when all values are equal)
# and get_loc() converts that label to a position. Because 'Total' was just
# overwritten with index values, nothing equals '-' and the first row is chosen.
idx = df.index.get_loc(df['Total'].eq('-').idxmin())
# Label-based slices: .loc[:idx] includes row idx, and .loc[idx:] starts at it.
officer = df.loc[:idx]
shareholder = df.loc[idx:]
# Runs once per column (len(df.columns)), yet i is then used to look up a row
# of 'Total'.
for i in range(len(df.columns)):
if df["Total"][i] == '-':
shareholders = shareholder
print(i, shareholders)
else:
# The same `officer` slice is printed on every pass through this branch.
officers = officer
print(i, officers)
I tried applying the following code:
# Replaces 'Total' with the row index (an empty range for an empty frame);
# it does not filter any rows.
df['Total'] = df.index if df.index.tolist() else range(len(df))
The code above returns the same row every time, namely row 0, with the following content:
0 Nama Jabatan Alamat Klasifikasi Saham Jumlah Lembar Saham Total
0 ALIF SASETYO, DIREKTUR Jalan - - 0
1 Nama Jabatan Alamat Klasifikasi Saham Jumlah Lembar Saham Total
0 ALIF SASETYO, DIREKTUR Jalan - - 0
2 Nama Jabatan Alamat Klasifikasi Saham Jumlah Lembar Saham Total
0 ALIF SASETYO, DIREKTUR Jalan - - 0
3 Nama Jabatan Alamat Klasifikasi Saham Jumlah Lembar Saham Total
0 ALIF SASETYO, DIREKTUR Jalan - - 0
4 Nama Jabatan Alamat Klasifikasi Saham Jumlah Lembar Saham Total
0 ALIF SASETYO, DIREKTUR Jalan - - 0
Is there any way to return all of the rows whose 'Total' column is either 0 or '-'? If the code were able to return every row, it should return the following:
Nama ... Total
0 ALIF SASETYO, ... -
1 NIK: 3171060201830005 ...
2 NPWP: 246383541071000 ...
3 TTL: Jakarta, 02 Januari 1983 ...
4 ARIEF HERMAWAN, ... -
5 NIK: 1271121011700003 ...
6 NPWP: 070970173112000 ...
7 TTL: Bogor, 10 November 1970 ...
8 ARLAN SEPTIA ANANDA ...
9 RASAM, ...
10 NIK: 3174051209620003 ...
11 NPWP: 080878200013000 ...
12 TTL: Jakarta, 12 September ...
13 1962 ...
14 CHAIRAL TANJUNG, ... -
15 NIK: 3171011605660004 ...
16 NPWP: 070141650093000 ...
17 TTL: Jakarta, 16 Mei 1966 ...
18 FUAD RIZAL, ...
19 NIK: 3174010201780008 NPWP: 488337379015000 TT... ... -
20 Ir. R AGUS HARYOTO ... -
21 PURNOMO, UTAMA RASRINIK: 3578032408610001 ...
22 NPWP: 097468813615000 ...
23 TTL: SLEMAN, 24 Agustus 1961 ...
答案1
得分: 1
你可以使用以下代码来排除列"Total"中值为"-"的行:
# Boolean mask: keep only the rows whose 'Total' value is not "-".
df.loc[df['Total'].ne("-")]
英文:
You can exclude the rows whose Total column has the value "-" using just:
# Boolean mask: keep only the rows whose 'Total' value is not "-".
df.loc[df['Total'].ne("-")]
答案2
得分: 0
我发现了以下问题的解决方案:
def extract_names_and_shareholding_percentage_of_shareholders(self, df):
    """Map shareholder names to their rounded share of all listed shares.

    Shareholder rows are those whose 'Jabatan' is '-' and whose
    'Jumlah Lembar Saham' is not '-'.  Share counts are read from the
    column named by ``self.NUMBER_OF_SHARES``: dots are stripped as
    thousand separators, every remaining run of digits is parsed as a
    number, and each number is expressed as a whole-number percentage of
    their sum.

    Args:
        df: DataFrame with 'Nama', 'Jabatan' and 'Jumlah Lembar Saham'
            columns plus the ``self.NUMBER_OF_SHARES`` column, which must
            hold strings.

    Returns:
        dict pairing each shareholder name with a percentage, matched by
        position; pairing stops at the shorter of the two sequences.

    Raises:
        AssertionError: if the rounded percentages add up to more than 100.
        ZeroDivisionError: if numbers are found but they sum to zero.
    """
    is_shareholder = (df['Jabatan'] == '-') & (df['Jumlah Lembar Saham'] != '-')
    # One .loc lookup (rows and column together) instead of chained indexing.
    shareholders_names = df.loc[is_shareholder, 'Nama']

    # regex=False: the dot is a literal thousand separator, not a wildcard.
    # The default for ``regex`` differs between pandas versions, so spell it out.
    share_counts_text = ' '.join(
        df[self.NUMBER_OF_SHARES].str.replace('.', '', regex=False)
    )

    # NOTE(review): numbers are collected from every row of the column, not
    # only from the shareholder rows selected above; confirm that the
    # positional pairing in the return value lines up for real data.
    pattern_shareholding_numbers = re.compile(r'[\d.]*\d+')
    share_counts = [
        float(number)
        for number in pattern_shareholding_numbers.findall(share_counts_text)
    ]
    total_shares = sum(share_counts)
    matches_percentages = [
        round(count / total_shares * 100) for count in share_counts
    ]
    assert sum(matches_percentages) <= 100
    return dict(zip(shareholders_names, matches_percentages))
def extract_names_and_shareholding_percentage_of_officers(self, df):
    """Map officer names to rounded percentages of all listed shares.

    Officer rows are those whose 'Jabatan' is not '-' and whose
    'Jumlah Lembar Saham' is '-'.  Share counts are read from the column
    named by ``self.NUMBER_OF_SHARES``: dots are stripped as thousand
    separators, every remaining run of digits is parsed as a number, and
    each number is expressed as a whole-number percentage of their sum.

    Args:
        df: DataFrame with 'Nama', 'Jabatan' and 'Jumlah Lembar Saham'
            columns plus the ``self.NUMBER_OF_SHARES`` column, which must
            hold strings.

    Returns:
        dict pairing each officer name with a percentage, matched by
        position; pairing stops at the shorter of the two sequences.

    Raises:
        AssertionError: if the rounded percentages add up to more than 100.
        ZeroDivisionError: if numbers are found but they sum to zero.
    """
    is_officer = (df['Jabatan'] != '-') & (df['Jumlah Lembar Saham'] == '-')
    # One .loc lookup (rows and column together) instead of chained indexing.
    officers_names = df.loc[is_officer, 'Nama']

    # regex=False: the dot is a literal thousand separator, not a wildcard.
    # The default for ``regex`` differs between pandas versions, so spell it out.
    share_counts_text = ' '.join(
        df[self.NUMBER_OF_SHARES].str.replace('.', '', regex=False)
    )

    # NOTE(review): numbers are collected from every row of the column. If
    # self.NUMBER_OF_SHARES is 'Jumlah Lembar Saham', the officer rows selected
    # above hold '-' and contribute no numbers, so the percentages paired with
    # officer names come from other rows; confirm this is intended.
    pattern_shareholding_numbers = re.compile(r'[\d.]*\d+')
    share_counts = [
        float(number)
        for number in pattern_shareholding_numbers.findall(share_counts_text)
    ]
    total_shares = sum(share_counts)
    matches_percentages = [
        round(count / total_shares * 100) for count in share_counts
    ]
    assert sum(matches_percentages) <= 100
    return dict(zip(officers_names, matches_percentages))
英文:
I found the following solution to the question:
def extract_names_and_shareholding_percentage_of_shareholders(self, df):
    """Map shareholder names to their rounded share of all listed shares.

    Shareholder rows are those whose 'Jabatan' is '-' and whose
    'Jumlah Lembar Saham' is not '-'.  Share counts are read from the
    column named by ``self.NUMBER_OF_SHARES``: dots are stripped as
    thousand separators, every remaining run of digits is parsed as a
    number, and each number is expressed as a whole-number percentage of
    their sum.

    Args:
        df: DataFrame with 'Nama', 'Jabatan' and 'Jumlah Lembar Saham'
            columns plus the ``self.NUMBER_OF_SHARES`` column, which must
            hold strings.

    Returns:
        dict pairing each shareholder name with a percentage, matched by
        position; pairing stops at the shorter of the two sequences.

    Raises:
        AssertionError: if the rounded percentages add up to more than 100.
        ZeroDivisionError: if numbers are found but they sum to zero.
    """
    is_shareholder = (df['Jabatan'] == '-') & (df['Jumlah Lembar Saham'] != '-')
    # One .loc lookup (rows and column together) instead of chained indexing.
    shareholders_names = df.loc[is_shareholder, 'Nama']

    # regex=False: the dot is a literal thousand separator, not a wildcard.
    # The default for ``regex`` differs between pandas versions, so spell it out.
    share_counts_text = ' '.join(
        df[self.NUMBER_OF_SHARES].str.replace('.', '', regex=False)
    )

    # NOTE(review): numbers are collected from every row of the column, not
    # only from the shareholder rows selected above; confirm that the
    # positional pairing in the return value lines up for real data.
    pattern_shareholding_numbers = re.compile(r'[\d.]*\d+')
    share_counts = [
        float(number)
        for number in pattern_shareholding_numbers.findall(share_counts_text)
    ]
    total_shares = sum(share_counts)
    matches_percentages = [
        round(count / total_shares * 100) for count in share_counts
    ]
    assert sum(matches_percentages) <= 100
    return dict(zip(shareholders_names, matches_percentages))
def extract_names_and_shareholding_percentage_of_officers(self, df):
    """Map officer names to rounded percentages of all listed shares.

    Officer rows are those whose 'Jabatan' is not '-' and whose
    'Jumlah Lembar Saham' is '-'.  Share counts are read from the column
    named by ``self.NUMBER_OF_SHARES``: dots are stripped as thousand
    separators, every remaining run of digits is parsed as a number, and
    each number is expressed as a whole-number percentage of their sum.

    Args:
        df: DataFrame with 'Nama', 'Jabatan' and 'Jumlah Lembar Saham'
            columns plus the ``self.NUMBER_OF_SHARES`` column, which must
            hold strings.

    Returns:
        dict pairing each officer name with a percentage, matched by
        position; pairing stops at the shorter of the two sequences.

    Raises:
        AssertionError: if the rounded percentages add up to more than 100.
        ZeroDivisionError: if numbers are found but they sum to zero.
    """
    is_officer = (df['Jabatan'] != '-') & (df['Jumlah Lembar Saham'] == '-')
    # One .loc lookup (rows and column together) instead of chained indexing.
    officers_names = df.loc[is_officer, 'Nama']

    # regex=False: the dot is a literal thousand separator, not a wildcard.
    # The default for ``regex`` differs between pandas versions, so spell it out.
    share_counts_text = ' '.join(
        df[self.NUMBER_OF_SHARES].str.replace('.', '', regex=False)
    )

    # NOTE(review): numbers are collected from every row of the column. If
    # self.NUMBER_OF_SHARES is 'Jumlah Lembar Saham', the officer rows selected
    # above hold '-' and contribute no numbers, so the percentages paired with
    # officer names come from other rows; confirm this is intended.
    pattern_shareholding_numbers = re.compile(r'[\d.]*\d+')
    share_counts = [
        float(number)
        for number in pattern_shareholding_numbers.findall(share_counts_text)
    ]
    total_shares = sum(share_counts)
    matches_percentages = [
        round(count / total_shares * 100) for count in share_counts
    ]
    assert sum(matches_percentages) <= 100
    return dict(zip(officers_names, matches_percentages))
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论