英文:
JSON to Python dataframe: mapping values from another API
问题
以下是您要翻译的部分:
import requests
import json
import pandas as pd
def getdata(link):
try:
response = s.get(link)
except requests.exceptions.RequestException as e:
print('Request Exception Found!')
json_data = response.json()
return json_data
s = requests.session()
json_data = getdata('https://api.school.com/2020/students.json')
print(json_data)
def normalize(json_data, record_path):
temp = json.dumps(json_data)
ar = json.loads(temp)
df = pd.json_normalize(ar[record_path])
return df
Student_df = normalize(json_data, 'Students')
Student_ids = Student_df["Sid"].tolist()
print(Student_ids)
links = []
for i, row in Student_df.groupby('Sid').size().items():
link = "https://api.school.com/2020/mark.json?sid="+str(i)
links.append(link)
for x in links:
normalize(getdata(x),'marks')
如果您有任何其他问题,或需要进一步的帮助,请随时提问。
英文:
I have an API with student data like this, for every student id there will be a corresponding API link with mark details.
for example:
https://api.school.com/2020/students.json
{
"Students": [
{
"StudentName": "AAA",
"Sid": 1020,
"Saddress": "st.aaa",
"Sdob": "10-11-1999"
},
{
"StudentName": "BBB",
"Sid": 1021,
"Saddress": "st.bbb",
"Sdob": "11-11-1999"
},
{
"StudentName": "CCC",
"Sid": 1022,
"Saddress": "st.fff",
"Sdob": "05-12-1999"
},
{
"StudentName": "DDD",
"Sid": 1023,
"Saddress": "st.ddd",
"Sdob": "15-09-1999"
},
{
"StudentName": "EEE",
"Sid": 1024,
"Saddress": "st.eee",
"Sdob": "10-11-1999"
},
{
"StudentName": "FFF",
"Sid": 1025,
"Saddress": "st.ddd",
"Sdob": "20-11-1999"
},
{
"StudentName": "GGG",
"Sid": 1026,
"Saddress": "st.ggg",
"Sdob": "25-11-1999"
},
{
"StudentName": "JJJ",
"Sid": 1019,
"Saddress": "st.aaa",
"Sdob": "18-11-1999"
}
]
}
https://api.school.com/2020/mark.json?sid=1020
{
"marks": [
{
"English": 11,
"Math": 12,
"Art": 13,
"Science": 14,
"History": 15,
"Geography": 16,
"Physical Education": 17,
"Chemistry": 18,
"Physics": 19,
"Biology": 20
}
]
}
https://api.school.com/2020/mark.json?sid=1021
{
"marks": [
{
"English": 21,
"Math": 22,
"Art": 23,
"Science": 24,
"History": 25,
"Geography": 26,
"Physical Education": 27,
"Chemistry": 28,
"Physics": 29,
"Biology": 30
}
]
}
https://api.school.com/2020/mark.json?sid=1022
{
"marks": [
{
"English": 31,
"Math": 32,
"Art": 33,
"Science": 34,
"History": 35,
"Geography": 36,
"Physical Education": 37,
"Chemistry": 38,
"Physics": 39,
"Biology": 40
}
]
}
https://api.school.com/2020/mark.json?sid=1023
{
"marks": [
{
"English": 41,
"Math": 42,
"Art": 43,
"Science": 44,
"History": 45,
"Geography": 46,
"Physical Education": 47,
"Chemistry": 48,
"Physics": 49,
"Biology": 50
}
]
}
https://api.school.com/2020/mark.json?sid=1024
{
"marks": [
{
"English": 51,
"Math": 52,
"Art": 53,
"Science": 54,
"History": 55,
"Geography": 56,
"Physical Education": 57,
"Chemistry": 58,
"Physics": 59,
"Biology": 60
}
]
}
https://api.school.com/2020/mark.json?sid=1025
{
"marks": [
{
"English": 61,
"Math": 62,
"Art": 63,
"Science": 64,
"History": 65,
"Geography": 66,
"Physical Education": 67,
"Chemistry": 68,
"Physics": 69,
"Biology": 70
}
]
}
https://api.school.com/2020/mark.json?sid=1026
{
"marks": [
{
"English": 71,
"Math": 72,
"Art": 73,
"Science": 74,
"History": 75,
"Geography": 76,
"Physical Education": 77,
"Chemistry": 78,
"Physics": 79,
"Biology": 80
}
]
}
https://api.school.com/2020/mark.json?sid=1019
{
"marks": [
{
"English": 1,
"Math": 2,
"Art": 3,
"Science": 4,
"History": 5,
"Geography": 6,
"Physical Education": 7,
"Chemistry": 8,
"Physics": 9,
"Biology": 10
}
]
}
I need to get an output like this in a csv file
My code(not completed):
import requests
import json
import pandas as pd
def getdata(link):
try:
response = s.get(link)
except requests.exceptions.RequestException as e:
print('Request Exception Found!')
json_data = response.json()
return json_data
s = requests.session()
json_data = getdata('https://api.school.com/2020/students.json')
print(json_data)
def normalize(json_data,record_path):
temp = json.dumps(json_data)
ar = json.loads(temp)
df = pd.json_normalize(ar[record_path])
return df
Student_df = normalize(json_data, 'Students')
Student_ids = Student_df["Sid"].tolist()
print(Student_ids)
links = []
for i, row in Student_df.groupby('Sid').size().items():
link = "https://api.school.com/2020/mark.json?sid="+str(i)
links.append(link)
for x in links:
normalize(getdata(x),'marks')
is it possible to insert the StudentName and Sid along with the mark? when the program fetches the mark from each API, it should be able to map and add the StudentName and Sid.
if i saves each mark api data to a json file in the local machine, then i can use this code and it works perfectly
答案1
得分: 0
创建一个记录字典,其中键是Sid
,以便可以合并Student_df
和Marks_df
:
link = 'https://api.school.com/2020/mark.json?sid={sid}'
marks = {sid: normalize(getdata(link.format(sid=sid)), 'marks')
for sid in Student_df['Sid'].unique()}
Marks_df = pd.concat(marks).droplevel(1)
out = Student_df[['StudentName', 'Sid']].merge(Marks_df, left_on='Sid', right_index=True)
输出:
>>> out
StudentName Sid English Math Art Science History Geography Physical Education Chemistry Physics Biology
0 AAA 1020 11 12 13 14 15 16 17 18 19 20
1 BBB 1021 21 22 23 24 25 26 27 28 29 30
2 CCC 1022 31 32 33 34 35 36 37 38 39 40
3 DDD 1023 41 42 43 44 45 46 47 48 49 50
4 EEE 1024 51 52 53 54 55 56 57 58 59 60
5 FFF 1025 61 62 63 64 65 66 67 68 69 70
6 GGG 1026 71 72 73 74 75 76 77 78 79 80
7 JJJ 1019 1 2 3 4 5 6 7 8 9 10
使用FastAPI
的可重现示例:
# api.py
from fastapi import FastAPI
Students = [{'StudentName': 'AAA', 'Sid': 1020, 'Saddress': 'st.aaa', 'Sdob': '10-11-1999'},
{'StudentName': 'BBB', 'Sid': 1021, 'Saddress': 'st.bbb', 'Sdob': '11-11-1999'},
{'StudentName': 'CCC', 'Sid': 1022, 'Saddress': 'st.fff', 'Sdob': '05-12-1999'},
{'StudentName': 'DDD', 'Sid': 1023, 'Saddress': 'st.ddd', 'Sdob': '15-09-1999'},
{'StudentName': 'EEE', 'Sid': 1024, 'Saddress': 'st.eee', 'Sdob': '10-11-1999'},
{'StudentName': 'FFF', 'Sid': 1025, 'Saddress': 'st.ddd', 'Sdob': '20-11-1999'},
{'StudentName': 'GGG', 'Sid': 1026, 'Saddress': 'st.ggg', 'Sdob': '25-11-1999'},
{'StudentName': 'JJJ', 'Sid': 1019, 'Saddress': 'st.aaa', 'Sdob': '18-11-1999'}]
Marks = {'1020': {'marks': [{'English': 11, 'Math': 12, 'Art': 13, 'Science': 14, 'History': 15, 'Geography': 16, 'Physical Education': 17, 'Chemistry': 18, 'Physics': 19, 'Biology': 20}]},
'1021': {'marks': [{'English': 21, 'Math': 22, 'Art': 23, 'Science': 24, 'History': 25, 'Geography': 26, 'Physical Education': 27, 'Chemistry': 28, 'Physics': 29, 'Biology': 30}]},
'1022': {'marks': [{'English': 31, 'Math': 32, 'Art': 33, 'Science': 34, 'History': 35, 'Geography': 36, 'Physical Education': 37, 'Chemistry': 38, 'Physics': 39, 'Biology': 40}]},
'1023': {'marks': [{'English': 41, 'Math': 42, 'Art': 43, 'Science': 44, 'History': 45, 'Geography': 46, 'Physical Education': 47, 'Chemistry': 48, 'Physics': 49, 'Biology': 50}]},
'1024': {'marks': [{'English': 51, 'Math': 52, 'Art': 53, 'Science': 54, 'History': 55, 'Geography': 56, 'Physical Education': 57, 'Chemistry': 58, 'Physics': 59, 'Biology': 60}]},
'1025': {'marks': [{'English': 61, 'Math': 62, 'Art': 63, 'Science': 64, 'History': 65, 'Geography': 66, 'Physical Education': 67, 'Chemistry': 68, 'Physics': 69, 'Biology': 70}]},
'1026': {'marks': [{'English': 71, 'Math': 72, 'Art': 73, 'Science': 74, 'History': 75, 'Geography': 76, 'Physical Education': 77, 'Chemistry': 78, 'Physics': 79, 'Biology': 80}]},
'1019': {'marks': [{'English': 1, 'Math': 2, 'Art': 3, 'Science': 4, 'History': 5, 'Geography': 6, 'Physical Education': 7, 'Chemistry': 8, 'Physics': 9, 'Biology': 10}]}
app = FastAPI()
@app.get('/2020/students.json')
def students():
return Students
@app.get('/2020/mark.json')
def mark(sid):
return Marks[sid]
用法:
[...]$ uvicorn api:app --reload
# requests.get('http://localhost:8000/2020/students.json')
# requests.get('http://localhost:8000/2020/mark.json?sid=1019')
英文:
Create a dict of records where the key is Sid
so you can merge Student_df
and Marks_df
:
link = 'https://api.school.com/2020/mark.json?sid={sid}'
marks = {sid: normalize(getdata(link.format(sid=sid)), 'marks')
for sid in Student_df['Sid'].unique()}
Marks_df = pd.concat(marks).droplevel(1)
out = Student_df[['StudentName', 'Sid']].merge(Marks_df, left_on='Sid', right_index=True)
Output:
>>> out
StudentName Sid English Math Art Science History Geography Physical Education Chemistry Physics Biology
0 AAA 1020 11 12 13 14 15 16 17 18 19 20
1 BBB 1021 21 22 23 24 25 26 27 28 29 30
2 CCC 1022 31 32 33 34 35 36 37 38 39 40
3 DDD 1023 41 42 43 44 45 46 47 48 49 50
4 EEE 1024 51 52 53 54 55 56 57 58 59 60
5 FFF 1025 61 62 63 64 65 66 67 68 69 70
6 GGG 1026 71 72 73 74 75 76 77 78 79 80
7 JJJ 1019 1 2 3 4 5 6 7 8 9 10
Reproducible example with FastAPI
:
# api.py
from fastapi import FastAPI
Students = [{'StudentName': 'AAA', 'Sid': 1020, 'Saddress': 'st.aaa', 'Sdob': '10-11-1999'}, {'StudentName': 'BBB', 'Sid': 1021, 'Saddress': 'st.bbb', 'Sdob': '11-11-1999'}, {'StudentName': 'CCC', 'Sid': 1022, 'Saddress': 'st.fff', 'Sdob': '05-12-1999'}, {'StudentName': 'DDD', 'Sid': 1023, 'Saddress': 'st.ddd', 'Sdob': '15-09-1999'}, {'StudentName': 'EEE', 'Sid': 1024, 'Saddress': 'st.eee', 'Sdob': '10-11-1999'}, {'StudentName': 'FFF', 'Sid': 1025, 'Saddress': 'st.ddd', 'Sdob': '20-11-1999'}, {'StudentName': 'GGG', 'Sid': 1026, 'Saddress': 'st.ggg', 'Sdob': '25-11-1999'}, {'StudentName': 'JJJ', 'Sid': 1019, 'Saddress': 'st.aaa', 'Sdob': '18-11-1999'}]
Marks = {'1020': {'marks': [{'English': 11, 'Math': 12, 'Art': 13, 'Science': 14, 'History': 15, 'Geography': 16, 'Physical Education': 17, 'Chemistry': 18, 'Physics': 19, 'Biology': 20}]}, '1021': {'marks': [{'English': 21, 'Math': 22, 'Art': 23, 'Science': 24, 'History': 25, 'Geography': 26, 'Physical Education': 27, 'Chemistry': 28, 'Physics': 29, 'Biology': 30}]}, '1022': {'marks': [{'English': 31, 'Math': 32, 'Art': 33, 'Science': 34, 'History': 35, 'Geography': 36, 'Physical Education': 37, 'Chemistry': 38, 'Physics': 39, 'Biology': 40}]}, '1023': {'marks': [{'English': 41, 'Math': 42, 'Art': 43, 'Science': 44, 'History': 45, 'Geography': 46, 'Physical Education': 47, 'Chemistry': 48, 'Physics': 49, 'Biology': 50}]}, '1024': {'marks': [{'English': 51, 'Math': 52, 'Art': 53, 'Science': 54, 'History': 55, 'Geography': 56, 'Physical Education': 57, 'Chemistry': 58, 'Physics': 59, 'Biology': 60}]}, '1025': {'marks': [{'English': 61, 'Math': 62, 'Art': 63, 'Science': 64, 'History': 65, 'Geography': 66, 'Physical Education': 67, 'Chemistry': 68, 'Physics': 69, 'Biology': 70}]}, '1026': {'marks': [{'English': 71, 'Math': 72, 'Art': 73, 'Science': 74, 'History': 75, 'Geography': 76, 'Physical Education': 77, 'Chemistry': 78, 'Physics': 79, 'Biology': 80}]}, '1019': {'marks': [{'English': 1, 'Math': 2, 'Art': 3, 'Science': 4, 'History': 5, 'Geography': 6, 'Physical Education': 7, 'Chemistry': 8, 'Physics': 9, 'Biology': 10}]}}
app = FastAPI()
@app.get('/2020/students.json')
def students():
return Students
@app.get('/2020/mark.json')
def mark(sid):
return Marks[sid]
Usage:
[...]$ uvicorn api:app --reload
# requests.get('http://localhost:8000/2020/students.json')
# requests.get('http://localhost:8000/2020/mark.json?sid=1019')
答案2
得分: 0
import concurrent.futures 使用 concurrent.futures 来异步从 API 获取分数数据。(你可以在 merge_frames() 方法中删除你不需要的列。)
import concurrent.futures
import json
import os
import pandas as pd
import requests
class StudentsMarks:
def __init__(self):
self.root_url = "https://api.school.com/2020/"
self.students = self.get_students()
self.processors = os.cpu_count()
def get_students(self) -> pd.DataFrame:
url = "students.json"
return self.get_data(url=url, record="Students")
def process_marks(self) -> pd.DataFrame:
student_ids = self.students["Sid"].tolist()
with concurrent.futures.ProcessPoolExecutor(max_workers=self.processors) as executor:
grades = pd.concat(executor.map(self.get_data, student_ids)).reset_index(drop=True)
return self.merge_frames(grades)
def get_data(self, sid: int = "", url: str = "mark.json?sid=", record: str = "marks") -> pd.DataFrame:
url = f"{self.root_url}{url}{sid}"
with requests.Session() as request:
response = request.get(url, timeout=30)
if response.status_code != 200:
print(response.raise_for_status())
data = json.loads(response.text)
return pd.json_normalize(data=data, record_path=record).assign(id=sid)
def merge_frames(self, grades: pd.DataFrame) -> pd.DataFrame:
df = pd.merge(left=self.students, right=grades, left_on="Sid", right_on="id", suffixes=("students", "grades"))
return df[df.columns.drop(list(df.filter(regex="students|grades"))]
if __name__ == "__main__":
final = StudentsMarks().process_marks()
print(final)
输出:
Saddress Sdob Sid StudentName Art Biology Chemistry English Geography History Math Physical Education Physics Science
0 st.aaa 10-31-1999 1020 AAA 13 20 18 11 16 15 12 17 19 14
1 st.bbb 11-19-1999 1021 BBB 23 30 28 21 26 25 22 27 29 24
2 st.fff 12-22-1999 1022 CCC 33 40 38 31 36 35 32 37 39 34
3 st.ddd 09-15-1999 1023 DDD 43 50 48 41 46 45 42 47 49 44
4 st.eee 02-08-1999 1024 EEE 53 60 58 51 56 55 52 57 59 54
5 st.ddd 05-14-1999 1025 FFF 63 70 68 61 66 65 62 67 69 64
6 st.ggg 11-25-1999 1026 GGG 73 80 78 71 76 75 72 77 79 74
7 st.aaa 11-18-1999 1019 JJJ 3 10 8 1 6 5 2 7 9 4
英文:
Using concurrent.futures to asynchronously fetch mark data from api. (you can drop any columns you don't need in merge_frames() method.)
import concurrent.futures
import json
import os
import pandas as pd
import requests
class StudentsMarks:
def __init__(self):
self.root_url = "https://api.school.com/2020/"
self.students = self.get_students()
self.processors = os.cpu_count()
def get_students(self) -> pd.DataFrame:
url = "students.json"
return self.get_data(url=url, record="Students")
def process_marks(self) -> pd.DataFrame:
student_ids = self.students["Sid"].tolist()
with concurrent.futures.ProcessPoolExecutor(max_workers=self.processors) as executor:
grades = pd.concat(executor.map(self.get_data, student_ids)).reset_index(drop=True)
return self.merge_frames(grades)
def get_data(self, sid: int = "", url: str = "mark.json?sid=", record: str = "marks") -> pd.DataFrame:
url = f"{self.root_url}{url}{sid}"
with requests.Session() as request:
response = request.get(url, timeout=30)
if response.status_code != 200:
print(response.raise_for_status())
data = json.loads(response.text)
return pd.json_normalize(data=data, record_path=record).assign(id=sid)
def merge_frames(self, grades: pd.DataFrame) -> pd.DataFrame:
df = pd.merge(left=self.students, right=grades, left_on="Sid", right_on="id", suffixes=("_students", "_grades"))
return df[df.columns.drop(list(df.filter(regex="students|grades")))]
if __name__ == "__main__":
final = StudentsMarks().process_marks()
print(final)
Output:
Saddress Sdob Sid StudentName Art Biology Chemistry English Geography History Math Physical Education Physics Science
0 st.aaa 10-31-1999 1020 AAA 13 20 18 11 16 15 12 17 19 14
1 st.bbb 11-19-1999 1021 BBB 23 30 28 21 26 25 22 27 29 24
2 st.fff 12-22-1999 1022 CCC 33 40 38 31 36 35 32 37 39 34
3 st.ddd 09-15-1999 1023 DDD 43 50 48 41 46 45 42 47 49 44
4 st.eee 02-08-1999 1024 EEE 53 60 58 51 56 55 52 57 59 54
5 st.ddd 05-14-1999 1025 FFF 63 70 68 61 66 65 62 67 69 64
6 st.ggg 11-25-1999 1026 GGG 73 80 78 71 76 75 72 77 79 74
7 st.aaa 11-18-1999 1019 JJJ 3 10 8 1 6 5 2 7 9 4
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论