将单个CSV拆分为多个MySQL表,而不会引发错误或丢失未知列。

huangapple go评论65阅读模式
英文:

Split A Single CSV Into Multiple Mysql Tables Without Throwing Errors Unknown Columns Missing

问题

下面是SQL脚本的翻译部分:

-- 创建学生表
SET FOREIGN_KEY_CHECKS = 0;

-- 创建学生表
CREATE TABLE IF NOT EXISTS students (
  id INT AUTO_INCREMENT PRIMARY KEY,
  name VARCHAR(255) NOT NULL,
  class VARCHAR(10) NOT NULL
);

-- 创建科目表
CREATE TABLE IF NOT EXISTS subjects (
  id INT AUTO_INCREMENT PRIMARY KEY,
  student_id INT NOT NULL,
  subject_name VARCHAR(50) NOT NULL,
  subject_ca_score INT DEFAULT NULL,
  subject_exam_score INT DEFAULT NULL,
  subject_score INT DEFAULT NULL,
  subject_type VARCHAR(10) DEFAULT NULL,
  FOREIGN KEY (student_id) REFERENCES students(id)
);

-- 创建结果表
CREATE TABLE IF NOT EXISTS results (
  id INT AUTO_INCREMENT PRIMARY KEY,
  student_id INT NOT NULL,
  subject_name VARCHAR(50) NOT NULL,
  subject_type VARCHAR(10) NOT NULL,
  subject_ca_score INT NOT NULL,
  subject_exam_score INT NOT NULL,
  subject_score INT NOT NULL,
  grade VARCHAR(2) DEFAULT NULL,
  FOREIGN KEY (student_id) REFERENCES students(id)
);

-- 从CSV加载数据到科目表
LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv'
INTO TABLE subjects
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS
(student_id, math_score_ca, math_score_exam, econ_score_ca, econ_score_exam, civic_score_ca, civic_score_exam, lit_score_ca, lit_score_exam)
SET
subject_name = 'Math',
subject_ca_score = math_score_ca,
subject_exam_score = math_score_exam,
subject_score = math_score_ca + math_score_exam,
subject_type = 'CA',
student_id = @last_student_id + 1;

-- 为每个科目插入记录到科目表
INSERT INTO subjects (student_id, subject_name, subject_ca_score, subject_exam_score, subject_score, subject_type)
SELECT student_id, 'Econ', econ_score_ca, econ_score_exam, econ_score_ca + econ_score_exam, 'CA'
FROM subjects
WHERE student_id = @last_student_id + 1;

INSERT INTO subjects (student_id, subject_name, subject_ca_score, subject_exam_score, subject_score, subject_type)
SELECT student_id, 'Civic', civic_score_ca, civic_score_exam, civic_score_ca + civic_score_exam, 'CA'
FROM subjects
WHERE student_id = @last_student_id + 1;

INSERT INTO subjects (student_id, subject_name, subject_ca_score, subject_exam_score, subject_score, subject_type)
SELECT student_id, 'Lit', lit_score_ca, lit_score_exam, lit_score_ca + lit_score_exam, 'CA'
FROM subjects
WHERE student_id = @last_student_id + 1;

-- 计算成绩并插入到结果表
INSERT INTO results (student_id, subject_name, subject_type, subject_ca_score, subject_exam_score, subject_score, grade)
SELECT student_id, subject_name, 'CA', subject_ca_score, subject_exam_score, subject_ca_score + subject_exam_score,
  CASE
    WHEN subject_ca_score + subject_exam_score IS NULL THEN NULL
    WHEN subject_ca_score + subject_exam_score >= 90 THEN 'A'
    WHEN subject_ca_score + subject_exam_score >= 80 THEN 'B'
    WHEN subject_ca_score + subject_exam_score >= 70 THEN 'C'
    WHEN subject_ca_score + subject_exam_score >= 60 THEN 'D'
    ELSE 'F'
  END
FROM subjects
WHERE student_id = @last_student_id +1;

-- 显示学生的姓名和他们的成绩
SELECT s.name, r.subject_name, r.grade
FROM students s
INNER JOIN results r ON s.id = r.student_id
WHERE s.id = @last_student_id;

-- 脚本结束
SET FOREIGN_KEY_CHECKS = 1;

对于CSV文件的内容,这是学生姓名、班级和各科目成绩的示例数据。

英文:

I would like to create sql script that will split a csv into 3 mysql tables

However, whenever, i run the sql script below, it throws an error, no subject or that in the field list.I have doubled cross my csv, all the headers and the data are intact.This is the sql script below :***

The first part of the script create all the neccessary tables to actualize this task.

The second part load the csv into these tables using load data infile


-- Create students table
SET FOREIGN_KEY_CHECKS = 0;
-- Create students table
CREATE TABLE IF NOT EXISTS students (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(255) NOT NULL,
class VARCHAR(10) NOT NULL
);
-- Create subjects table
CREATE TABLE IF NOT EXISTS subjects (
id INT AUTO_INCREMENT PRIMARY KEY,
student_id INT NOT NULL,
subject_name VARCHAR(50) NOT NULL,
subject_ca_score INT DEFAULT NULL,
subject_exam_score INT DEFAULT NULL,
subject_score INT DEFAULT NULL,
subject_type VARCHAR(10) DEFAULT NULL,
FOREIGN KEY (student_id) REFERENCES students(id)
);
-- Create results table
CREATE TABLE IF NOT EXISTS results (
id INT AUTO_INCREMENT PRIMARY KEY,
student_id INT NOT NULL,
subject_name VARCHAR(50) NOT NULL,
subject_type VARCHAR(10) NOT NULL,
subject_ca_score INT NOT NULL,
subject_exam_score INT NOT NULL,
subject_score INT NOT NULL,
grade VARCHAR(2) DEFAULT NULL,
FOREIGN KEY (student_id) REFERENCES students(id)
);
-- Load data into the subjects table from CSV
LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv'
INTO TABLE subjects
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS
(student_id, math_score_ca, math_score_exam, econ_score_ca, econ_score_exam, civic_score_ca, civic_score_exam, lit_score_ca, lit_score_exam)
SET
subject_name = 'Math',
subject_ca_score = math_score_ca,
subject_exam_score = math_score_exam,
subject_score = math_score_ca + math_score_exam,
subject_type = 'CA',
student_id = @last_student_id + 1;
-- Insert records for each subject into the subjects table
INSERT INTO subjects (student_id, subject_name, subject_ca_score, subject_exam_score, subject_score, subject_type)
SELECT student_id, 'Econ', econ_score_ca, econ_score_exam, econ_score_ca + econ_score_exam, 'CA'
FROM subjects
WHERE student_id = @last_student_id + 1;
INSERT INTO subjects (student_id, subject_name, subject_ca_score, subject_exam_score, subject_score, subject_type)
SELECT student_id, 'Civic', civic_score_ca, civic_score_exam, civic_score_ca + civic_score_exam, 'CA'
FROM subjects
WHERE student_id = @last_student_id + 1;
INSERT INTO subjects (student_id, subject_name, subject_ca_score, subject_exam_score, subject_score, subject_type)
SELECT student_id, 'Lit', lit_score_ca, lit_score_exam, lit_score_ca + lit_score_exam, 'CA'
FROM subjects
WHERE student_id = @last_student_id + 1;
-- Calculate grades and insert them into the results table
INSERT INTO results (student_id, subject_name, subject_type, subject_ca_score, subject_exam_score, subject_score, grade)
SELECT student_id, subject_name, 'CA', subject_ca_score, subject_exam_score, subject_ca_score + subject_exam_score,
CASE
WHEN subject_ca_score + subject_exam_score IS NULL THEN NULL
WHEN subject_ca_score + subject_exam_score >= 90 THEN 'A'
WHEN subject_ca_score + subject_exam_score >= 80 THEN 'B'
WHEN subject_ca_score + subject_exam_score >= 70 THEN 'C'
WHEN subject_ca_score + subject_exam_score >= 60 THEN 'D'
ELSE 'F'
END
FROM subjects
WHERE student_id = @last_student_id +1;
-- Display the student's name and their results
SELECT s.name, r.subject_name, r.grade
FROM students s
INNER JOIN results r ON s.id = r.student_id
WHERE s.id = @last_student_id;
-- End of script.
SET FOREIGN_KEY_CHECKS = 1;

This is the error:

SET FOREIGN_KEY_CHECKS = 0
> OK
> Query Time: 0.001s
-- Create students table
SET FOREIGN_KEY_CHECKS = 0
> OK
> Query Time: 0.009s
-- Create students table
CREATE TABLE IF NOT EXISTS students (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(255) NOT NULL,
class VARCHAR(10) NOT NULL
)
> OK
> Query Time: 0.001s
-- Create subjects table
CREATE TABLE IF NOT EXISTS subjects (
id INT AUTO_INCREMENT PRIMARY KEY,
student_id INT NOT NULL,
subject_name VARCHAR(50) NOT NULL,
subject_ca_score INT DEFAULT NULL,
subject_exam_score INT DEFAULT NULL,
subject_score INT DEFAULT NULL,
subject_type VARCHAR(10) DEFAULT NULL,
FOREIGN KEY (student_id) REFERENCES students(id)
)
> OK
> Query Time: 0.006s
-- Create results table
CREATE TABLE IF NOT EXISTS results (
id INT AUTO_INCREMENT PRIMARY KEY,
student_id INT NOT NULL,
subject_name VARCHAR(50) NOT NULL,
subject_type VARCHAR(10) NOT NULL,
subject_ca_score INT NOT NULL,
subject_exam_score INT NOT NULL,
subject_score INT NOT NULL,
grade VARCHAR(2) DEFAULT NULL,
FOREIGN KEY (student_id) REFERENCES students(id)
)
> OK
> Query Time: 0.002s
-- Load data into the subjects table from CSV
LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv'
INTO TABLE subjects
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS
(student_id, math_score_ca, math_score_exam, econ_score_ca, econ_score_exam, civic_score_ca, civic_score_exam, lit_score_ca, lit_score_exam)
SET
subject_name = 'Math',
subject_ca_score = math_score_ca,
subject_exam_score = math_score_exam,
subject_score = math_score_ca + math_score_exam,
subject_type = 'CA',
student_id = @last_student_id + 1
> 1054 - Unknown column 'math_score_ca' in 'field list'
> Query Time: 0.009s

This is csv file, when opened in with a text editor, notepad++

name,class,math_score_ca,math_score_exam,econ_score_ca,econ_score_exam,civic_score_ca,civic_score_exam,lit_score_ca,lit_score_exam
John Doe,SS3,70,80,75,85,80,90,85,95
Jane Doe,SS3,85,90,80,75,90,85,70,80

答案1

得分: 1

你的方案是不合逻辑的。

你会创建:

  • 分别的表格 ClassSubjectType
  • 表格 Student,它引用 Class
  • 表格 Subject,它引用 SubjectType
  • 表格 Result,它引用 StudentSubject

根据主题领域的细节,该方案可能包含更多的表格。此外,这些表格必须包含适当的唯一约束。

英文:

Your scheme is illogical.

You'd create:

  • Separate tables Class and SubjectType;
  • Table Student which refers to Class;
  • Table Subject which refers to SubjectType;
  • Table Result which refers to Student and Subject.

Depends on subject area details the scheme may contain more tables. Also these tables must contain suitable unique constraints.

答案2

得分: 0

以下是您要翻译的内容:

For your current approach to work with your existing structure (bad idea, see Akina's answer) you would need to do multiple passes of the CSV, first inserting the students (no idea where you are getting @last_student_id from) and then the pass for each subject.

-- ADD UNIQUE KEY TO students (name, class)
ALTER TABLE `students` ADD UNIQUE INDEX (`name`, `class`);

-- LOAD STUDENTS
LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv' IGNORE
INTO TABLE students
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS
(@name, @class, @math_score_ca, @math_score_exam, @econ_score_ca, @econ_score_exam, @civic_score_ca, @civic_score_exam, @lit_score_ca, @lit_score_exam)
SET id = NULL, name = @name, class = @class;

-- REPEAT THIS STEP FOR EACH SUBJECT
LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv'
INTO TABLE subjects
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS
(@name, @class, @math_score_ca, @math_score_exam, @econ_score_ca, @econ_score_exam, @civic_score_ca, @civic_score_exam, @lit_score_ca, @lit_score_exam)
SET
    id = NULL,
    student_id = (SELECT id FROM students WHERE name = @name AND class = @class),
    subject_name = 'Math',
    subject_ca_score = @math_score_ca,
    subject_exam_score = @math_score_exam,
    subject_score = @math_score_ca + @math_score_exam,
    subject_type = 'CA';

-- THEN YOUR INSERT INTO results

A better approach would be to normalize your data structure (this is only an example and would need "polishing" to meet your requirements):

CREATE TABLE IF NOT EXISTS classes (
    id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(10) NOT NULL UNIQUE
);

CREATE TABLE IF NOT EXISTS students (
    id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    class_id INT UNSIGNED NOT NULL,
    UNIQUE (name, class_id),
    FOREIGN KEY (class_id) REFERENCES classes (id)
);

CREATE TABLE IF NOT EXISTS subjects (
    id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(50) NOT NULL,
    type ENUM('CA' /* ADD OTHER SUBJECT TYPES HERE OR MOVE TO THEIR OWN TABLE */) DEFAULT NULL
);
INSERT INTO subjects VALUES (1, 'Math', 'CA'), (2, 'Econ', 'CA'), (3, 'Civic', 'CA'), (4, 'Lit', 'CA');

CREATE TABLE IF NOT EXISTS results (
    id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    student_id INT UNSIGNED NOT NULL,
    subject_id INT UNSIGNED NOT NULL,
    ca_score INT NOT NULL,
    exam_score INT NOT NULL,
    score INT AS (ca_score + exam_score),
    grade VARCHAR(2) AS (CASE WHEN score IS NULL THEN NULL WHEN score >= 90 THEN 'A' WHEN score >= 80 THEN 'B' WHEN score >= 70 THEN 'C' WHEN score >= 60 THEN 'D' ELSE 'F' END),
    FOREIGN KEY (student_id) REFERENCES students (id),
    FOREIGN KEY (subject_id) REFERENCES subjects (id)
);

And parse the CSV into a temporary table:

CREATE TEMPORARY TABLE import_csv (
    name VARCHAR(255) NOT NULL,
    class VARCHAR(10) NOT NULL,
    math_score_ca TINYINT UNSIGNED NOT NULL,
    math_score_exam TINYINT UNSIGNED NOT NULL,
    econ_score_ca TINYINT UNSIGNED NOT NULL,
    econ_score_exam TINYINT UNSIGNED NOT NULL,
    civic_score_ca TINYINT UNSIGNED NOT NULL,
    civic_score_exam TINYINT UNSIGNED NOT NULL,
    lit_score_ca TINYINT UNSIGNED NOT NULL,
    lit_score_exam TINYINT UNSIGNED NOT NULL
);

LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv'
INTO TABLE import_csv
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;

And then use that to populate your other tables:

INSERT IGNORE INTO classes (name)
SELECT DISTINCT class
FROM import_csv;

INSERT IGNORE INTO students (name, class_id)
SELECT DISTINCT csv.name, c.id
FROM import_csv csv
JOIN classes c ON c.name = csv.class;

INSERT INTO results (student_id, subject_id, ca_score, exam_score)
SELECT s.id, 1 /* Math */, csv.math_score_ca, csv.math_score_exam
FROM import_csv csv
JOIN classes c ON csv.class = c.name
JOIN students s ON csv.name = s.name AND c.id = s.class_id;

INSERT INTO results (student_id, subject_id, ca_score, exam_score)
SELECT s.id, 2 /* Econ */, csv.econ_score_ca, csv.econ_score_exam
FROM import_csv csv
JOIN classes c ON csv.class = c.name
JOIN students s ON csv.name = s.name AND c.id = s.class_id;

INSERT INTO results (student_id, subject_id, ca_score, exam_score)
SELECT s.id, 3 /* Civic */, csv.civic_score_ca, csv.civic_score_exam
FROM import_csv csv
JOIN classes c ON csv.class = c.name
JOIN students s ON csv.name = s.name AND c.id = s.class_id;

INSERT INTO results (student_id, subject_id, ca_score, exam_score)
SELECT s.id, 4 /* Lit */, csv.lit_score_ca, csv.lit_score_exam
FROM import_csv csv
JOIN classes c ON csv.class = c.name
JOIN students s ON csv.name = s.name AND c.id = s.class_id;
英文:

For your current approach to work with your existing structure (bad idea, see Akina's answer) you would need to do multiple passes of the CSV, first inserting the students (no idea where you are getting @last_student_id from) and then the pass for each subject.

-- ADD UNIQUE KEY TO students (name, class)
ALTER TABLE `students` ADD UNIQUE INDEX (`name`, `class`);
-- LOAD STUDENTS
LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv' IGNORE
INTO TABLE students
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS
(@name, @class, @math_score_ca, @math_score_exam, @econ_score_ca, @econ_score_exam, @civic_score_ca, @civic_score_exam, @lit_score_ca, @lit_score_exam)
SET id = NULL, name = @name, class = @class;
-- REPEAT THIS STEP FOR EACH SUBJECT
LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv'
INTO TABLE subjects
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS
(@name, @class, @math_score_ca, @math_score_exam, @econ_score_ca, @econ_score_exam, @civic_score_ca, @civic_score_exam, @lit_score_ca, @lit_score_exam)
SET
id = NULL,
student_id = (SELECT id FROM students WHERE name = @name AND class = @class),
subject_name = 'Math',
subject_ca_score = @math_score_ca,
subject_exam_score = @math_score_exam,
subject_score = @math_score_ca + @math_score_exam,
subject_type = 'CA';
-- THEN YOUR INSERT INTO results

A better approach would be to normalise your data structure (this is only an example and would need "polishing" to meet your requirements):

CREATE TABLE IF NOT EXISTS classes (
id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(10) NOT NULL UNIQUE
);
CREATE TABLE IF NOT EXISTS students (
id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(255) NOT NULL,
class_id INT UNSIGNED NOT NULL,
UNIQUE (name, class_id),
FOREIGN KEY (class_id) REFERENCES classes (id)
);
CREATE TABLE IF NOT EXISTS subjects (
id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(50) NOT NULL,
type ENUM('CA' /* ADD OTHER SUBJECT TYPES HERE OR MOVE TO THEIR OWN TABLE */) DEFAULT NULL
);
INSERT INTO subjects VALUES (1, 'Math', 'CA'), (2, 'Econ', 'CA'), (3, 'Civic', 'CA'), (4, 'Lit', 'CA');
CREATE TABLE IF NOT EXISTS results (
id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
student_id INT UNSIGNED NOT NULL,
subject_id INT UNSIGNED NOT NULL,
ca_score INT NOT NULL,
exam_score INT NOT NULL,
score INT AS (ca_score + exam_score),
grade VARCHAR(2) AS (CASE WHEN score IS NULL THEN NULL WHEN score >= 90 THEN 'A' WHEN score >= 80 THEN 'B' WHEN score >= 70 THEN 'C' WHEN score >= 60 THEN 'D' ELSE 'F' END),
FOREIGN KEY (student_id) REFERENCES students (id),
FOREIGN KEY (subject_id) REFERENCES subjects (id)
);

And parse the CSV into a temporary table:

CREATE TEMPORARY TABLE import_csv (
name VARCHAR(255) NOT NULL,
class VARCHAR(10) NOT NULL,
math_score_ca TINYINT UNSIGNED NOT NULL,
math_score_exam TINYINT UNSIGNED NOT NULL,
econ_score_ca TINYINT UNSIGNED NOT NULL,
econ_score_exam TINYINT UNSIGNED NOT NULL,
civic_score_ca TINYINT UNSIGNED NOT NULL,
civic_score_exam TINYINT UNSIGNED NOT NULL,
lit_score_ca TINYINT UNSIGNED NOT NULL,
lit_score_exam TINYINT UNSIGNED NOT NULL
);
LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv'
INTO TABLE import_csv
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;

And then use that to populate your other tables:

INSERT IGNORE INTO classes (name)
SELECT DISTINCT class
FROM import_csv;
INSERT IGNORE INTO students (name, class_id)
SELECT DISTINCT csv.name, c.id
FROM import_csv csv
JOIN classes c ON c.name = csv.class;
INSERT INTO results (student_id, subject_id, ca_score, exam_score)
SELECT s.id, 1 /* Math */, csv.math_score_ca, csv.math_score_exam
FROM import_csv csv
JOIN classes c ON csv.class = c.name
JOIN students s ON csv.name = s.name AND c.id = s.class_id;
INSERT INTO results (student_id, subject_id, ca_score, exam_score)
SELECT s.id, 2 /* Econ */, csv.econ_score_ca, csv.econ_score_exam
FROM import_csv csv
JOIN classes c ON csv.class = c.name
JOIN students s ON csv.name = s.name AND c.id = s.class_id;
INSERT INTO results (student_id, subject_id, ca_score, exam_score)
SELECT s.id, 3 /* Civic */, csv.civic_score_ca, csv.civic_score_exam
FROM import_csv csv
JOIN classes c ON csv.class = c.name
JOIN students s ON csv.name = s.name AND c.id = s.class_id;
INSERT INTO results (student_id, subject_id, ca_score, exam_score)
SELECT s.id, 4 /* Lit */, csv.lit_score_ca, csv.lit_score_exam
FROM import_csv csv
JOIN classes c ON csv.class = c.name
JOIN students s ON csv.name = s.name AND c.id = s.class_id;

huangapple
  • 本文由 发表于 2023年4月4日 03:40:24
  • 转载请务必保留本文链接:https://go.coder-hub.com/75923209.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定