英文:
The requested array has an inhomogeneous shape after 1 dimensions when converting list to numpy array
问题
我正在尝试使用名为load_data_new
的函数加载训练和测试数据,该函数从topomaps/
文件夹读取数据,并从labels/
文件夹读取标签。它们都包含.npy
文件。
具体来说,topomaps/
文件夹包含:
例如,s01_trial03.npy
包含128个拓扑图,而s01_trial12
包含2944个拓扑图(即它们的形状可能不同!)
而labels/
文件夹包含:
此外,训练数据必须仅包含标签为0的拓扑图(而测试数据可以包含标签为0、1或2的拓扑图)。这是我的代码:
def load_data_new(topomap_folder: str, labels_folder: str, test_size: float = 0.2) -> tuple:
"""
Load and pair topomap data and corresponding label data from separate folders
:param topomap_folder: (str) The path to the folder containing topomaps .npy files
:param labels_folder: (str) The path to the folder containing labels .npy files
:param test_size: (float) The proportion of data to be allocated to the testing set (default is 0.2)
:return: (tuple) Two tuples, each containing a topomap ndarray and its corresponding label 1D-array.
Note:
The function assumes that the filenames of the topomaps and labels are in the same order.
It also assumes that there is a one-to-one correspondence between the topomap files and the label files.
If there are inconsistencies between the shapes of the topomap and label files, it will print a warning message.
Example:
topomap_folder = "topomaps"
labels_folder = "labels"
(x_train, y_train), (x_test, y_test) = load_data_new(topomap_folder, labels_folder, test_size=0.2)
"""
topomap_files = os.listdir(topomap_folder)
labels_files = os.listdir(labels_folder)
# Sort the files to ensure the order is consistent
topomap_files.sort()
labels_files.sort()
labels = []
topomaps = []
for topomap_file, label_file in zip(topomap_files, labels_files):
if topomap_file.endswith(".npy") and label_file.endswith(".npy"):
topomap_path = os.path.join(topomap_folder, topomap_file)
label_path = os.path.join(labels_folder, label_file)
topomap_data = np.load(topomap_path)
label_data = np.load(label_path)
if topomap_data.shape[0] != label_data.shape[0]:
raise ValueError(f"Warning: Inconsistent shapes for {topomap_file} and {label_file}")
topomaps.append(topomap_data)
labels.append(label_data)
x = np.array(topomaps)
y = np.array(labels)
# Training set only contains images whose label is 0 for anomaly detection
train_indices = np.where(y == 0)[0]
x_train = x[train_indices]
y_train = y[train_indices]
# Split the remaining data into testing sets
remaining_indices = np.where(y != 0)[0]
x_remaining = x[remaining_indices]
y_remaining = y[remaining_indices]
_, x_test, _, y_test = train_test_split(x_remaining, y_remaining, test_size=test_size)
return (x_train, y_train), (x_test, y_test)
(x_train, y_train), (x_test, y_test) = load_data_new("topomaps", "labels")
但不幸的是,我遇到了这个错误:
Traceback (most recent call last):
File "/Users/alex/PycharmProjects/VAE-EEG-XAI/vae.py", line 574, in <module>
(x_train, y_train), (x_test, y_test) = load_data_new("topomaps", "labels")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/alex/PycharmProjects/VAE-EEG-XAI/vae.py", line 60, in load_data_new
x = np.array(topomaps)
^^^^^^^^^^^^^^^^^^
ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (851,) + inhomogeneous part.
这表明topomaps
列表中的元素具有不同的形状,导致在尝试将其转换为NumPy数组时出现不均匀的数组。这个错误是因为topomaps
列表中的各个拓扑图具有不同的形状,而NumPy数组需要具有一致形状的元素。
我该如何修复这个问题?
英文:
I am trying to load training and test data using a function named load_data_new
which reads data from topomaps/
folder and labels from labels/
folder. They both contain .npy
files.
Specifically topomaps/
folder contains:
where, for example, s01_trial03.npy
contains 128 topomaps while s01_trial12
contains 2944 topomaps (that is, they might differ in shape!)
while labels/
folder contains:
Moreover training data must contain only topomaps whose label is 0 (while test data can contain topomaps whose label is 0, 1 or 2). This is my code:
def load_data_new(topomap_folder: str, labels_folder: str, test_size: float = 0.2) -> tuple:
"""
Load and pair topomap data and corresponding label data from separate folders
:param topomap_folder: (str) The path to the folder containing topomaps .npy files
:param labels_folder: (str) The path to the folder containing labels .npy files
:param test_size: (float) The proportion of data to be allocated to the testing set (default is 0.2)
:return: (tuple) Two tuples, each containing a topomap ndarray and its corresponding label 1D-array.
Note:
The function assumes that the filenames of the topomaps and labels are in the same order.
It also assumes that there is a one-to-one correspondence between the topomap files and the label files.
If there are inconsistencies between the shapes of the topomap and label files, it will print a warning message.
Example:
topomap_folder = "topomaps"
labels_folder = "labels"
(x_train, y_train), (x_test, y_test) = load_data_new(topomap_folder, labels_folder, test_size=0.2)
"""
topomap_files = os.listdir(topomap_folder)
labels_files = os.listdir(labels_folder)
# Sort the files to ensure the order is consistent
topomap_files.sort()
labels_files.sort()
labels = []
topomaps = []
for topomap_file, label_file in zip(topomap_files, labels_files):
if topomap_file.endswith(".npy") and label_file.endswith(".npy"):
topomap_path = os.path.join(topomap_folder, topomap_file)
label_path = os.path.join(labels_folder, label_file)
topomap_data = np.load(topomap_path)
label_data = np.load(label_path)
if topomap_data.shape[0] != label_data.shape[0]:
raise ValueError(f"Warning: Inconsistent shapes for {topomap_file} and {label_file}")
topomaps.append(topomap_data)
labels.append(label_data)
x = np.array(topomaps)
y = np.array(labels)
# Training set only contains images whose label is 0 for anomaly detection
train_indices = np.where(y == 0)[0]
x_train = x[train_indices]
y_train = y[train_indices]
# Split the remaining data into testing sets
remaining_indices = np.where(y != 0)[0]
x_remaining = x[remaining_indices]
y_remaining = y[remaining_indices]
_, x_test, _, y_test = train_test_split(x_remaining, y_remaining, test_size=test_size)
return (x_train, y_train), (x_test, y_test)
(x_train, y_train), (x_test, y_test) = load_data_new("topomaps", "labels")
But unfortunately I am getting this error:
Traceback (most recent call last):
File "/Users/alex/PycharmProjects/VAE-EEG-XAI/vae.py", line 574, in <module>
(x_train, y_train), (x_test, y_test) = load_data_new("topomaps", "labels")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/alex/PycharmProjects/VAE-EEG-XAI/vae.py", line 60, in load_data_new
x = np.array(topomaps)
^^^^^^^^^^^^^^^^^^
ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (851,) + inhomogeneous part.
Which indicates that the elements within the topomaps
list have different shapes, leading to an inhomogeneous array when trying to convert it to a NumPy array. This error occurs because the individual topomaps in the topomaps list have different shapes, and NumPy arrays require elements of consistent shape.
How may I fix?
答案1
得分: 0
我用以下方式解决了这个问题:
def load_data(topomaps_folder: str, labels_folder: str, test_size=0.2) -> tuple:
x, y = _create_dataset(topomaps_folder, labels_folder)
# 训练集仅包含标签为0的图像,用于异常检测
train_indices = np.where(y == 0)[0]
x_train = x[train_indices]
y_train = y[train_indices]
# 将剩余数据分割为测试集
remaining_indices = np.where(y != 0)[0]
x_remaining = x[remaining_indices]
y_remaining = y[remaining_indices]
_, x_test, _, y_test = train_test_split(x_remaining, y_remaining, test_size=test_size)
return (x_train, y_train), (x_test, y_test)
def _create_dataset(topomaps_folder, labels_folder):
topomaps_files = os.listdir(topomaps_folder)
labels_files = os.listdir(labels_folder)
topomaps_files.sort()
labels_files.sort()
x = []
y = []
n_files = len(topomaps_files)
for topomaps_file, labels_file in tqdm(zip(topomaps_files, labels_files), total=n_files, desc="加载数据集"):
topomaps_array = np.load(f"{topomaps_folder}/{topomaps_file}")
labels_array = np.load(f"{labels_folder}/{labels_file}")
if topomaps_array.shape[0] != labels_array.shape[0]:
raise Exception("形状必须相等")
for i in range(topomaps_array.shape[0]):
x.append(topomaps_array[i])
y.append(labels_array[i])
x = np.array(x)
y = np.array(y)
return x, y
以上是翻译好的代码部分。
英文:
I simply solved the issue this way:
def load_data(topomaps_folder: str, labels_folder: str, test_size=0.2) -> tuple:
x, y = _create_dataset(topomaps_folder, labels_folder)
# Training set only contains images whose label is 0 for anomaly detection
train_indices = np.where(y == 0)[0]
x_train = x[train_indices]
y_train = y[train_indices]
# Split the remaining data into testing sets
remaining_indices = np.where(y != 0)[0]
x_remaining = x[remaining_indices]
y_remaining = y[remaining_indices]
_, x_test, _, y_test = train_test_split(x_remaining, y_remaining, test_size=test_size)
return (x_train, y_train), (x_test, y_test)
def _create_dataset(topomaps_folder, labels_folder):
topomaps_files = os.listdir(topomaps_folder)
labels_files = os.listdir(labels_folder)
topomaps_files.sort()
labels_files.sort()
x = []
y = []
n_files = len(topomaps_files)
for topomaps_file, labels_file in tqdm(zip(topomaps_files, labels_files), total=n_files, desc="Loading data set"):
topomaps_array = np.load(f"{topomaps_folder}/{topomaps_file}")
labels_array = np.load(f"{labels_folder}/{labels_file}")
if topomaps_array.shape[0] != labels_array.shape[0]:
raise Exception("Shapes must be equal")
for i in range(topomaps_array.shape[0]):
x.append(topomaps_array[i])
y.append(labels_array[i])
x = np.array(x)
y = np.array(y)
return x, y
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论