英文:
Efficient way to preprocess data for DETR model (object detection)
问题
import keras_core as keras # 以便我可以使用 keras_cv
import keras_cv
from keras_cv import bounding_box
from keras_cv import visualization
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
def visualize_dataset(inputs, value_range, rows, cols, bounding_box_format):
inputs = next(iter(inputs.take(1)))
images, bounding_boxes = inputs["images"], inputs["bounding_boxes"]
visualization.plot_bounding_box_gallery(
images,
value_range=value_range,
rows=rows,
cols=cols,
y_true=bounding_boxes,
scale=5,
font_scale=0.7,
bounding_box_format=bounding_box_format,
class_mapping=class_mapping,
)
def unpackage_raw_tfds_inputs(inputs, bounding_box_format):
image = inputs["image"]
boxes = keras_cv.bounding_box.convert_format(
inputs["objects"]["bbox"],
images=image,
source="rel_yxyx",
target=bounding_box_format,
)
bounding_boxes = {
"classes": tf.cast(inputs["objects"]["label"] + 1, dtype=tf.float32),
"boxes": tf.cast(boxes, dtype=tf.float32),
}
return {"images": tf.cast(image, tf.float32), "bounding_boxes": bounding_boxes}
def load_pascal_voc(split, dataset, bounding_box_format):
ds = tfds.load(dataset, split=split, with_info=False, shuffle_files=True)
ds = ds.map(lambda x: unpackage_raw_tfds_inputs(x, bounding_box_format=bounding_box_format), num_parallel_calls=tf.data.AUTOTUNE)
return ds
train_ds = load_pascal_voc(split="test", dataset="voc/2007", bounding_box_format="xywh"); print(tf.data.experimental.cardinality(train_ds))
val_ds = load_pascal_voc(split="validation", dataset="voc/2007", bounding_box_format="xywh"); print(tf.data.experimental.cardinality(val_ds))
test_ds = load_pascal_voc(split="train", dataset="voc/2007", bounding_box_format="xywh"); print(tf.data.experimental.cardinality(test_ds))
# 以便我可以可视化数据集
train_ds = train_ds.ragged_batch(1, drop_remainder=True)
val_ds = val_ds.ragged_batch(1, drop_remainder=True)
test_ds = test_ds.ragged_batch(1, drop_remainder=True)
用于在 TensorFlow 中从头训练 DETR 模型时,对训练数据的类别和边界框进行填充
def pad(dataset):
images, classes, boxes = [], [], [] # (m, None, None, 3), (m, 42), (m, 42, 4)
for x in dataset:
images.append(list(x["images"][0].numpy()))
classes.append(list(tf.keras.utils.pad_sequences(x["bounding_boxes"]["classes"].numpy(), maxlen=42, padding='post')[0]))
padded_bboxes = np.zeros((1, 42, 4), dtype=np.float32)
padded_bboxes[:, :np.shape(x["bounding_boxes"]["boxes"][0])[0], :] = x["bounding_boxes"]["boxes"][0]
boxes.append(list(padded_bboxes[0]))
dataset = tf.data.Dataset.from_tensor_slices((images, (classes, boxes)))
return dataset
是否有其他更高效的方法可以在不将其从 tf.data.Dataset
格式转换为数组,然后再转换为 tf.data.Dataset
格式的情况下完成上述操作?
我尝试使用 .map()
对数据集进行操作,但出现了一个错误,提示我只能在 eager 模式下使用 tf.keras.utils.pad_sequences
。
英文:
import keras_core as keras # so that i can use keras_cv
import keras_cv
from keras_cv import bounding_box
from keras_cv import visualization
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
def visualize_dataset(inputs, value_range, rows, cols, bounding_box_format):
inputs = next(iter(inputs.take(1)))
images, bounding_boxes = inputs["images"], inputs["bounding_boxes"]
visualization.plot_bounding_box_gallery(
images,
value_range=value_range,
rows=rows,
cols=cols,
y_true=bounding_boxes,
scale=5,
font_scale=0.7,
bounding_box_format=bounding_box_format,
class_mapping=class_mapping,
)
def unpackage_raw_tfds_inputs(inputs, bounding_box_format):
image = inputs["image"]
boxes = keras_cv.bounding_box.convert_format(
inputs["objects"]["bbox"],
images=image,
source="rel_yxyx",
target=bounding_box_format,
)
bounding_boxes = {
"classes": tf.cast(inputs["objects"]["label"] + 1, dtype=tf.float32),
"boxes": tf.cast(boxes, dtype=tf.float32),
}
return {"images": tf.cast(image, tf.float32), "bounding_boxes": bounding_boxes}
def load_pascal_voc(split, dataset, bounding_box_format):
ds = tfds.load(dataset, split=split, with_info=False, shuffle_files=True)
ds = ds.map(lambda x: unpackage_raw_tfds_inputs(x, bounding_box_format=bounding_box_format), num_parallel_calls=tf.data.AUTOTUNE)
return ds
train_ds = load_pascal_voc(split="test", dataset="voc/2007", bounding_box_format="xywh"); print(tf.data.experimental.cardinality(train_ds))
val_ds = load_pascal_voc(split="validation", dataset="voc/2007", bounding_box_format="xywh"); print(tf.data.experimental.cardinality(val_ds))
test_ds = load_pascal_voc(split="train", dataset="voc/2007", bounding_box_format="xywh"); print(tf.data.experimental.cardinality(test_ds))
# so that I can visualize_dataset
train_ds = train_ds.ragged_batch(1, drop_remainder=True)
val_ds = val_ds.ragged_batch(1, drop_remainder=True)
test_ds = test_ds.ragged_batch(1, drop_remainder=True)
Padding the classes and bounding boxes for training DETR model from scratch in tensorflow
def pad(dataset):
images, classes, boxes = [], [], [] # (m, None, None, 3), (m, 42), (m, 42, 4)
for x in dataset:
images.append(list(x["images"][0].numpy()))
classes.append(list(tf.keras.utils.pad_sequences(x["bounding_boxes"]["classes"].numpy(), maxlen=42, padding='post')[0]))
padded_bboxes = np.zeros((1, 42, 4), dtype=np.float32)
padded_bboxes[: ,:np.shape(x["bounding_boxes"]["boxes"][0])[0], :] = x["bounding_boxes"]["boxes"][0]
boxes.append(list(padded_bboxes[0]))
dataset = tf.data.Dataset.from_tensor_slices((images, (classes, boxes)))
return dataset
Is there any other efficient method to do the above without converting them from tf.data.Dataset
format to array then again convert to tf.data.Dataset
format?
I tried to use .map()
to the datasets but got an error that I can only use tf.keras.utils.pad_sequences
in eager mode
答案1
得分: 0
请查看这里提出的问题,其中包含解释和解决方案。起初我有点困惑,因为在tf 2.x
中,默认情况下eager
模式是启用的,但在这里被禁用了。
我还没有检查过,但也请检查一下是否升级tf
可以解决问题。我之所以这样说,是因为解决方案中提到未来的tf 2.x
可能不会有这个问题。
如果你不需要使用map
函数,那么你的解决方案已经足够好了。
编辑
在stackoverflow上有这个问题。它展示了如何在map()
函数中启用eager模式执行。这应该可以解决你的问题。
英文:
Please check the issue raised here with the explanation and solution. I was a bit confused at first to see eager
mode is disabled when by default it is enabled in tf 2.x
.
I haven't checked this but also check if upgrading tf
solves the problem or not. I am saying this because, the solution states in future tf 2.x
may not have this problem.
If you don't need to use the map
function then your solution is good enough.
EDIT
This question in stackoverflow. It shows how to enable eager mode execution within the map()
function. This should solve your problem.
答案2
得分: 0
使用tf.pad
函数
train_ds = load_pascal_voc(split="validation", dataset="voc/2007", bounding_box_format="xywh")
N = 42
max_height, max_width = 500, 500
def preprocess(x):
return keras_cv.layers.Resizing(max_height, max_width, bounding_box_format="xywh", pad_to_aspect_ratio=True)(x["images"]),
(tf.pad([x["bounding_boxes"]["classes"]], [[0,0], [N,N]])[0][N:-len(x["bounding_boxes"]["classes"])], tf.pad(x["bounding_boxes"]["boxes"], [[N,N], [0,0]])[N:-len(x["bounding_boxes"]["boxes"])])
TRAIN_DS = train_ds.map(lambda x: preprocess(x))
for i in TRAIN_DS.take(2): # test
print(i[0].numpy().shape)
print(i[1][0].numpy().shape)
print(i[1][1].numpy().shape)
英文:
Use tf.pad
function
train_ds = load_pascal_voc(split="validation", dataset="voc/2007", bounding_box_format="xywh")
N = 42
max_height, max_width = 500, 500
def preprocess(x):
return keras_cv.layers.Resizing(max_height, max_width, bounding_box_format="xywh", pad_to_aspect_ratio=True)(x["images"]),
(tf.pad([x["bounding_boxes"]["classes"]], [[0,0], [N,N]])[0][N:-len(x["bounding_boxes"]["classes"])], tf.pad(x["bounding_boxes"]["boxes"], [[N,N], [0,0]])[N:-len(x["bounding_boxes"]["boxes"])])
TRAIN_DS = train_ds.map(lambda x: preprocess(x))
for i in TRAIN_DS.take(2): # test
print(i[0].numpy().shape)
print(i[1][0].numpy().shape)
print(i[1][1].numpy().shape)
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论