Efficient way to preprocess data for DETR model (object detection)


Question

import keras_core as keras  # so that I can use keras_cv
import keras_cv
from keras_cv import bounding_box
from keras_cv import visualization
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np

def visualize_dataset(inputs, value_range, rows, cols, bounding_box_format):
    inputs = next(iter(inputs.take(1)))
    images, bounding_boxes = inputs["images"], inputs["bounding_boxes"]
    visualization.plot_bounding_box_gallery(
        images,
        value_range=value_range,
        rows=rows,
        cols=cols,
        y_true=bounding_boxes,
        scale=5,
        font_scale=0.7,
        bounding_box_format=bounding_box_format,
        class_mapping=class_mapping,
    )


def unpackage_raw_tfds_inputs(inputs, bounding_box_format):
    image = inputs["image"]
    boxes = keras_cv.bounding_box.convert_format(
        inputs["objects"]["bbox"],
        images=image,
        source="rel_yxyx",
        target=bounding_box_format,
    )
    bounding_boxes = {
        "classes": tf.cast(inputs["objects"]["label"] + 1, dtype=tf.float32),
        "boxes": tf.cast(boxes, dtype=tf.float32),
    }
    return {"images": tf.cast(image, tf.float32), "bounding_boxes": bounding_boxes}

def load_pascal_voc(split, dataset, bounding_box_format):
    ds = tfds.load(dataset, split=split, with_info=False, shuffle_files=True)
    ds = ds.map(lambda x: unpackage_raw_tfds_inputs(x, bounding_box_format=bounding_box_format), num_parallel_calls=tf.data.AUTOTUNE)
    return ds


train_ds = load_pascal_voc(split="test", dataset="voc/2007", bounding_box_format="xywh"); print(tf.data.experimental.cardinality(train_ds))
val_ds = load_pascal_voc(split="validation", dataset="voc/2007", bounding_box_format="xywh"); print(tf.data.experimental.cardinality(val_ds))
test_ds = load_pascal_voc(split="train", dataset="voc/2007", bounding_box_format="xywh"); print(tf.data.experimental.cardinality(test_ds))

# batch of 1 so that I can use visualize_dataset
train_ds = train_ds.ragged_batch(1, drop_remainder=True)
val_ds = val_ds.ragged_batch(1, drop_remainder=True)
test_ds = test_ds.ragged_batch(1, drop_remainder=True)
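
For reference, a sample can then be rendered with a call like the one below. It assumes class_mapping (a dict mapping the shifted label ids to class names) has been defined, since visualize_dataset above references it but the snippet does not define it:

# hypothetical call; class_mapping must be defined for the labels to render
visualize_dataset(train_ds, value_range=(0, 255), rows=1, cols=1, bounding_box_format="xywh")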

Padding the classes and bounding boxes for training a DETR model from scratch in TensorFlow:

def pad(dataset):
    images, classes, boxes = [], [], []  # (m, None, None, 3), (m, 42), (m, 42, 4)

    for x in dataset:
        images.append(list(x["images"][0].numpy()))

        classes.append(list(tf.keras.utils.pad_sequences(x["bounding_boxes"]["classes"].numpy(), maxlen=42, padding='post')[0]))

        padded_bboxes = np.zeros((1, 42, 4), dtype=np.float32)
        padded_bboxes[:, :np.shape(x["bounding_boxes"]["boxes"][0])[0], :] = x["bounding_boxes"]["boxes"][0]
        boxes.append(list(padded_bboxes[0]))

    dataset = tf.data.Dataset.from_tensor_slices((images, (classes, boxes)))
    return dataset

Is there a more efficient way to do the above without converting the data from the tf.data.Dataset format to arrays and then back to a tf.data.Dataset?

I tried to use .map() on the datasets, but got an error saying that tf.keras.utils.pad_sequences can only be used in eager mode.


Answer 1

Score: 0

Please check the issue raised here, which includes an explanation and a solution. I was confused at first to see eager execution disabled inside the map function, since it is enabled by default in TF 2.x.

I haven't verified this myself, but also check whether upgrading TensorFlow solves the problem; the linked solution mentions that future TF 2.x releases may not have this issue.

If you don't need to use the map function, your current solution is good enough.

EDIT

This question on Stack Overflow shows how to enable eager execution within the map() function. That should solve your problem.
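
For reference, here is a minimal sketch of that idea: wrapping the eager-only padding in tf.py_function so that tf.keras.utils.pad_sequences can run inside .map(). The helper names (pad_example, pad_with_py_function, MAX_BOXES, unbatched_ds, padded_ds) are just for illustration, and it assumes the unbatched dataset returned by load_pascal_voc from the question:

import numpy as np
import tensorflow as tf

MAX_BOXES = 42  # same cap as in the question's pad() function

def pad_example(image, classes, boxes):
    # Runs eagerly inside tf.py_function, so pad_sequences is allowed here.
    padded_classes = tf.keras.utils.pad_sequences(
        [classes.numpy()], maxlen=MAX_BOXES, padding="post", dtype="float32")[0]
    padded_boxes = np.zeros((MAX_BOXES, 4), dtype=np.float32)
    padded_boxes[:boxes.shape[0], :] = boxes.numpy()  # assumes at most MAX_BOXES objects
    return image, padded_classes, padded_boxes

def pad_with_py_function(x):
    image, padded_classes, padded_boxes = tf.py_function(
        pad_example,
        inp=[x["images"], x["bounding_boxes"]["classes"], x["bounding_boxes"]["boxes"]],
        Tout=[tf.float32, tf.float32, tf.float32],
    )
    # py_function loses static shape information, so restore it for downstream ops.
    padded_classes.set_shape([MAX_BOXES])
    padded_boxes.set_shape([MAX_BOXES, 4])
    return image, (padded_classes, padded_boxes)

# Applied to the unbatched dataset (i.e. before ragged_batch).
unbatched_ds = load_pascal_voc(split="test", dataset="voc/2007", bounding_box_format="xywh")
padded_ds = unbatched_ds.map(pad_with_py_function, num_parallel_calls=tf.data.AUTOTUNE)

Note that py_function runs the Python code eagerly for each element, so it is simpler but generally slower than pure TensorFlow ops such as the tf.pad approach in the other answer.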


Answer 2

Score: 0

Use the tf.pad function:

train_ds = load_pascal_voc(split="validation", dataset="voc/2007", bounding_box_format="xywh")

N = 42
max_height, max_width = 500, 500
def preprocess(x):
  # Pad classes/boxes with N zeros on each side, then slice out exactly N (= 42)
  # entries: the original values followed by zero padding.
  return (keras_cv.layers.Resizing(max_height, max_width, bounding_box_format="xywh", pad_to_aspect_ratio=True)(x["images"]),
          (tf.pad([x["bounding_boxes"]["classes"]], [[0,0], [N,N]])[0][N:-len(x["bounding_boxes"]["classes"])],
           tf.pad(x["bounding_boxes"]["boxes"], [[N,N], [0,0]])[N:-len(x["bounding_boxes"]["boxes"])]))

TRAIN_DS = train_ds.map(lambda x: preprocess(x))

for i in TRAIN_DS.take(2): # test
  print(i[0].numpy().shape)
  print(i[1][0].numpy().shape)
  print(i[1][1].numpy().shape)
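
Because each element produced by preprocess has a fixed shape (a 500x500 image, 42 classes, 42 boxes), the mapped dataset can then be batched directly; the batch size below is only an example:

TRAIN_DS = TRAIN_DS.batch(8).prefetch(tf.data.AUTOTUNE)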
