Efficient way to preprocess data for DETR model (object detection)


Question

import keras_core as keras  # so that I can use keras_cv
import keras_cv
from keras_cv import bounding_box
from keras_cv import visualization
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np

def visualize_dataset(inputs, value_range, rows, cols, bounding_box_format):
    inputs = next(iter(inputs.take(1)))
    images, bounding_boxes = inputs["images"], inputs["bounding_boxes"]
    visualization.plot_bounding_box_gallery(
        images,
        value_range=value_range,
        rows=rows,
        cols=cols,
        y_true=bounding_boxes,
        scale=5,
        font_scale=0.7,
        bounding_box_format=bounding_box_format,
        class_mapping=class_mapping,
    )


def unpackage_raw_tfds_inputs(inputs, bounding_box_format):
    image = inputs["image"]
    boxes = keras_cv.bounding_box.convert_format(
        inputs["objects"]["bbox"],
        images=image,
        source="rel_yxyx",
        target=bounding_box_format,
    )
    bounding_boxes = {
        "classes": tf.cast(inputs["objects"]["label"] + 1, dtype=tf.float32),
        "boxes": tf.cast(boxes, dtype=tf.float32),
    }
    return {"images": tf.cast(image, tf.float32), "bounding_boxes": bounding_boxes}

def load_pascal_voc(split, dataset, bounding_box_format):
    ds = tfds.load(dataset, split=split, with_info=False, shuffle_files=True)
    ds = ds.map(lambda x: unpackage_raw_tfds_inputs(x, bounding_box_format=bounding_box_format), num_parallel_calls=tf.data.AUTOTUNE)
    return ds


train_ds = load_pascal_voc(split="test", dataset="voc/2007", bounding_box_format="xywh"); print(tf.data.experimental.cardinality(train_ds))
val_ds = load_pascal_voc(split="validation", dataset="voc/2007", bounding_box_format="xywh"); print(tf.data.experimental.cardinality(val_ds))
test_ds = load_pascal_voc(split="train", dataset="voc/2007", bounding_box_format="xywh"); print(tf.data.experimental.cardinality(test_ds))

# batch of 1 so that I can use visualize_dataset
train_ds = train_ds.ragged_batch(1, drop_remainder=True)
val_ds = val_ds.ragged_batch(1, drop_remainder=True)
test_ds = test_ds.ragged_batch(1, drop_remainder=True)
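
For reference, a sample can then be rendered with a call like the one below. It assumes class_mapping (a dict mapping the shifted label ids to class names) has been defined, since visualize_dataset above references it but the snippet does not define it:

# hypothetical call; class_mapping must be defined for the labels to render
visualize_dataset(train_ds, value_range=(0, 255), rows=1, cols=1, bounding_box_format="xywh")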

Padding the classes and bounding boxes for training a DETR model from scratch in TensorFlow:

def pad(dataset):
    images, classes, boxes = [], [], []  # (m, None, None, 3), (m, 42), (m, 42, 4)

    for x in dataset:
        images.append(list(x["images"][0].numpy()))

        classes.append(list(tf.keras.utils.pad_sequences(x["bounding_boxes"]["classes"].numpy(), maxlen=42, padding='post')[0]))

        padded_bboxes = np.zeros((1, 42, 4), dtype=np.float32)
        padded_bboxes[:, :np.shape(x["bounding_boxes"]["boxes"][0])[0], :] = x["bounding_boxes"]["boxes"][0]
        boxes.append(list(padded_bboxes[0]))

    dataset = tf.data.Dataset.from_tensor_slices((images, (classes, boxes)))
    return dataset

Is there a more efficient way to do the above without converting the data from the tf.data.Dataset format to arrays and then back to a tf.data.Dataset?

I tried to use .map() on the datasets, but got an error saying that tf.keras.utils.pad_sequences can only be used in eager mode.


Answer 1

Score: 0

Please check the issue raised here, which includes an explanation and a solution. I was confused at first to see eager execution disabled inside the map function, since it is enabled by default in TF 2.x.

I haven't verified this myself, but also check whether upgrading TensorFlow solves the problem; the linked solution mentions that future TF 2.x releases may not have this issue.

If you don't need to use the map function, your current solution is good enough.

EDIT

This question on Stack Overflow shows how to enable eager execution within the map() function. That should solve your problem.
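
For reference, here is a minimal sketch of that idea: wrapping the eager-only padding in tf.py_function so that tf.keras.utils.pad_sequences can run inside .map(). The helper names (pad_example, pad_with_py_function, MAX_BOXES, unbatched_ds, padded_ds) are just for illustration, and it assumes the unbatched dataset returned by load_pascal_voc from the question:

import numpy as np
import tensorflow as tf

MAX_BOXES = 42  # same cap as in the question's pad() function

def pad_example(image, classes, boxes):
    # Runs eagerly inside tf.py_function, so pad_sequences is allowed here.
    padded_classes = tf.keras.utils.pad_sequences(
        [classes.numpy()], maxlen=MAX_BOXES, padding="post", dtype="float32")[0]
    padded_boxes = np.zeros((MAX_BOXES, 4), dtype=np.float32)
    padded_boxes[:boxes.shape[0], :] = boxes.numpy()  # assumes at most MAX_BOXES objects
    return image, padded_classes, padded_boxes

def pad_with_py_function(x):
    image, padded_classes, padded_boxes = tf.py_function(
        pad_example,
        inp=[x["images"], x["bounding_boxes"]["classes"], x["bounding_boxes"]["boxes"]],
        Tout=[tf.float32, tf.float32, tf.float32],
    )
    # py_function loses static shape information, so restore it for downstream ops.
    padded_classes.set_shape([MAX_BOXES])
    padded_boxes.set_shape([MAX_BOXES, 4])
    return image, (padded_classes, padded_boxes)

# Applied to the unbatched dataset (i.e. before ragged_batch).
unbatched_ds = load_pascal_voc(split="test", dataset="voc/2007", bounding_box_format="xywh")
padded_ds = unbatched_ds.map(pad_with_py_function, num_parallel_calls=tf.data.AUTOTUNE)

Note that py_function runs the Python code eagerly for each element, so it is simpler but generally slower than pure TensorFlow ops such as the tf.pad approach in the other answer.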


Answer 2

Score: 0

Use the tf.pad function:

train_ds = load_pascal_voc(split="validation", dataset="voc/2007", bounding_box_format="xywh")

N = 42
max_height, max_width = 500, 500
def preprocess(x):
  # Pad classes/boxes with N zeros on each side, then slice out exactly N (= 42)
  # entries: the original values followed by zero padding.
  return (keras_cv.layers.Resizing(max_height, max_width, bounding_box_format="xywh", pad_to_aspect_ratio=True)(x["images"]),
          (tf.pad([x["bounding_boxes"]["classes"]], [[0,0], [N,N]])[0][N:-len(x["bounding_boxes"]["classes"])],
           tf.pad(x["bounding_boxes"]["boxes"], [[N,N], [0,0]])[N:-len(x["bounding_boxes"]["boxes"])]))

TRAIN_DS = train_ds.map(lambda x: preprocess(x))

for i in TRAIN_DS.take(2): # test
  print(i[0].numpy().shape)
  print(i[1][0].numpy().shape)
  print(i[1][1].numpy().shape)
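
Because each element produced by preprocess has a fixed shape (a 500x500 image, 42 classes, 42 boxes), the mapped dataset can then be batched directly; the batch size below is only an example:

TRAIN_DS = TRAIN_DS.batch(8).prefetch(tf.data.AUTOTUNE)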
