问题

我使用了TensorFlow.js的YoloV7目标检测实现，但我无法正确缩放和处理边界框的比例。我得到了正确的空间输出，格式为[x1, y1, width, height]，但因为模型输入图像是640x640，所以我没有得到确切的位置：

[
    [
        371.74334716796875,
        101.12919616699219,
        19.002845764160156,
        39.24892807006836
    ],
    [
        45.18428421020508,
        181.0949249267578,
        66.98155212402344,
        74.84469604492188
    ],
    [
        405.8454284667969,
        239.25437927246094,
        92.49766540527344,
        278.452880859375
    ],
    [
        292.5257873535156,
        264.4200744628906,
        77.10870361328125,
        263.32293701171875
    ],
    [
        102.06099700927734,
        249.17529296875,
        71.49154663085938,
        326.78228759765625
    ],
    [
        200.99879455566406,
        256.24462890625,
        118.14222717285156,
        311.6120910644531
    ]
]

/**
 * Paints one labelled rectangle per detection onto a canvas.
 *
 * The canvas is cleared first. Boxes are drawn exactly as returned by
 * `xywh2xyxy`; `classThreshold` and `ratios` are accepted but never read, so
 * no rescaling to the source image takes place here.
 *
 * @param canvasRef      Canvas element to draw on (its 2D context is used).
 * @param classThreshold Not used by this implementation.
 * @param boxes_data     Per-detection boxes, each passed to `xywh2xyxy`.
 * @param scores_data    Per-detection scores; shown as a percentage.
 * @param classes_data   Per-detection class indices, looked up in `labels`.
 * @param ratios         Not used by this implementation.
 */
export const renderBoxes = (
    canvasRef,
    classThreshold,
    boxes_data,
    scores_data,
    classes_data,
    ratios
) => {
    const boxColor = "#B033FF";
    const context = canvasRef.getContext("2d");

    // Start from an empty canvas.
    context.clearRect(0, 0, context.canvas.width, context.canvas.height);

    // Instantiated but not used further in this function.
    const colors = new Colors();

    // Font size follows the longer canvas side, with a 14px minimum.
    const longestSide = Math.max(context.canvas.width, context.canvas.height);
    const font = `${Math.max(Math.round(longestSide / 40), 14)}px Arial`;
    context.font = font;
    context.textBaseline = "top";

    // The leading integer of the font string is the text height in pixels.
    const textHeight = parseInt(font, 10);
    const labelHeight = textHeight + 2;

    for (let index = 0; index < scores_data.length; ++index) {
        const percent = (scores_data[index] * 100).toFixed(1);
        const caption = `${labels[classes_data[index]]} - ${percent}%`;

        const [left, top, right, bottom] = xywh2xyxy(boxes_data[index]);

        // Box outline.
        context.strokeStyle = boxColor;
        context.lineWidth = 2;
        context.strokeRect(left, top, right - left, bottom - top);

        // Filled strip directly above the box that holds the caption.
        const labelX = left - 1;
        const labelY = top - labelHeight;
        context.fillStyle = boxColor;
        context.fillRect(labelX, labelY, context.measureText(caption).width + 2, labelHeight);

        // Caption text.
        context.fillStyle = "#ffffff";
        context.fillText(caption, labelX, labelY);
    }
};

/**
 * Turns an image element into the batched, channel-first tensor fed to the model.
 *
 * The image is padded on the bottom/right to a square, resized to the model
 * input size, divided by 255 and transposed from [h, w, c] to [c, h, w].
 *
 * @param source      Image element to read pixels from.
 * @param modelWidth  Width of the model input.
 * @param modelHeight Height of the model input.
 * @returns `[input, xRatio, yRatio]` where `xRatio`/`yRatio` are the padded
 *          square side divided by the source width/height respectively.
 */
const preprocess = (source: HTMLImageElement, modelWidth: number, modelHeight: number) => {
    let xRatio, yRatio; // ratios for boxes

    // tf.tidy releases the intermediate tensors created inside the callback.
    const input = tf.tidy(() => {
        const img = tf.browser.fromPixels(source);

        // Pad the image to a square => [n, m] becomes [n, n], n > m
        const [h, w] = img.shape.slice(0, 2); // source height and width (in that order)
        const maxSize = Math.max(w, h); // side of the padded square
        const imgPadded = img.pad([
            [0, maxSize - h], // pad y [bottom only]
            [0, maxSize - w], // pad x [right only]
            [0, 0],
        ]);

        xRatio = maxSize / w; // padded side relative to source width
        yRatio = maxSize / h; // padded side relative to source height

        return tf.image
            // resizeBilinear expects [newHeight, newWidth]; the original passed
            // width first, which is only correct for square model inputs.
            .resizeBilinear(imgPadded, [modelHeight, modelWidth])
            .div(255.0) // scale pixel values by 1/255
            .transpose([2, 0, 1]) // [h, w, c] -> [c, h, w]
            .expandDims(0); // add the batch axis
    });

    return [input, xRatio, yRatio];
};

// Load the graph model, run it on the page's image and draw the detections.
const MODEL_URL = '/yolov7_web_model/model.json';
const model = await tf.loadGraphModel(MODEL_URL);
const model_dim = [640, 640]; // model input size, passed to preprocess below

// getElementById may return null or a non-image element; fail with a clear message.
const myImage = document.getElementById('image');
if (!(myImage instanceof HTMLImageElement)) {
    throw new Error("Expected an <img> element with id 'image'");
}
const [input, xRatio, yRatio] = preprocess(myImage, model_dim[0], model_dim[1]);
const execution = model.execute(input);

// Copy the first batch entry into plain arrays, then release the tensors.
const result = execution.arraySync()[0];
tf.dispose([input, execution]);

var detections = non_max_suppression(result);
const boxes =  shortenedCol(detections, [0,1,2,3]); // columns 0-3 of each detection
const scores = shortenedCol(detections, [4]); // column 4
const class_detect = shortenedCol(detections, [5]); // column 5

renderBoxes(canvasRef.value!, 0.2, boxes, scores, class_detect, [xRatio, yRatio]);

我尝试过使用 xRatio 和 yRatio 之类的值，但我不明白怎样才能不仅按比例、还要按尺寸进行缩放。该如何缩放，才能在原始的 1356x904 图像上正确渲染这些边界框？

英文:

I'm using a TensorFlow.js implementation of YoloV7 object detection, but can't wrap my head around how to correctly scale and deal with ratios of bounding boxes. I get the correct spatial output in [x1, y1, width, height] format, but I don't get the exact locations correct, because the model input image is 640x640:

[
[
371.74334716796875,
101.12919616699219,
19.002845764160156,
39.24892807006836
],
[
45.18428421020508,
181.0949249267578,
66.98155212402344,
74.84469604492188
],
[
405.8454284667969,
239.25437927246094,
92.49766540527344,
278.452880859375
],
[
292.5257873535156,
264.4200744628906,
77.10870361328125,
263.32293701171875
],
[
102.06099700927734,
249.17529296875,
71.49154663085938,
326.78228759765625
],
[
200.99879455566406,
256.24462890625,
118.14222717285156,
311.6120910644531
]
]

/**
 * Paints one labelled rectangle per detection onto a canvas.
 *
 * The canvas is cleared first. Boxes are drawn exactly as returned by
 * `xywh2xyxy`; `classThreshold` and `ratios` are accepted but never read, so
 * no rescaling to the source image takes place here.
 *
 * @param canvasRef      Canvas element to draw on (its 2D context is used).
 * @param classThreshold Not used by this implementation.
 * @param boxes_data     Per-detection boxes, each passed to `xywh2xyxy`.
 * @param scores_data    Per-detection scores; shown as a percentage.
 * @param classes_data   Per-detection class indices, looked up in `labels`.
 * @param ratios         Not used by this implementation.
 */
export const renderBoxes = (
    canvasRef,
    classThreshold,
    boxes_data,
    scores_data,
    classes_data,
    ratios
) => {
    const ctx = canvasRef.getContext("2d");
    ctx.clearRect(0, 0, ctx.canvas.width, ctx.canvas.height); // clean canvas

    // Instantiated but not used further in this function.
    const colors = new Colors();

    // Font size follows the longer canvas side, with a 14px minimum.
    const font = `${Math.max(
        Math.round(Math.max(ctx.canvas.width, ctx.canvas.height) / 40),
        14
    )}px Arial`;
    ctx.font = font;
    ctx.textBaseline = "top";

    for (let i = 0; i < scores_data.length; ++i) {
        const klass = labels[classes_data[i]];
        const score = (scores_data[i] * 100).toFixed(1);

        let [x1, y1, x2, y2] = xywh2xyxy(boxes_data[i]);

        const width = x2 - x1;
        const height = y2 - y1;

        // Draw the bounding box.
        ctx.strokeStyle = "#B033FF";
        ctx.lineWidth = 2;
        ctx.strokeRect(x1, y1, width, height);

        // Draw the label background directly above the box.
        ctx.fillStyle = "#B033FF";
        const textWidth = ctx.measureText(klass + " - " + score + "%").width;
        const textHeight = parseInt(font, 10); // leading integer of the font string, base 10
        ctx.fillRect(x1 - 1, y1 - (textHeight + 2), textWidth + 2, textHeight + 2);

        // Draw the label text.
        ctx.fillStyle = "#ffffff";
        ctx.fillText(klass + " - " + score + "%", x1 - 1, y1 - (textHeight + 2));
    }
};
/**
 * Turns an image element into the batched, channel-first tensor fed to the model.
 *
 * The image is padded on the bottom/right to a square, resized to the model
 * input size, divided by 255 and transposed from [h, w, c] to [c, h, w].
 *
 * @param source      Image element to read pixels from.
 * @param modelWidth  Width of the model input.
 * @param modelHeight Height of the model input.
 * @returns `[input, xRatio, yRatio]` where `xRatio`/`yRatio` are the padded
 *          square side divided by the source width/height respectively.
 */
const preprocess = (source: HTMLImageElement, modelWidth: number, modelHeight: number) => {
    let xRatio, yRatio; // ratios for boxes

    // tf.tidy releases the intermediate tensors created inside the callback.
    const input = tf.tidy(() => {
        const img = tf.browser.fromPixels(source);

        // padding image to square => [n, m] to [n, n], n > m
        const [h, w] = img.shape.slice(0, 2); // source height and width (in that order)
        const maxSize = Math.max(w, h); // side of the padded square
        const imgPadded = img.pad([
            [0, maxSize - h], // padding y [bottom only]
            [0, maxSize - w], // padding x [right only]
            [0, 0],
        ]);

        xRatio = maxSize / w; // padded side relative to source width
        yRatio = maxSize / h; // padded side relative to source height

        return tf.image
            // resizeBilinear expects [newHeight, newWidth]; the original passed
            // width first, which is only correct for square model inputs.
            .resizeBilinear(imgPadded, [modelHeight, modelWidth])
            .div(255.0) // scale pixel values by 1/255
            .transpose([2, 0, 1]) // [h, w, c] -> [c, h, w]
            .expandDims(0); // add the batch axis
    });

    return [input, xRatio, yRatio];
};

// Load the graph model, run it on the page's image and draw the detections.
const MODEL_URL = '/yolov7_web_model/model.json';
const model = await tf.loadGraphModel(MODEL_URL);
const model_dim = [640, 640]; // model input size, passed to preprocess below

// getElementById may return null or a non-image element; fail with a clear message.
const myImage = document.getElementById('image');
if (!(myImage instanceof HTMLImageElement)) {
    throw new Error("Expected an <img> element with id 'image'");
}
const [input, xRatio, yRatio] = preprocess(myImage, model_dim[0], model_dim[1]);
const execution = model.execute(input);

// Copy the first batch entry into plain arrays, then release the tensors.
const result = execution.arraySync()[0];
tf.dispose([input, execution]);

var detections = non_max_suppression(result);
const boxes =  shortenedCol(detections, [0,1,2,3]); // columns 0-3 of each detection
const scores = shortenedCol(detections, [4]); // column 4
const class_detect = shortenedCol(detections, [5]); // column 5
renderBoxes(canvasRef.value!, 0.2, boxes, scores, class_detect, [xRatio, yRatio]);

I've tried using things like xRatio and yRatio but I don't understand how I would scale not just the ratios but also the size. How do I scale this so that it can be rendered for the original 1356x904 image?

答案1

得分: 1

Canvas 缩放解决方案：

您需要先还原为了让模型获得正方形输入而做的初始 letterbox 填充，然后再把结果缩放到图像的实际尺寸。

// ratios = [xRatio, yRatio] from preprocess: padded square side / source width and height.
// Multiplying by the ratio undoes the letterbox padding on that axis; dividing the
// image size by 640 undoes the resize to the model input. Using each axis' own
// ratio (rather than max(ratios) on y only) keeps portrait images correct too.
const horizontalScaleFactor = (ratios[0] * imageWidth) / 640;
const verticalScaleFactor = (ratios[1] * imageHeight) / 640;
ctx.scale(horizontalScaleFactor, verticalScaleFactor);

    /**
     * Draws labelled detection boxes on a canvas sized to the source image,
     * mapping model-input coordinates back to image coordinates via ctx.scale.
     *
     * @param canvasRef      Canvas element to draw on; it is resized to the image.
     * @param classThreshold Not used by this implementation.
     * @param boxes_data     Per-detection boxes, each passed to `xywh2xyxy`.
     * @param scores_data    Per-detection scores; shown as a percentage.
     * @param classes_data   Per-detection class indices for `labels` and `colors.get`.
     * @param ratios         `[xRatio, yRatio]`: padded square side divided by the
     *                       source width and height.
     * @param imageWidth     Width of the image the boxes are rendered over.
     * @param imageHeight    Height of the image the boxes are rendered over.
     * @param modelSize      Side length of the square model input (default 640).
     */
    export const renderBoxes = (
        canvasRef,
        classThreshold,
        boxes_data,
        scores_data,
        classes_data,
        ratios,
        imageWidth,
        imageHeight,
        modelSize = 640
    ) => {
        const ctx = canvasRef.getContext("2d");

        // Assigning width/height clears the canvas and resets the context state,
        // so this must happen before the scale and font are configured.
        ctx.canvas.width = imageWidth;
        ctx.canvas.height = imageHeight;

        // Undo the letterbox padding (ratio) and the resize to the model input
        // (image size / modelSize) on each axis. The earlier
        // scale(1, Math.max(...ratios)) was only right when width >= height.
        const [xRatio, yRatio] = ratios;
        const horizontalScaleFactor = (xRatio * imageWidth) / modelSize;
        const verticalScaleFactor = (yRatio * imageHeight) / modelSize;
        ctx.scale(horizontalScaleFactor, verticalScaleFactor);

        // font configs
        const font = `${14}px Arial`;
        ctx.font = font;
        ctx.textBaseline = "top";
        const textHeight = parseInt(font, 10); // leading integer of the font string, base 10

        const colors = new Colors();

        for (let i = 0; i < scores_data.length; ++i) {
            const klass = labels[classes_data[i]];
            const color = colors.get(classes_data[i]);
            const score = (scores_data[i] * 100).toFixed(1);
            const caption = klass + " - " + score + "%";

            const [x1, y1, x2, y2] = xywh2xyxy(boxes_data[i]);

            // Draw the bounding box.
            ctx.strokeStyle = Colors.hexToRgba(color, 0.5);
            ctx.lineWidth = 2;
            ctx.strokeRect(x1, y1, x2 - x1, y2 - y1);

            // Draw the label background; clamp so it stays visible for boxes at the top edge.
            const labelX = x1 - 1;
            const labelY = Math.max(0, y1 - (textHeight + 2));
            ctx.fillStyle = color;
            ctx.fillRect(labelX, labelY, ctx.measureText(caption).width + 2, textHeight + 2);

            // Draw labels
            ctx.fillStyle = "#ffffff";
            ctx.fillText(caption, labelX, labelY);
        }
    };

英文:

Canvas scaling solution:

You have to reverse the initial letterboxing that is done in order to achieve a square image for the model, and then scale that to the image dimensions.

// ratios = [xRatio, yRatio] from preprocess: padded square side / source width and height.
// Multiplying by the ratio undoes the letterbox padding on that axis; dividing the
// image size by 640 undoes the resize to the model input. Using each axis' own
// ratio (rather than max(ratios) on y only) keeps portrait images correct too.
const horizontalScaleFactor = (ratios[0] * imageWidth) / 640;
const verticalScaleFactor = (ratios[1] * imageHeight) / 640;
ctx.scale(horizontalScaleFactor, verticalScaleFactor);

/**
 * Draws labelled detection boxes on a canvas sized to the source image,
 * mapping model-input coordinates back to image coordinates via ctx.scale.
 *
 * @param canvasRef      Canvas element to draw on; it is resized to the image.
 * @param classThreshold Not used by this implementation.
 * @param boxes_data     Per-detection boxes, each passed to `xywh2xyxy`.
 * @param scores_data    Per-detection scores; shown as a percentage.
 * @param classes_data   Per-detection class indices for `labels` and `colors.get`.
 * @param ratios         `[xRatio, yRatio]`: padded square side divided by the
 *                       source width and height.
 * @param imageWidth     Width of the image the boxes are rendered over.
 * @param imageHeight    Height of the image the boxes are rendered over.
 * @param modelSize      Side length of the square model input (default 640).
 */
export const renderBoxes = (
    canvasRef,
    classThreshold,
    boxes_data,
    scores_data,
    classes_data,
    ratios,
    imageWidth,
    imageHeight,
    modelSize = 640
) => {
    const ctx = canvasRef.getContext("2d");

    // Assigning width/height clears the canvas and resets the context state,
    // so this must happen before the scale and font are configured.
    ctx.canvas.width = imageWidth;
    ctx.canvas.height = imageHeight;

    // Undo the letterbox padding (ratio) and the resize to the model input
    // (image size / modelSize) on each axis. The earlier
    // scale(1, Math.max(...ratios)) was only right when width >= height.
    const [xRatio, yRatio] = ratios;
    const horizontalScaleFactor = (xRatio * imageWidth) / modelSize;
    const verticalScaleFactor = (yRatio * imageHeight) / modelSize;
    ctx.scale(horizontalScaleFactor, verticalScaleFactor);

    // font configs
    const font = `${14}px Arial`;
    ctx.font = font;
    ctx.textBaseline = "top";
    const textHeight = parseInt(font, 10); // leading integer of the font string, base 10

    const colors = new Colors();

    for (let i = 0; i < scores_data.length; ++i) {
        const klass = labels[classes_data[i]];
        const color = colors.get(classes_data[i]);
        const score = (scores_data[i] * 100).toFixed(1);
        const caption = klass + " - " + score + "%";

        const [x1, y1, x2, y2] = xywh2xyxy(boxes_data[i]);

        // Draw the bounding box.
        ctx.strokeStyle = Colors.hexToRgba(color, 0.5);
        ctx.lineWidth = 2;
        ctx.strokeRect(x1, y1, x2 - x1, y2 - y1);

        // Draw the label background; clamp so it stays visible for boxes at the top edge.
        const labelX = x1 - 1;
        const labelY = Math.max(0, y1 - (textHeight + 2));
        ctx.fillStyle = color;
        ctx.fillRect(labelX, labelY, ctx.measureText(caption).width + 2, textHeight + 2);

        // Draw labels
        ctx.fillStyle = "#ffffff";
        ctx.fillText(caption, labelX, labelY);
    }
};

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

如何正确渲染YoloV7框在图像上？

问题

答案1

Angular CDK 菜单在页面底部打开。

如何在React Native中删除身份验证令牌？

React setState 回调在相同更新时运行多次？

设置雷达图的步长大小。

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

发表评论