英文:
How to correctly render YoloV7 boxes on image?
问题
我使用了TensorFlow.js的YoloV7目标检测实现,但不知道该如何正确地缩放边界框并处理其比例。我得到了格式为 [x1, y1, width, height] 的空间输出,数值本身是正确的,但由于模型的输入图像是640x640,框的位置与原图对不上:
[
[
371.74334716796875,
101.12919616699219,
19.002845764160156,
39.24892807006836
],
[
45.18428421020508,
181.0949249267578,
66.98155212402344,
74.84469604492188
],
[
405.8454284667969,
239.25437927246094,
92.49766540527344,
278.452880859375
],
[
292.5257873535156,
264.4200744628906,
77.10870361328125,
263.32293701171875
],
[
102.06099700927734,
249.17529296875,
71.49154663085938,
326.78228759765625
],
[
200.99879455566406,
256.24462890625,
118.14222717285156,
311.6120910644531
]
]
/**
 * Paints every detection onto the canvas as a purple rectangle with a
 * "<label> - <score>%" caption sitting directly above its top-left corner.
 *
 * Box coordinates are used exactly as returned by `xywh2xyxy`; no scaling is
 * applied. `classThreshold` and `ratios` are accepted but never read.
 */
export const renderBoxes = (
  canvasRef,
  classThreshold,
  boxes_data,
  scores_data,
  classes_data,
  ratios
) => {
  const BOX_COLOR = "#B033FF";
  const TEXT_COLOR = "#ffffff";

  const ctx = canvasRef.getContext("2d");
  const { width: canvasWidth, height: canvasHeight } = ctx.canvas;
  ctx.clearRect(0, 0, canvasWidth, canvasHeight); // wipe the previous frame
  const palette = new Colors(); // instantiated but not used below

  // Font size is 1/40th of the larger canvas side, never below 14px.
  const longestSide = Math.max(canvasWidth, canvasHeight);
  const fontPx = Math.max(Math.round(longestSide / 40), 14);
  const font = `${fontPx}px Arial`;
  ctx.font = font;
  ctx.textBaseline = "top";

  // Height of the caption strip: the font size plus 2px of padding.
  const labelHeight = parseInt(font, 10) + 2;

  for (let idx = 0; idx < scores_data.length; idx += 1) {
    const caption =
      labels[classes_data[idx]] + " - " + (scores_data[idx] * 100).toFixed(1) + "%";
    const [left, top, right, bottom] = xywh2xyxy(boxes_data[idx]);

    // Box outline.
    ctx.strokeStyle = BOX_COLOR;
    ctx.lineWidth = 2;
    ctx.strokeRect(left, top, right - left, bottom - top);

    // Filled strip behind the caption.
    ctx.fillStyle = BOX_COLOR;
    const labelX = left - 1;
    const labelY = top - labelHeight;
    ctx.fillRect(labelX, labelY, ctx.measureText(caption).width + 2, labelHeight);

    // Caption text.
    ctx.fillStyle = TEXT_COLOR;
    ctx.fillText(caption, labelX, labelY);
  }
};
/**
 * Converts an image element into the batched tensor passed to the model.
 *
 * The image is padded on the bottom/right to a square, resized to the model
 * input size, divided by 255, reordered from [height, width, channels] to
 * [channels, height, width] and given a leading batch dimension.
 *
 * @param source - image element whose pixels are read
 * @param modelWidth - model input width in pixels
 * @param modelHeight - model input height in pixels
 * @returns `[input, xRatio, yRatio]`, where `xRatio` / `yRatio` are the padded
 *   square's side divided by the source width / height
 */
const preprocess = (source: HTMLImageElement, modelWidth: number, modelHeight: number) => {
  let xRatio, yRatio; // ratios for boxes
  const input = tf.tidy(() => {
    const img = tf.browser.fromPixels(source);

    // Pad the image to a square => [n, m] becomes [n, n], n > m.
    const [h, w] = img.shape.slice(0, 2); // shape is [height, width, channels]
    const maxSize = Math.max(w, h);
    const imgPadded = img.pad([
      [0, maxSize - h], // pad y [bottom only]
      [0, maxSize - w], // pad x [right only]
      [0, 0],
    ]);

    xRatio = maxSize / w;
    yRatio = maxSize / h;

    return tf.image
      // resizeBilinear expects [newHeight, newWidth]; passing width first
      // would transpose the target size for non-square models.
      .resizeBilinear(imgPadded, [modelHeight, modelWidth])
      .div(255.0) // scale pixel values by 1/255
      // HWC -> CHW. NOTE(review): presumably the converted model expects
      // channels-first input — confirm against the model signature.
      .transpose([2, 0, 1])
      .expandDims(0); // add batch dimension
  });
  return [input, xRatio, yRatio];
};
// Load the converted YoloV7 graph model.
const MODEL_URL = '/yolov7_web_model/model.json';
const model = await tf.loadGraphModel(MODEL_URL);
// Model input size. NOTE(review): assumed to be [width, height]; both are 640 here.
const model_dim = [640, 640];

// getElementById returns HTMLElement | null, so fail early with a clear
// message instead of handing a null / non-image element to preprocess.
const myImage = document.getElementById('image');
if (!(myImage instanceof HTMLImageElement)) {
  throw new Error("Expected an <img> element with id 'image'");
}

const [input, xRatio, yRatio] = preprocess(myImage, model_dim[0], model_dim[1]);
const execution = model.execute(input);
// execute() may return a single tensor or an array of tensors.
const outputTensor = Array.isArray(execution) ? execution[0] : execution;
const result = outputTensor.arraySync()[0];
// The values are now plain arrays; release the tensors' memory.
tf.dispose([input, execution]);

const detections = non_max_suppression(result);
// Columns 0-3 are the box, 4 the score and 5 the class index.
const boxes = shortenedCol(detections, [0,1,2,3]);
const scores = shortenedCol(detections, [4]);
const class_detect = shortenedCol(detections, [5]);
renderBoxes(canvasRef.value!, 0.2, boxes, scores, class_detect, [xRatio, yRatio]);
我尝试过使用 xRatio 和 yRatio 之类的值,但不明白如何在缩放比例的同时也缩放边界框的大小。应该如何缩放,才能把结果正确地绘制在原始的1356x904图像上?
英文:
I'm using a TensorFlow.js implementation of YoloV7 object detection, but I can't wrap my head around how to correctly scale the bounding boxes and deal with their ratios. I get the correct spatial output in [x1, y1, width, height] format, but the locations don't line up with the original image, because the model input image is 640x640:
[
[
371.74334716796875,
101.12919616699219,
19.002845764160156,
39.24892807006836
],
[
45.18428421020508,
181.0949249267578,
66.98155212402344,
74.84469604492188
],
[
405.8454284667969,
239.25437927246094,
92.49766540527344,
278.452880859375
],
[
292.5257873535156,
264.4200744628906,
77.10870361328125,
263.32293701171875
],
[
102.06099700927734,
249.17529296875,
71.49154663085938,
326.78228759765625
],
[
200.99879455566406,
256.24462890625,
118.14222717285156,
311.6120910644531
]
]
/**
 * Paints every detection onto the canvas as a purple rectangle with a
 * "<label> - <score>%" caption sitting directly above its top-left corner.
 *
 * Box coordinates are used exactly as returned by `xywh2xyxy`; no scaling is
 * applied. `classThreshold` and `ratios` are accepted but never read.
 */
export const renderBoxes = (
  canvasRef,
  classThreshold,
  boxes_data,
  scores_data,
  classes_data,
  ratios
) => {
  const BOX_COLOR = "#B033FF";
  const TEXT_COLOR = "#ffffff";

  const ctx = canvasRef.getContext("2d");
  const { width: canvasWidth, height: canvasHeight } = ctx.canvas;
  ctx.clearRect(0, 0, canvasWidth, canvasHeight); // wipe the previous frame
  const palette = new Colors(); // instantiated but not used below

  // Font size is 1/40th of the larger canvas side, never below 14px.
  const longestSide = Math.max(canvasWidth, canvasHeight);
  const fontPx = Math.max(Math.round(longestSide / 40), 14);
  const font = `${fontPx}px Arial`;
  ctx.font = font;
  ctx.textBaseline = "top";

  // Height of the caption strip: the font size plus 2px of padding.
  const labelHeight = parseInt(font, 10) + 2;

  for (let idx = 0; idx < scores_data.length; idx += 1) {
    const caption =
      labels[classes_data[idx]] + " - " + (scores_data[idx] * 100).toFixed(1) + "%";
    const [left, top, right, bottom] = xywh2xyxy(boxes_data[idx]);

    // Box outline.
    ctx.strokeStyle = BOX_COLOR;
    ctx.lineWidth = 2;
    ctx.strokeRect(left, top, right - left, bottom - top);

    // Filled strip behind the caption.
    ctx.fillStyle = BOX_COLOR;
    const labelX = left - 1;
    const labelY = top - labelHeight;
    ctx.fillRect(labelX, labelY, ctx.measureText(caption).width + 2, labelHeight);

    // Caption text.
    ctx.fillStyle = TEXT_COLOR;
    ctx.fillText(caption, labelX, labelY);
  }
};
/**
 * Converts an image element into the batched tensor passed to the model.
 *
 * The image is padded on the bottom/right to a square, resized to the model
 * input size, divided by 255, reordered from [height, width, channels] to
 * [channels, height, width] and given a leading batch dimension.
 *
 * @param source - image element whose pixels are read
 * @param modelWidth - model input width in pixels
 * @param modelHeight - model input height in pixels
 * @returns `[input, xRatio, yRatio]`, where `xRatio` / `yRatio` are the padded
 *   square's side divided by the source width / height
 */
const preprocess = (source: HTMLImageElement, modelWidth: number, modelHeight: number) => {
  let xRatio, yRatio; // ratios for boxes
  const input = tf.tidy(() => {
    const img = tf.browser.fromPixels(source);

    // Pad the image to a square => [n, m] becomes [n, n], n > m.
    const [h, w] = img.shape.slice(0, 2); // shape is [height, width, channels]
    const maxSize = Math.max(w, h);
    const imgPadded = img.pad([
      [0, maxSize - h], // pad y [bottom only]
      [0, maxSize - w], // pad x [right only]
      [0, 0],
    ]);

    xRatio = maxSize / w;
    yRatio = maxSize / h;

    return tf.image
      // resizeBilinear expects [newHeight, newWidth]; passing width first
      // would transpose the target size for non-square models.
      .resizeBilinear(imgPadded, [modelHeight, modelWidth])
      .div(255.0) // scale pixel values by 1/255
      // HWC -> CHW. NOTE(review): presumably the converted model expects
      // channels-first input — confirm against the model signature.
      .transpose([2, 0, 1])
      .expandDims(0); // add batch dimension
  });
  return [input, xRatio, yRatio];
};
// Load the converted YoloV7 graph model.
const MODEL_URL = '/yolov7_web_model/model.json';
const model = await tf.loadGraphModel(MODEL_URL);
// Model input size. NOTE(review): assumed to be [width, height]; both are 640 here.
const model_dim = [640, 640];

// getElementById returns HTMLElement | null, so fail early with a clear
// message instead of handing a null / non-image element to preprocess.
const myImage = document.getElementById('image');
if (!(myImage instanceof HTMLImageElement)) {
  throw new Error("Expected an <img> element with id 'image'");
}

const [input, xRatio, yRatio] = preprocess(myImage, model_dim[0], model_dim[1]);
const execution = model.execute(input);
// execute() may return a single tensor or an array of tensors.
const outputTensor = Array.isArray(execution) ? execution[0] : execution;
const result = outputTensor.arraySync()[0];
// The values are now plain arrays; release the tensors' memory.
tf.dispose([input, execution]);

const detections = non_max_suppression(result);
// Columns 0-3 are the box, 4 the score and 5 the class index.
const boxes = shortenedCol(detections, [0,1,2,3]);
const scores = shortenedCol(detections, [4]);
const class_detect = shortenedCol(detections, [5]);
renderBoxes(canvasRef.value!, 0.2, boxes, scores, class_detect, [xRatio, yRatio]);
I've tried using things like xRatio and yRatio, but I don't understand how to scale not just the ratios but also the sizes of the boxes. How do I scale this so that it can be rendered on the original 1356x904 image?
答案1
得分: 1
Canvas缩放方案:
必须先还原预处理时为了给模型提供正方形图像而做的letterbox(填充)变换,然后再缩放到图像的实际尺寸。
// Undo the letterbox padding on both axes: ratios is [xRatio, yRatio], so
// this works for portrait images (xRatio > 1) as well as landscape ones.
ctx.scale(ratios[0], ratios[1]);
// Post-process scale: 640x640 model output -> actual image dimensions.
const horizontalScaleFactor = imageWidth / 640;
const verticalScaleFactor = imageHeight / 640;
ctx.scale(horizontalScaleFactor, verticalScaleFactor);
/**
 * Draws each detection as a coloured rectangle with a "<label> - <score>%"
 * caption on a canvas that is resized to the source image.
 *
 * Boxes are drawn with their raw model-space coordinates. The context
 * transform first undoes the letterbox padding on both axes (`ratios`), then
 * maps the 640x640 model space onto `imageWidth` x `imageHeight`.
 *
 * @param canvasRef - canvas element to draw on; it is resized to the image size
 * @param classThreshold - accepted for interface compatibility; not read here
 * @param boxes_data - one box per detection, each passed to `xywh2xyxy`
 * @param scores_data - one score per detection; multiplied by 100 for display
 * @param classes_data - one class index per detection, used to index `labels`
 * @param ratios - `[xRatio, yRatio]`; assumed to be the padded-square side
 *   divided by the source width / height, as computed during preprocessing
 * @param imageWidth - width to give the canvas; assumed to be the source image width
 * @param imageHeight - height to give the canvas; assumed to be the source image height
 */
export const renderBoxes = (
  canvasRef,
  classThreshold,
  boxes_data,
  scores_data,
  classes_data,
  ratios,
  imageWidth,
  imageHeight
) => {
  const MODEL_INPUT_SIZE = 640; // side of the square model input the boxes live in
  const FONT_SIZE = 14;

  const ctx = canvasRef.getContext("2d");
  ctx.clearRect(0, 0, ctx.canvas.width, ctx.canvas.height); // clean canvas
  // Assigning the canvas size resets the context state, so the scale calls
  // below do not accumulate across repeated renders.
  ctx.canvas.width = imageWidth;
  ctx.canvas.height = imageHeight;

  // Undo the letterbox padding on BOTH axes. Scaling only y by the larger
  // ratio is correct for landscape images but misplaces boxes on portrait ones.
  ctx.scale(ratios[0], ratios[1]);
  // Map the model space onto the actual image dimensions.
  ctx.scale(imageWidth / MODEL_INPUT_SIZE, imageHeight / MODEL_INPUT_SIZE);

  // Font configuration. Text and strokes are drawn under the transform above,
  // so they are scaled together with the boxes.
  ctx.font = `${FONT_SIZE}px Arial`;
  ctx.textBaseline = "top";
  const labelHeight = FONT_SIZE + 2; // font size plus 2px padding

  const colors = new Colors();
  for (let i = 0; i < scores_data.length; ++i) {
    const color = colors.get(classes_data[i]);
    const caption = `${labels[classes_data[i]]} - ${(scores_data[i] * 100).toFixed(1)}%`;
    const [x1, y1, x2, y2] = xywh2xyxy(boxes_data[i]);

    // Draw the bounding box.
    ctx.strokeStyle = Colors.hexToRgba(color, 0.5);
    ctx.lineWidth = 2;
    ctx.strokeRect(x1, y1, x2 - x1, y2 - y1);

    // Draw the label background directly above the box.
    ctx.fillStyle = color;
    const textWidth = ctx.measureText(caption).width;
    ctx.fillRect(x1 - 1, y1 - labelHeight, textWidth + 2, labelHeight);

    // Draw the label text.
    ctx.fillStyle = "#ffffff";
    ctx.fillText(caption, x1 - 1, y1 - labelHeight);
  }
};
英文:
Canvas scaling solution:
You have to reverse the initial letterboxing that is done in order to achieve a square image for the model, and then scale that to the image dimensions.
// Undo the letterbox padding on both axes: ratios is [xRatio, yRatio], so
// this works for portrait images (xRatio > 1) as well as landscape ones.
ctx.scale(ratios[0], ratios[1]);
// Post-process scale: 640x640 model output -> actual image dimensions.
const horizontalScaleFactor = imageWidth / 640;
const verticalScaleFactor = imageHeight / 640;
ctx.scale(horizontalScaleFactor, verticalScaleFactor);
/**
 * Draws each detection as a coloured rectangle with a "<label> - <score>%"
 * caption on a canvas that is resized to the source image.
 *
 * Boxes are drawn with their raw model-space coordinates. The context
 * transform first undoes the letterbox padding on both axes (`ratios`), then
 * maps the 640x640 model space onto `imageWidth` x `imageHeight`.
 *
 * @param canvasRef - canvas element to draw on; it is resized to the image size
 * @param classThreshold - accepted for interface compatibility; not read here
 * @param boxes_data - one box per detection, each passed to `xywh2xyxy`
 * @param scores_data - one score per detection; multiplied by 100 for display
 * @param classes_data - one class index per detection, used to index `labels`
 * @param ratios - `[xRatio, yRatio]`; assumed to be the padded-square side
 *   divided by the source width / height, as computed during preprocessing
 * @param imageWidth - width to give the canvas; assumed to be the source image width
 * @param imageHeight - height to give the canvas; assumed to be the source image height
 */
export const renderBoxes = (
  canvasRef,
  classThreshold,
  boxes_data,
  scores_data,
  classes_data,
  ratios,
  imageWidth,
  imageHeight
) => {
  const MODEL_INPUT_SIZE = 640; // side of the square model input the boxes live in
  const FONT_SIZE = 14;

  const ctx = canvasRef.getContext("2d");
  ctx.clearRect(0, 0, ctx.canvas.width, ctx.canvas.height); // clean canvas
  // Assigning the canvas size resets the context state, so the scale calls
  // below do not accumulate across repeated renders.
  ctx.canvas.width = imageWidth;
  ctx.canvas.height = imageHeight;

  // Undo the letterbox padding on BOTH axes. Scaling only y by the larger
  // ratio is correct for landscape images but misplaces boxes on portrait ones.
  ctx.scale(ratios[0], ratios[1]);
  // Map the model space onto the actual image dimensions.
  ctx.scale(imageWidth / MODEL_INPUT_SIZE, imageHeight / MODEL_INPUT_SIZE);

  // Font configuration. Text and strokes are drawn under the transform above,
  // so they are scaled together with the boxes.
  ctx.font = `${FONT_SIZE}px Arial`;
  ctx.textBaseline = "top";
  const labelHeight = FONT_SIZE + 2; // font size plus 2px padding

  const colors = new Colors();
  for (let i = 0; i < scores_data.length; ++i) {
    const color = colors.get(classes_data[i]);
    const caption = `${labels[classes_data[i]]} - ${(scores_data[i] * 100).toFixed(1)}%`;
    const [x1, y1, x2, y2] = xywh2xyxy(boxes_data[i]);

    // Draw the bounding box.
    ctx.strokeStyle = Colors.hexToRgba(color, 0.5);
    ctx.lineWidth = 2;
    ctx.strokeRect(x1, y1, x2 - x1, y2 - y1);

    // Draw the label background directly above the box.
    ctx.fillStyle = color;
    const textWidth = ctx.measureText(caption).width;
    ctx.fillRect(x1 - 1, y1 - labelHeight, textWidth + 2, labelHeight);

    // Draw the label text.
    ctx.fillStyle = "#ffffff";
    ctx.fillText(caption, x1 - 1, y1 - labelHeight);
  }
};
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论