Restricting Roboflow model inference to a user-defined region in OpenCV
Question
I'm currently working on a project where I'm using a Roboflow model to detect pool balls in a video feed. The video feed is processed using OpenCV and the model runs on an OpenCV AI Kit (OAK) device. The model and device are interfaced using the roboflowoak package.
I want to add a feature where the user can draw a rectangle on the video feed, and the model will only detect pool balls within this rectangle. Essentially, anything outside this rectangle should be ignored by the model.
Here's the code I've written so far:
import cv2
import time
import numpy as np
from roboflowoak import RoboflowOak

# global variables
ix, iy = -1, -1
drawing = False
rect_over = False
x_start, y_start, x_end, y_end = -1, -1, -1, -1

# mouse callback function
def draw_rectangle(event, x, y, flags, param):
    global ix, iy, drawing, rect_over, x_start, y_start, x_end, y_end
    if event == cv2.EVENT_LBUTTONDOWN:
        drawing = True
        ix, iy = x, y
        x_start, y_start = x, y
    elif event == cv2.EVENT_MOUSEMOVE:
        if drawing:
            x_end, y_end = x, y
    elif event == cv2.EVENT_LBUTTONUP:
        drawing = False
        rect_over = True
        x_end, y_end = x, y

def main():
    # Instantiate RoboflowOak object
    rf = RoboflowOak(
        model="billiards-y0wwp",
        confidence=0.05,
        overlap=0.2,
        version="3",
        api_key="My API Key",
        rgb=True,
        depth=True,
        device=None,
        blocking=True
    )
    # create named window to attach mouse callback function
    cv2.namedWindow('frame')
    cv2.setMouseCallback('frame', draw_rectangle)
    # Loop to continuously run model inference and display output
    while True:
        t0 = time.time()
        # Run model inference
        result, frame, raw_frame, depth = rf.detect()
        # Draw rectangle on frame
        if rect_over:
            cv2.rectangle(frame, pt1=(x_start, y_start), pt2=(x_end, y_end), color=(0, 255, 0), thickness=2)
        # Filter out detections that are outside of the rectangle
        if rect_over:
            filtered_predictions = []
            for p in result['predictions']:
                bbox = p['bbox']  # Assuming the prediction has a 'bbox' key
                x_center = bbox['left'] + bbox['width'] / 2
                y_center = bbox['top'] + bbox['height'] / 2
                if x_start < x_center < x_end and y_start < y_center < y_end:
                    filtered_predictions.append(p)
            # Replace predictions with filtered predictions
            result['predictions'] = filtered_predictions
        # Print inference time
        print(f"INFERENCE TIME IN MS: {(time.time() - t0) * 1000}")
        # Print predictions
        print(f"PREDICTIONS: {result['predictions']}")
        # Normalize depth for visualization
        max_depth = np.amax(depth)
        normalized_depth = depth / max_depth
        # Show depth and frame
        cv2.imshow("depth", normalized_depth)
        cv2.imshow("frame", frame)
        # Exit loop on 'q' key press
        if cv2.waitKey(1) == ord('q'):
            break

if __name__ == '__main__':
    main()
As you can see, I've implemented a mouse callback to draw the rectangle, and I'm filtering out detections outside this rectangle after the inference. However, I feel this might not be the most efficient way since the model is still processing the entire frame.
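For completeness, the filtering step on its own would look roughly like the helper below. This is only a minimal sketch: it reuses the same 'bbox' assumption as the loop above (the actual roboflowoak prediction format may differ), and it sorts the rectangle corners so the check still works if the rectangle is drawn right-to-left or bottom-to-top.

def filter_predictions(predictions, x_start, y_start, x_end, y_end):
    # Keep only predictions whose box center lies inside the user-drawn rectangle.
    # Assumes each prediction has a 'bbox' dict with 'left', 'top', 'width' and
    # 'height' keys, as in the loop above; adapt the accessors if the real
    # prediction format differs.
    x_min, x_max = sorted((x_start, x_end))  # normalize drag direction
    y_min, y_max = sorted((y_start, y_end))
    kept = []
    for p in predictions:
        bbox = p['bbox']
        x_center = bbox['left'] + bbox['width'] / 2
        y_center = bbox['top'] + bbox['height'] / 2
        if x_min < x_center < x_max and y_min < y_center < y_max:
            kept.append(p)
    return kept

Inside the loop, the inline filtering block would then reduce to result['predictions'] = filter_predictions(result['predictions'], x_start, y_start, x_end, y_end).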
Answer 1
Score: 0
import cv2
import time
import numpy as np

class test_class():
    def __init__(self):
        # global variables  # We don't like global variables
        # Initiate class variables
        self.ix, self.iy = -1, -1
        self.drawing = False
        self.rect_over = False
        self.x_start, self.y_start, self.x_end, self.y_end = -1, -1, -1, -1
        self.toggle = False  # Initialize our 'inference' toggle
        self.frame = np.zeros((250, 250, 3), np.uint8)  # Dummy image

    # mouse callback function
    def draw_rectangle(self, event, x, y, flags, param):
        if event == cv2.EVENT_LBUTTONDOWN:
            self.drawing = True
            self.ix, self.iy = x, y
            self.x_start, self.y_start = x, y
        elif event == cv2.EVENT_MOUSEMOVE:
            if self.drawing:
                self.x_end, self.y_end = x, y
        elif event == cv2.EVENT_LBUTTONUP:
            self.drawing = False
            self.rect_over = True
            self.x_end, self.y_end = x, y

    def inference(self):  # Dummy inferencing function
        # You can put all your AI / filtering here
        print("I am running AI stuff")
        time.sleep(5)
        return

    def main(self):
        cv2.namedWindow('frame')
        cv2.setMouseCallback('frame', self.draw_rectangle)
        # Loop to continuously run model inference and display output
        while True:
            t0 = time.time()
            f_copy = self.frame.copy()  # Copying my blank image as I don't have your stream
            # Run model inference
            # result, frame, raw_frame, depth = rf.detect()
            if self.toggle is True:
                self.inference()
            # Draw rectangle on frame
            if self.rect_over:
                print("im here")
                cv2.rectangle(f_copy, pt1=(self.x_start, self.y_start), pt2=(self.x_end, self.y_end), color=(0, 255, 0), thickness=2)
            cv2.imshow("frame", f_copy)
            key = cv2.waitKey(1)
            if key == ord('f'):  # probably a better way to do this; can't exactly interrupt the inferencing step
                self.toggle = not self.toggle  # Simple boolean toggle
            # Exit loop on 'q' key press
            if key == ord('q'):
                break

if __name__ == '__main__':
    my_program = test_class()
    my_program.main()
Point 1: Avoid globals, as they are not recommended.
Point 2: Consider toggling inference off while you are adjusting your box. Inference acts as a blocker, so you can barely see anything due to the delay.
Point 3: Not sure if you wanted a real-time rectangle preview while you are drawing the box.
I created a dummy image and a dummy function for the AI inferencing. Note the delay in the prints when you toggle it on. This also affects the callback for your 'box drawing', hence it is probably a good idea to do them separately and not concurrently. If you really want to adjust the box at the same time as inferencing runs, you probably have to look at multithreading/multiprocessing.
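If you really do want to adjust the box while inference is running, a rough multithreading sketch could look like the one below. This is only an illustration built around the same dummy inference function, not the real RoboflowOak call, and a real implementation would still need a lock or queue to share frames and results between the threads.

import threading
import time
import cv2
import numpy as np

stop_event = threading.Event()

def inference_worker():
    # Stand-in for the real model call; replace the sleep with the actual detection step
    while not stop_event.is_set():
        print("running inference in the background")
        time.sleep(5)  # simulate a slow inference step

def main():
    frame = np.zeros((250, 250, 3), np.uint8)  # dummy image
    worker = threading.Thread(target=inference_worker, daemon=True)
    worker.start()

    cv2.namedWindow('frame')
    # the draw_rectangle mouse callback from above could be attached here as before

    while True:
        cv2.imshow('frame', frame)  # the UI loop stays responsive while inference runs
        if cv2.waitKey(1) == ord('q'):
            break

    stop_event.set()
    worker.join()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main()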