Mono & MobilenetSSD
This example shows how to run MobileNetv2SSD on the right grayscale camera and how to display the neural network results on a preview of the right camera stream.
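At a high level, the pipeline feeds the right MonoCamera through an ImageManip node (resizing the frames to 300x300 and converting them to BGR, the input the model expects) into a MobileNetDetectionNetwork, and streams both the manipulated frames and the detections back to the host over XLink. A condensed sketch of that topology follows; the full, runnable listings are in the Source code section below (the blob path here is a placeholder).

import depthai as dai

pipeline = dai.Pipeline()

# Right mono camera -> ImageManip (300x300 BGR) -> MobileNet detection network
monoRight = pipeline.create(dai.node.MonoCamera)
manip = pipeline.create(dai.node.ImageManip)
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)

monoRight.setCamera("right")
manip.initialConfig.setResize(300, 300)
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)
nn.setBlobPath("mobilenet-ssd.blob")  # placeholder; the full example resolves the real blob location

monoRight.out.link(manip.inputImage)
manip.out.link(nn.input)
# XLinkOut nodes (named "right" and "nn" in the full example) expose both streams to the host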
Demo
Setup
Please run the install script to download all required dependencies. Please note that this script must be run from within the cloned repository, so you have to clone the depthai-python repository first and then run the script:
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
For additional information, please follow the installation guide.
This example script requires external file(s) to run. If you are using:
depthai-python, run
python3 examples/install_requirements.py
to download the required file(s)
depthai-core, the required file(s) will be downloaded automatically when building the example
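Both the Python and C++ versions also accept an optional command-line argument that overrides the default blob path (see the argv handling in the listings below). For example, assuming the Python script is saved locally as mono_mobilenet.py:
python3 mono_mobilenet.py /path/to/mobilenet-ssd_openvino_2021.4_6shave.blob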
Source code
Also available on GitHub
#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np

# Get argument first
nnPath = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

if not Path(nnPath).exists():
    import sys
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
monoRight = pipeline.create(dai.node.MonoCamera)
manip = pipeline.create(dai.node.ImageManip)
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)
manipOut = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)

manipOut.setStreamName("right")
nnOut.setStreamName("nn")

# Properties
monoRight.setCamera("right")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_720_P)

# Convert the grayscale frame into the nn-acceptable form
manip.initialConfig.setResize(300, 300)
# The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)

nn.setConfidenceThreshold(0.5)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)

# Linking
monoRight.out.link(manip.inputImage)
manip.out.link(nn.input)
manip.out.link(manipOut.input)
nn.out.link(nnOut.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the grayscale frames and nn data from the outputs defined above
    qRight = device.getOutputQueue("right", maxSize=4, blocking=False)
    qDet = device.getOutputQueue("nn", maxSize=4, blocking=False)

    frame = None
    detections = []

    # nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
    def frameNorm(frame, bbox):
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    def displayFrame(name, frame):
        color = (255, 0, 0)
        for detection in detections:
            bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        # Show the frame
        cv2.imshow(name, frame)

    while True:
        # Instead of get (blocking), we use tryGet (non-blocking) which will return the available data or None otherwise
        inRight = qRight.tryGet()
        inDet = qDet.tryGet()

        if inRight is not None:
            frame = inRight.getCvFrame()

        if inDet is not None:
            detections = inDet.detections

        if frame is not None:
            displayFrame("right", frame)

        if cv2.waitKey(1) == ord('q'):
            break
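The detection coordinates returned by the device are normalized to the <0..1> range, and frameNorm above scales them to pixel coordinates of the displayed frame (even indices by the frame width, odd indices by the frame height). A minimal standalone sketch of that mapping, using hypothetical detection values and the 300x300 frame size that ImageManip produces in this example:

import numpy as np

def frameNorm(frame, bbox):
    # even indices (x) scale by frame width, odd indices (y) by frame height
    normVals = np.full(len(bbox), frame.shape[0])
    normVals[::2] = frame.shape[1]
    return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

frame = np.zeros((300, 300, 3), dtype=np.uint8)  # same shape as the "right" stream frames in this example
print(frameNorm(frame, (0.1, 0.2, 0.5, 0.8)))    # hypothetical (xmin, ymin, xmax, ymax) -> [ 30  60 150 240]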
Also available on GitHub
#include <iostream>

#include "utility.hpp"

// Includes common necessary includes for development using depthai library
#include "depthai/depthai.hpp"

// MobilenetSSD label texts
static const std::vector<std::string> labelMap = {"background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
                                                  "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
                                                  "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};

int main(int argc, char** argv) {
    using namespace std;
    // Default blob path provided by Hunter private data download
    // Applicable for easier example usage only
    std::string nnPath(BLOB_PATH);

    // If path to blob specified, use that
    if(argc > 1) {
        nnPath = std::string(argv[1]);
    }

    // Print which blob we are using
    printf("Using blob at path: %s\n", nnPath.c_str());

    // Create pipeline
    dai::Pipeline pipeline;

    // Define sources and outputs
    auto monoRight = pipeline.create<dai::node::MonoCamera>();
    auto manip = pipeline.create<dai::node::ImageManip>();
    auto nn = pipeline.create<dai::node::MobileNetDetectionNetwork>();
    auto manipOut = pipeline.create<dai::node::XLinkOut>();
    auto nnOut = pipeline.create<dai::node::XLinkOut>();

    manipOut->setStreamName("right");
    nnOut->setStreamName("nn");

    // Properties
    monoRight->setCamera("right");
    monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_720_P);

    // Convert the grayscale frame into the nn-acceptable form
    manip->initialConfig.setResize(300, 300);
    // The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case)
    manip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);

    nn->setConfidenceThreshold(0.5);
    nn->setBlobPath(nnPath);
    nn->setNumInferenceThreads(2);
    nn->input.setBlocking(false);

    // Linking
    monoRight->out.link(manip->inputImage);
    manip->out.link(nn->input);
    manip->out.link(manipOut->input);
    nn->out.link(nnOut->input);

    // Connect to device and start pipeline
    dai::Device device(pipeline);

    // Output queues will be used to get the grayscale frames and nn data from the outputs defined above
    auto qRight = device.getOutputQueue("right", 4, false);
    auto qDet = device.getOutputQueue("nn", 4, false);

    cv::Mat frame;
    std::vector<dai::ImgDetection> detections;

    // Add bounding boxes and text to the frame and show it to the user
    auto displayFrame = [](std::string name, cv::Mat frame, std::vector<dai::ImgDetection>& detections) {
        auto color = cv::Scalar(255, 0, 0);
        // nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
        for(auto& detection : detections) {
            int x1 = detection.xmin * frame.cols;
            int y1 = detection.ymin * frame.rows;
            int x2 = detection.xmax * frame.cols;
            int y2 = detection.ymax * frame.rows;

            uint32_t labelIndex = detection.label;
            std::string labelStr = to_string(labelIndex);
            if(labelIndex < labelMap.size()) {
                labelStr = labelMap[labelIndex];
            }
            cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            std::stringstream confStr;
            confStr << std::fixed << std::setprecision(2) << detection.confidence * 100;
            cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color);
            cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX);
        }
        // Show the frame
        cv::imshow(name, frame);
    };

    while(true) {
        // Instead of get (blocking), we use tryGet (non-blocking) which will return the available data or None otherwise
        auto inRight = qRight->tryGet<dai::ImgFrame>();
        auto inDet = qDet->tryGet<dai::ImgDetections>();

        if(inRight) {
            frame = inRight->getCvFrame();
        }

        if(inDet) {
            detections = inDet->detections;
        }

        if(!frame.empty()) {
            displayFrame("right", frame, detections);
        }

        int key = cv::waitKey(1);
        if(key == 'q' || key == 'Q') return 0;
    }
    return 0;
}