Multi-Input Frame Concatenation¶
This example concatenates all 3 inputs with a simple custom model created with PyTorch (link here, tutorial here). It uses the NeuralNetwork node's multiple-input feature and links all 3 camera streams directly to the NeuralNetwork node.
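The custom model itself is only a few lines of PyTorch: it concatenates its three inputs along the width axis. Below is a minimal sketch of such a concat model exported to ONNX; the class name, file name and opset are illustrative, not taken from this example, and the actual model comes from the linked tutorial.

import torch
import torch.nn as nn

class CatImgs(nn.Module):
    # Concatenate three NCHW tensors along the width dimension (dim=3)
    def forward(self, img1, img2, img3):
        return torch.cat((img1, img2, img3), dim=3)

# Dummy 300x300, 3-channel inputs matching the frame size used by the pipeline below
X = torch.ones((1, 3, 300, 300), dtype=torch.float32)
torch.onnx.export(
    CatImgs(), (X, X, X), "concat.onnx",
    opset_version=12,
    input_names=["img1", "img2", "img3"],  # these names are reused as nn.inputs[...] keys
    output_names=["output"],
)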
Demo¶

Setup¶
Please run the install script to download all required dependencies. Please note that this script must be run from inside the git repository, so you have to clone the depthai-python repository first and then run the script:
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
For additional information, please follow the installation guide.
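The install script downloads the precompiled concat_openvino_2021.4_6shave.blob used below. If you want to compile a blob from your own exported ONNX model instead, one option is the blobconverter package; the following is only a sketch of an assumed invocation (parameter values mirror this example's 2021.4 / 6-SHAVE blob), not part of this example's setup.

import blobconverter

# Assumed usage: compile an ONNX model into a MyriadX .blob for 6 SHAVE cores,
# targeting the same OpenVINO version the pipeline declares (2021.4)
blob_path = blobconverter.from_onnx(
    model="concat.onnx",
    data_type="FP16",
    shaves=6,
    version="2021.4",
)
print(blob_path)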
Source code¶
Also available on GitHub
#!/usr/bin/env python3
from pathlib import Path
import sys
import numpy as np
import cv2
import depthai as dai
SHAPE = 300
# Get argument first
nnPath = str((Path(__file__).parent / Path('../models/concat_openvino_2021.4_6shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')
p = dai.Pipeline()
p.setOpenVINOVersion(dai.OpenVINO.VERSION_2021_4)
camRgb = p.createColorCamera()
camRgb.setPreviewSize(SHAPE, SHAPE)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
def create_mono(p, socket):
    mono = p.create(dai.node.MonoCamera)
    mono.setBoardSocket(socket)
    mono.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)

    # ImageManip for resizing to the 300x300 input the concat NN expects and to change the frame type
    manip = p.create(dai.node.ImageManip)
    manip.initialConfig.setResize(300, 300)
    manip.initialConfig.setFrameType(dai.RawImgFrame.Type.BGR888p)
    mono.out.link(manip.inputImage)
    return manip.out
# NeuralNetwork node that concatenates the three input frames
nn = p.createNeuralNetwork()
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
camRgb.preview.link(nn.inputs['img2'])
create_mono(p, dai.CameraBoardSocket.CAM_B).link(nn.inputs['img1'])
create_mono(p, dai.CameraBoardSocket.CAM_C).link(nn.inputs['img3'])
# Send the concatenated frame from the NN to the host via XLink
nn_xout = p.createXLinkOut()
nn_xout.setStreamName("nn")
nn.out.link(nn_xout.input)
# Pipeline is defined, now we can connect to the device
with dai.Device(p) as device:
    qNn = device.getOutputQueue(name="nn", maxSize=4, blocking=False)

    shape = (3, SHAPE, SHAPE * 3)

    while True:
        inNn = np.array(qNn.get().getFirstLayerFp16())
        # Reshape the planar FP16 output into an interleaved uint8 frame for display
        frame = inNn.reshape(shape).astype(np.uint8).transpose(1, 2, 0)
        cv2.imshow("Concat", frame)

        if cv2.waitKey(1) == ord('q'):
            break
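On the host side, the FP16 output tensor is reshaped to planar (3, 300, 900) and transposed into an interleaved 300x900 BGR frame. Assuming the model concatenates its inputs in img1, img2, img3 order along the width (as the input linking above suggests), the displayed frame can be split back into its three 300x300 sources, for example:

# frame is the 300x900x3 uint8 image produced in the loop above
left_view  = frame[:, 0:300]    # mono left (CAM_B)  -> nn.inputs['img1']
rgb_view   = frame[:, 300:600]  # color preview      -> nn.inputs['img2']
right_view = frame[:, 600:900]  # mono right (CAM_C) -> nn.inputs['img3']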
Also available on GitHub
#include <chrono>
#include <cstdio>
#include <iostream>
// Includes common necessary includes for development using depthai library
#include "depthai/depthai.hpp"
#include "utility.hpp"
int main(int argc, char** argv) {
    using namespace std;

    // Default blob path provided by Hunter private data download
    // Applicable for easier example usage only
    std::string nnPath(BLOB_PATH);

    // If path to blob specified, use that
    if(argc > 1) {
        nnPath = std::string(argv[1]);
    }

    // Print which blob we are using
    printf("Using blob at path: %s\n", nnPath.c_str());

    // Create pipeline
    dai::Pipeline pipeline;
    pipeline.setOpenVINOVersion(dai::OpenVINO::Version::VERSION_2021_4);

    // Define sources and outputs
    auto camRgb = pipeline.create<dai::node::ColorCamera>();
    camRgb->setPreviewSize(300, 300);  // NN input
    camRgb->setInterleaved(false);
    camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::BGR);

    auto right = pipeline.create<dai::node::MonoCamera>();
    right->setCamera("right");
    right->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);

    auto manipRight = pipeline.create<dai::node::ImageManip>();
    manipRight->initialConfig.setResize(300, 300);
    manipRight->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);
    right->out.link(manipRight->inputImage);

    auto left = pipeline.create<dai::node::MonoCamera>();
    left->setCamera("left");
    left->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P);

    auto manipLeft = pipeline.create<dai::node::ImageManip>();
    manipLeft->initialConfig.setResize(300, 300);
    manipLeft->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);
    left->out.link(manipLeft->inputImage);

    auto nn = pipeline.create<dai::node::NeuralNetwork>();
    nn->setBlobPath(nnPath);
    nn->setNumInferenceThreads(2);

    manipLeft->out.link(nn->inputs["img1"]);
    camRgb->preview.link(nn->inputs["img2"]);
    manipRight->out.link(nn->inputs["img3"]);

    auto xout = pipeline.create<dai::node::XLinkOut>();
    xout->setStreamName("nn");
    nn->out.link(xout->input);

    // Connect to device and start pipeline
    dai::Device device(pipeline);

    // Output queue will be used to get the nn data from the output defined above
    auto qNn = device.getOutputQueue("nn", 4, false);

    while(true) {
        auto inNn = qNn->get<dai::NNData>();
        // Convert the planar FP16 output (3 x 300 x 900) into a cv::Mat for display
        cv::imshow("Concat", fromPlanarFp16(inNn->getFirstLayerFp16(), 900, 300));

        int key = cv::waitKey(1);
        if(key == 'q' || key == 'Q') {
            return 0;
        }
    }
    return 0;
}