Frame Normalization
This example shows how you can normalize a frame before sending it to another neural network. Many neural network models require frames with RGB values (pixels) in the range -0.5 to 0.5. ColorCamera's preview outputs values between 0 and 255. A simple custom model, created with PyTorch (link here, tutorial here), allows users to specify the mean and scale factors that will be applied to all frame values (pixels). On the host, values are converted back to the 0-255 range, so they can be displayed by OpenCV.
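As a rough sketch (the class name below is illustrative, not the exact model from the linked tutorial), such a custom model can be as small as a single forward pass implementing output = (input - mean) / scale:

import torch
import torch.nn as nn

class NormalizeModel(nn.Module):
    # Illustrative stand-in for the custom model from the tutorial:
    # applies output = (input - mean) / scale to every pixel
    def forward(self, frame, mean, scale):
        return (frame - mean) / scale

# The model would then be exported to ONNX and compiled to a .blob
# with `-ip FP16`, matching the FP16 inputs used in the pipeline below.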
Note

This is just a demo; for normalization you should use OpenVINO's model optimizer arguments --mean_values and --scale_values.
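For reference, a Model Optimizer invocation with those arguments might look like the following (model.onnx is a placeholder name, and the per-channel values here mirror the mean/scale used in this example):

mo --input_model model.onnx --mean_values "[127.5,127.5,127.5]" --scale_values "[255,255,255]"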
Setup
Please run the install script to download all required dependencies. Please note that this script must be run from git context, so you have to download the depthai-python repository first and then run the script:
git clone https://github.com/luxonis/depthai-python.git
cd depthai-python/examples
python3 install_requirements.py
For additional information, please follow the installation guide.
Source code
Python (also available on GitHub)
#!/usr/bin/env python3
from pathlib import Path
import sys
import numpy as np
import cv2
import depthai as dai

SHAPE = 300

# Get argument first
nnPath = str((Path(__file__).parent / Path('../models/normalize_openvino_2021.4_4shave.blob')).resolve().absolute())
if len(sys.argv) > 1:
    nnPath = sys.argv[1]

if not Path(nnPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

p = dai.Pipeline()
p.setOpenVINOVersion(dai.OpenVINO.VERSION_2021_4)

camRgb = p.createColorCamera()
# Model expects values in FP16, as we have compiled it with `-ip FP16`
camRgb.setFp16(True)
camRgb.setInterleaved(False)
camRgb.setPreviewSize(SHAPE, SHAPE)

nn = p.createNeuralNetwork()
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)

script = p.create(dai.node.Script)
script.setScript("""
# Run this script only once. We could also send these values from the host.
# Model formula:
# output = (input - mean) / scale

# This configuration will subtract 127.5 from all frame values (pixels)
# 0.0 .. 255.0 -> -127.5 .. 127.5
data = NNData(2)
data.setLayer("mean", [127.5])
node.io['mean'].send(data)

# This configuration will divide all frame values (pixels) by 255.0
# -127.5 .. 127.5 -> -0.5 .. 0.5
data = NNData(2)
data.setLayer("scale", [255.0])
node.io['scale'].send(data)
""")
# Re-use the initial values for mean/scale
script.outputs['mean'].link(nn.inputs['mean'])
nn.inputs['mean'].setWaitForMessage(False)

script.outputs['scale'].link(nn.inputs['scale'])
nn.inputs['scale'].setWaitForMessage(False)

# Always wait for the new frame before starting inference
camRgb.preview.link(nn.inputs['frame'])

# Send normalized frame values to the host
nn_xout = p.createXLinkOut()
nn_xout.setStreamName("nn")
nn.out.link(nn_xout.input)

# Pipeline is defined, now we can connect to the device
with dai.Device(p) as device:
    qNn = device.getOutputQueue(name="nn", maxSize=4, blocking=False)
    shape = (3, SHAPE, SHAPE)

    while True:
        inNn = np.array(qNn.get().getData())
        # Get back the frame. It's currently normalized to -0.5 .. 0.5
        frame = inNn.view(np.float16).reshape(shape).transpose(1, 2, 0)
        # To get the original frame back (0-255), we multiply all frame values (pixels) by 255 and then add 127.5 to them
        frame = (frame * 255.0 + 127.5).astype(np.uint8)
        # Show the initial frame
        cv2.imshow("Original frame", frame)

        if cv2.waitKey(1) == ord('q'):
            break
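To sanity-check the mean/scale math without a device, the same round trip can be reproduced on the host with NumPy (a standalone sketch, not part of the example above):

import numpy as np

frame = np.random.randint(0, 256, (300, 300, 3)).astype(np.float32)
normalized = (frame - 127.5) / 255.0   # 0..255 -> -0.5..0.5
restored = normalized * 255.0 + 127.5  # -0.5..0.5 -> 0..255
assert np.allclose(frame, restored)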
C++ (also available on GitHub)
#include <chrono>
#include <cstdio>
#include <iostream>

// Includes common necessary includes for development using the depthai library
#include "depthai/depthai.hpp"
#include "utility.hpp"

int main(int argc, char** argv) {
    using namespace std;

    // Default blob path provided by Hunter private data download
    // Applicable for easier example usage only
    std::string nnPath(BLOB_PATH);

    // If path to blob specified, use that
    if(argc > 1) {
        nnPath = std::string(argv[1]);
    }

    // Print which blob we are using
    printf("Using blob at path: %s\n", nnPath.c_str());

    // Create pipeline
    dai::Pipeline pipeline;
    pipeline.setOpenVINOVersion(dai::OpenVINO::Version::VERSION_2021_4);

    // Define sources and outputs
    auto camRgb = pipeline.create<dai::node::ColorCamera>();
    // Model expects values in FP16, as we have compiled it with `-ip FP16`
    camRgb->setFp16(true);
    camRgb->setInterleaved(false);
    camRgb->setPreviewSize(300, 300);  // NN input

    auto nn = pipeline.create<dai::node::NeuralNetwork>();
    nn->setBlobPath(nnPath);
    nn->setNumInferenceThreads(2);

    auto script = pipeline.create<dai::node::Script>();
    script->setScript(R"(
    # Run this script only once
    # Model formula:
    # output = (input - mean) / scale

    # This configuration will subtract 127.5 from all frame values (pixels)
    # 0.0 .. 255.0 -> -127.5 .. 127.5
    data = NNData(2)
    data.setLayer("mean", [127.5])
    node.io['mean'].send(data)

    # This configuration will divide all frame values (pixels) by 255.0
    # -127.5 .. 127.5 -> -0.5 .. 0.5
    data = NNData(2)
    data.setLayer("scale", [255.0])
    node.io['scale'].send(data)
    )");
    // Re-use the initial values for mean/scale
    script->outputs["mean"].link(nn->inputs["mean"]);
    nn->inputs["mean"].setWaitForMessage(false);

    script->outputs["scale"].link(nn->inputs["scale"]);
    nn->inputs["scale"].setWaitForMessage(false);

    // Always wait for the new frame before starting inference
    camRgb->preview.link(nn->inputs["frame"]);

    auto xout = pipeline.create<dai::node::XLinkOut>();
    xout->setStreamName("nn");
    nn->out.link(xout->input);

    // Connect to device and start pipeline
    dai::Device device(pipeline);

    // Output queue will be used to get the nn data from the output defined above
    auto qNn = device.getOutputQueue("nn", 4, false);

    while(true) {
        auto inNn = qNn->get<dai::NNData>();
        // To get the original frame back (0-255), we multiply all frame values (pixels) by 255 and then add 127.5 to them
        cv::imshow("Original Frame", fromPlanarFp16(inNn->getFirstLayerFp16(), 300, 300, 127.5, 255.0));

        int key = cv::waitKey(1);
        if(key == 'q' || key == 'Q') {
            return 0;
        }
    }
    return 0;
}