You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
334 lines
13 KiB
334 lines
13 KiB
<!DOCTYPE html>
|
|
<html lang="en">
|
|
<head>
|
|
<meta charset="UTF-8">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
<title>YOLOv8 tinygrad WebGPU</title>
|
|
<script type="module">
|
|
import yolov8 from "./net.js"
|
|
window.yolov8 = yolov8;
|
|
</script>
|
|
<style>
|
|
body {
|
|
text-align: center;
|
|
font-family: Arial, sans-serif;
|
|
margin: 0;
|
|
padding: 0;
|
|
overflow: hidden;
|
|
}
|
|
|
|
.video-container {
|
|
position: relative;
|
|
width: 100%;
|
|
height: 100vh;
|
|
margin: 0 auto;
|
|
display: flex;
|
|
align-items: center;
|
|
justify-content: center;
|
|
}
|
|
|
|
#video, #canvas {
|
|
position: absolute;
|
|
top: 0;
|
|
left: 0;
|
|
width: 100%;
|
|
height: auto;
|
|
}
|
|
|
|
.loader {
|
|
width: 48px;
|
|
height: 48px;
|
|
border: 5px solid #FFF;
|
|
border-bottom-color: transparent;
|
|
border-radius: 50%;
|
|
display: inline-block;
|
|
box-sizing: border-box;
|
|
animation: rotation 1s linear infinite;
|
|
}
|
|
|
|
@keyframes rotation {
|
|
0% {
|
|
transform: rotate(0deg);
|
|
}
|
|
100% {
|
|
transform: rotate(360deg);
|
|
}
|
|
}
|
|
|
|
#canvas {
|
|
background: transparent;
|
|
}
|
|
|
|
#fps-meter {
|
|
position: absolute;
|
|
top: 20px;
|
|
right: 20px;
|
|
background-color: rgba(0, 0, 0, 0.7);
|
|
color: white;
|
|
padding: 10px;
|
|
font-size: 18px;
|
|
border-radius: 5px;
|
|
z-index: 10;
|
|
}
|
|
|
|
h1 {
|
|
margin-top: 20px;
|
|
}
|
|
|
|
.loading-container {
|
|
display: flex;
|
|
flex-direction: column;
|
|
align-items: center;
|
|
justify-content: center;
|
|
position: fixed;
|
|
top: 0;
|
|
left: 0;
|
|
width: 100%;
|
|
height: 100%;
|
|
background-color: rgba(0, 0, 0, 0.6);
|
|
z-index: 10;
|
|
}
|
|
|
|
.loading-text {
|
|
font-size: 24px;
|
|
color: white;
|
|
margin-bottom: 20px;
|
|
}
|
|
</style>
|
|
</head>
|
|
<body>
|
|
<h2>YOLOv8 tinygrad WebGPU</h2>
|
|
<h2 id="wgpu-error" style="display: none; color: red;">Error: WebGPU is not supported in this browser</h2>
|
|
<div class="video-container">
|
|
<video id="video" muted autoplay playsinline></video>
|
|
<canvas id="canvas"></canvas>
|
|
<div id="fps-meter"></div>
|
|
|
|
<div id="div-loading" class="loading-container">
|
|
<p class="loading-text">Loading model</p>
|
|
<span class="loader"></span>
|
|
</div>
|
|
</div>
|
|
<script>
|
|
let net = null;
|
|
const modelInputSize = 416;
|
|
let lastCalledTime;
|
|
let fps = 0, accumFps = 0, frameCounter = 0;
|
|
|
|
const video = document.getElementById('video');
|
|
const canvas = document.getElementById('canvas');
|
|
const context = canvas.getContext('2d');
|
|
const offscreenCanvas = document.createElement('canvas');
|
|
const fpsMeter = document.getElementById('fps-meter');
|
|
const loadingContainer = document.getElementById('div-loading');
|
|
const wgpuError = document.getElementById('wgpu-error');
|
|
offscreenCanvas.width = modelInputSize;
|
|
offscreenCanvas.height = modelInputSize;
|
|
const offscreenContext = offscreenCanvas.getContext('2d');
|
|
|
|
|
|
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
|
|
navigator.mediaDevices.getUserMedia({ audio: false, video: { facingMode: { ideal: "environment" }}}).then(function (stream) {
|
|
video.srcObject = stream;
|
|
video.onloadedmetadata = function() {
|
|
canvas.width = video.clientWidth;
|
|
canvas.height = video.clientHeight;
|
|
}
|
|
});
|
|
}
|
|
|
|
async function processFrame() {
|
|
if (video.videoWidth == 0 || video.videoHeight == 0) {
|
|
requestAnimationFrame(processFrame);
|
|
return;
|
|
}
|
|
|
|
if (!lastCalledTime) {
|
|
lastCalledTime = performance.now();
|
|
fps = 0;
|
|
} else {
|
|
const now = performance.now();
|
|
delta = (now - lastCalledTime)/1000.0;
|
|
lastCalledTime = now;
|
|
accumFps += 1/delta;
|
|
|
|
if (frameCounter++ >= 10) {
|
|
fps = accumFps/frameCounter;
|
|
frameCounter = 0;
|
|
accumFps = 0;
|
|
fpsMeter.innerText = `FPS: ${fps.toFixed(1)}`
|
|
}
|
|
}
|
|
|
|
const videoAspectRatio = video.videoWidth / video.videoHeight;
|
|
let targetWidth, targetHeight;
|
|
|
|
if (videoAspectRatio > 1) {
|
|
targetWidth = modelInputSize;
|
|
targetHeight = modelInputSize / videoAspectRatio;
|
|
} else {
|
|
targetHeight = modelInputSize;
|
|
targetWidth = modelInputSize * videoAspectRatio;
|
|
}
|
|
|
|
const offsetX = (modelInputSize - targetWidth) / 2;
|
|
const offsetY = (modelInputSize - targetHeight) / 2;
|
|
offscreenContext.clearRect(0, 0, modelInputSize, modelInputSize);
|
|
offscreenContext.drawImage(video, offsetX, offsetY, targetWidth, targetHeight);
|
|
const boxes = await detectObjectsOnFrame(offscreenContext);
|
|
drawBoxes(offscreenCanvas, boxes, targetWidth, targetHeight, offsetX, offsetY);
|
|
requestAnimationFrame(processFrame);
|
|
}
|
|
|
|
requestAnimationFrame(processFrame);
|
|
|
|
function drawBoxes(offscreenCanvas, boxes, targetWidth, targetHeight, offsetX, offsetY) {
|
|
const ctx = document.querySelector("canvas").getContext("2d");
|
|
ctx.clearRect(0, 0, canvas.width, canvas.height);
|
|
ctx.lineWidth = 3;
|
|
ctx.font = "30px serif";
|
|
const scaleX = canvas.width / targetWidth;
|
|
const scaleY = canvas.height / targetHeight;
|
|
|
|
boxes.forEach(([x1, y1, x2, y2, label]) => {
|
|
const classIndex = yolo_classes.indexOf(label);
|
|
const color = classColors[classIndex];
|
|
ctx.strokeStyle = color;
|
|
ctx.fillStyle = color;
|
|
const adjustedX1 = (x1 - offsetX) * scaleX;
|
|
const adjustedY1 = (y1 - offsetY) * scaleY;
|
|
const adjustedX2 = (x2 - offsetX) * scaleX;
|
|
const adjustedY2 = (y2 - offsetY) * scaleY;
|
|
const boxWidth = adjustedX2 - adjustedX1;
|
|
const boxHeight = adjustedY2 - adjustedY1;
|
|
ctx.strokeRect(adjustedX1, adjustedY1, boxWidth, boxHeight);
|
|
const textWidth = ctx.measureText(label).width;
|
|
ctx.fillRect(adjustedX1, adjustedY1 - 25, textWidth + 10, 25);
|
|
ctx.fillStyle = "#FFFFFF";
|
|
ctx.fillText(label, adjustedX1 + 5, adjustedY1 - 7);
|
|
});
|
|
}
|
|
|
|
async function detectObjectsOnFrame(offscreenContext) {
|
|
if (!net) {
|
|
let device = await getDevice();
|
|
if (!device) {
|
|
wgpuError.style.display = "block";
|
|
loadingContainer.style.display = "none";
|
|
}
|
|
net = await yolov8.load(device, "./net.safetensors");
|
|
loadingContainer.style.display = "none";
|
|
}
|
|
let start = performance.now();
|
|
const [input,img_width,img_height] = await prepareInput(offscreenContext);
|
|
console.log("Preprocess took: " + (performance.now() - start) + " ms");
|
|
start = performance.now();
|
|
const output = await net(new Float32Array(input));
|
|
console.log("Inference took: " + (performance.now() - start) + " ms");
|
|
start = performance.now();
|
|
let out = processOutput(output[0],img_width,img_height);
|
|
console.log("Postprocess took: " + (performance.now() - start) + " ms");
|
|
return out;
|
|
}
|
|
|
|
async function prepareInput(offscreenContext) {
|
|
return new Promise(resolve => {
|
|
const [img_width,img_height] = [modelInputSize, modelInputSize]
|
|
const imgData = offscreenContext.getImageData(0,0,modelInputSize,modelInputSize);
|
|
const pixels = imgData.data;
|
|
const red = [], green = [], blue = [];
|
|
|
|
for (let index=0; index<pixels.length; index+=4) {
|
|
red.push(pixels[index]/255.0);
|
|
green.push(pixels[index+1]/255.0);
|
|
blue.push(pixels[index+2]/255.0);
|
|
}
|
|
const input = [...red, ...green, ...blue];
|
|
resolve([input, img_width, img_height])
|
|
})
|
|
}
|
|
|
|
const getDevice = async () => {
|
|
if (!navigator.gpu) return false;
|
|
const adapter = await navigator.gpu.requestAdapter();
|
|
return await adapter.requestDevice({
|
|
requiredFeatures: ["shader-f16"],
|
|
powerPreference: "high-performance"
|
|
});
|
|
};
|
|
|
|
function processOutput(output, img_width, img_height) {
|
|
let boxes = [];
|
|
const numPredictions = Math.pow(modelInputSize/32, 2) * 21;
|
|
for (let index=0;index<numPredictions;index++) {
|
|
const [class_id,prob] = [...Array(80).keys()]
|
|
.map(col => [col, output[numPredictions*(col+4)+index]])
|
|
.reduce((accum, item) => item[1]>accum[1] ? item : accum,[0,0]);
|
|
|
|
if (prob < 0.25) continue;
|
|
const label = yolo_classes[class_id];
|
|
const xc = output[index];
|
|
const yc = output[numPredictions+index];
|
|
const w = output[2*numPredictions+index];
|
|
const h = output[3*numPredictions+index];
|
|
const x1 = (xc-w/2)/modelInputSize*img_width;
|
|
const y1 = (yc-h/2)/modelInputSize*img_height;
|
|
const x2 = (xc+w/2)/modelInputSize*img_width;
|
|
const y2 = (yc+h/2)/modelInputSize*img_height;
|
|
boxes.push([x1,y1,x2,y2,label,prob]);
|
|
}
|
|
|
|
boxes = boxes.sort((box1,box2) => box2[5]-box1[5])
|
|
const result = [];
|
|
while (boxes.length>0) {
|
|
result.push(boxes[0]);
|
|
boxes = boxes.filter(box => iou(boxes[0],box)<0.7);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
function iou(box1,box2) {
|
|
return intersection(box1,box2)/union(box1,box2);
|
|
}
|
|
|
|
function union(box1,box2) {
|
|
const [box1_x1,box1_y1,box1_x2,box1_y2] = box1;
|
|
const [box2_x1,box2_y1,box2_x2,box2_y2] = box2;
|
|
const box1_area = (box1_x2-box1_x1)*(box1_y2-box1_y1)
|
|
const box2_area = (box2_x2-box2_x1)*(box2_y2-box2_y1)
|
|
return box1_area + box2_area - intersection(box1,box2)
|
|
}
|
|
|
|
function intersection(box1,box2) {
|
|
const [box1_x1,box1_y1,box1_x2,box1_y2] = box1;
|
|
const [box2_x1,box2_y1,box2_x2,box2_y2] = box2;
|
|
const x1 = Math.max(box1_x1,box2_x1);
|
|
const y1 = Math.max(box1_y1,box2_y1);
|
|
const x2 = Math.min(box1_x2,box2_x2);
|
|
const y2 = Math.min(box1_y2,box2_y2);
|
|
return (x2-x1)*(y2-y1)
|
|
}
|
|
|
|
const yolo_classes = [
|
|
'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
|
|
'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
|
|
'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
|
|
'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
|
|
'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
|
|
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
|
|
'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
|
|
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
|
|
];
|
|
|
|
function generateColors(numColors) {
|
|
const colors = [];
|
|
for (let i = 0; i < 360; i += 360 / numColors) {
|
|
colors.push(`hsl(${i}, 100%, 50%)`);
|
|
}
|
|
return colors;
|
|
}
|
|
|
|
const classColors = generateColors(yolo_classes.length);
|
|
</script>
|
|
</body>
|
|
</html>
|
|
|