# Implementation of waifu2x vgg7 in tinygrad.
# Obviously, not developed, supported, etc. by the original waifu2x author(s).
import numpy
from tinygrad.tensor import Tensor
from PIL import Image
from tinygrad.helpers import fetch

# File Formats
# tinygrad convolution tensor input layout is (1,c,y,x) - and therefore the form for all images used in the project
# tinygrad convolution tensor weight layout is (outC,inC,H,W) - this matches NCNN (and therefore KINNE), but not waifu2x json
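# For example, image_load below yields a (1,3,h,w) tensor for an RGB image, and the
# first 3-to-32-channel layer of the model holds a (32,3,3,3) weight tensor.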
def image_load(path) -> numpy.ndarray:
  """
  Loads an image in the shape expected by other functions in this module.
  Doesn't Tensor it, in case you need to do further work with it.
  """
  # Load file (assumes an RGB or RGBA input).
  na = numpy.array(Image.open(path))
  if na.shape[2] == 4:
    # RGBA -> RGB (covers opaque images with alpha channels)
    na = na[:,:,0:3]
  # fix shape
  na = numpy.moveaxis(na, [2,0,1], [0,1,2])
  # shape is now (3,h,w), add 1
  na = na.reshape(1,3,na.shape[1],na.shape[2])
  # change type
  na = na.astype("float32") / 255.0
  return na

def image_save(path, na: numpy.ndarray):
  """
  Saves an image of the shape expected by other functions in this module.
  However, note this expects a numpy array.
  """
  # change type (clamp to [0,255] before converting to uint8)
  na = numpy.fmax(numpy.fmin(na * 255.0, 255), 0).astype("uint8")
  # shape is now (1,3,h,w), remove 1
  na = na.reshape(3,na.shape[2],na.shape[3])
  # fix shape
  na = numpy.moveaxis(na, [0,1,2], [2,0,1])
  # shape is now (h,w,3)
  # file
  Image.fromarray(na).save(path)
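
# Round-trip sketch (illustrative; "in.png"/"out.png" are hypothetical paths):
#   na = image_load("in.png")     # (1,3,h,w) float32, values in [0,1]
#   image_save("out.png", na)     # clamps, converts back to (h,w,3) uint8, writes
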
# The Model
class Conv3x3Biased:
  """
  A 3x3 convolution layer with some utility functions.
  """
  def __init__(self, inC, outC, last = False):
    # The properties must be named "W" and "b".
    # This is an attempt to stay roughly compatible with https://github.com/FHPythonUtils/Waifu2x
    # though this cannot necessarily account for transposition and other such things.

    # Massively overstate the weights to get them to be focused on,
    # since otherwise the biases overrule everything
    self.W = Tensor.uniform(outC, inC, 3, 3) * 16.0
    # Layout-wise, blatant cheat, but serious_mnist does it. I'd guess channels either have to have a size of 1 or whatever the target is?
    # Values-wise, entirely different blatant cheat.
    # In most cases, use uniform bias, but tiny.
    # For the last layer, use just 0.5, constant.
    if last:
      self.b = Tensor.zeros(1, outC, 1, 1) + 0.5
    else:
      self.b = Tensor.uniform(1, outC, 1, 1)

  def forward(self, x):
    # You might be thinking, "but what about padding?"
    # Answer: Tiling is used to stitch everything back together, though you could pad the image before providing it.
    # Without padding, each 3x3 convolution shrinks the image by 1 pixel on every side.
    return x.conv2d(self.W).add(self.b)

  def get_parameters(self) -> list:
    return [self.W, self.b]

  def load_waifu2x_json(self, layer: dict):
    # Weights in this file are outChannel,inChannel,X,Y.
    # Not outChannel,inChannel,Y,X.
    # Therefore, transpose the last two axes before assignment.
    # I have long since forgotten how I worked this out.
    self.W.assign(Tensor(layer["weight"]).reshape(shape=self.W.shape).transpose(2, 3))
    self.b.assign(Tensor(layer["bias"]).reshape(shape=self.b.shape))

class Vgg7:
  """
  The 'vgg7' waifu2x network.
  Lower quality and slower than even upconv7 (never mind cunet), but very easy to implement and test.
  """
  def __init__(self):
    self.conv1 = Conv3x3Biased(3, 32)
    self.conv2 = Conv3x3Biased(32, 32)
    self.conv3 = Conv3x3Biased(32, 64)
    self.conv4 = Conv3x3Biased(64, 64)
    self.conv5 = Conv3x3Biased(64, 128)
    self.conv6 = Conv3x3Biased(128, 128)
    self.conv7 = Conv3x3Biased(128, 3, True)

  def forward(self, x):
    """
    Forward pass: Actually runs the network.
    Input format: (1, 3, Y, X)
    Output format: (1, 3, Y - 14, X - 14)
    (the - 14 represents the 7-pixel context border that is lost)
    """
    x = self.conv1.forward(x).leaky_relu(0.1)
    x = self.conv2.forward(x).leaky_relu(0.1)
    x = self.conv3.forward(x).leaky_relu(0.1)
    x = self.conv4.forward(x).leaky_relu(0.1)
    x = self.conv5.forward(x).leaky_relu(0.1)
    x = self.conv6.forward(x).leaky_relu(0.1)
    x = self.conv7.forward(x)
    return x

  def get_parameters(self) -> list:
    return (self.conv1.get_parameters() + self.conv2.get_parameters() + self.conv3.get_parameters() +
            self.conv4.get_parameters() + self.conv5.get_parameters() + self.conv6.get_parameters() +
            self.conv7.get_parameters())

  def load_from_pretrained(self, intent = "art", subtype = "scale2.0x"):
    """
    Downloads a nagadomi/waifu2x JSON weight file and loads it.
    """
    import json
    data = json.loads(fetch("https://github.com/nagadomi/waifu2x/raw/master/models/vgg_7/" + intent + "/" + subtype + "_model.json").read_bytes())
    self.load_waifu2x_json(data)

  def load_waifu2x_json(self, data: list):
    """
    Loads weights from one of the waifu2x JSON files, i.e. waifu2x/models/vgg_7/art/noise0_model.json
    data (passed in) is assumed to be the output of json.load or similar on such a file
    """
    self.conv1.load_waifu2x_json(data[0])
    self.conv2.load_waifu2x_json(data[1])
    self.conv3.load_waifu2x_json(data[2])
    self.conv4.load_waifu2x_json(data[3])
    self.conv5.load_waifu2x_json(data[4])
    self.conv6.load_waifu2x_json(data[5])
    self.conv7.load_waifu2x_json(data[6])

  def forward_tiled(self, image: numpy.ndarray, tile_size: int) -> numpy.ndarray:
    """
    Given an ndarray image as loaded by image_load (NOT a tensor), scales it, pads it, splits it up, forwards the pieces, and reconstitutes it.
    Note that you really shouldn't try to run anything not (1, 3, *, *) through this.
    """
    # Constant that only really gets repeated a ton here.
    context = 7
    context2 = context + context

    # Notably, numpy is used here because it makes this fine manipulation a lot simpler.
    # Scaling first - repeat on axis 2 and axis 3 (Y & X)
    image = image.repeat(2, 2).repeat(2, 3)

    # Resulting image buffer. This is made before the input is padded,
    # while the input still has the same (un-padded) shape the output will have.
    image_out = numpy.zeros(image.shape)

    # Padding next. Note that this padding is done on the whole image.
    # Padding the tiles would lose critical context, cause seams, etc.
    image = numpy.pad(image, [[0, 0], [0, 0], [context, context], [context, context]], mode = "edge")

    # Now for tiling.
    # The output tile size is the usable output from an input tile (tile_size).
    # As such, the tiles overlap.
    out_tile_size = tile_size - context2
    for out_y in range(0, image_out.shape[2], out_tile_size):
      for out_x in range(0, image_out.shape[3], out_tile_size):
        # Input is sourced from the same coordinates, but a few things are worth
        # noting here for future reference:
        # + out_x/y's equivalent position w/ the padding is out_x + context.
        # + The output, however, is without context. Input needs context.
        # + Therefore, the input rectangle is expanded on all sides by context.
        # + Therefore, the input position has the context subtracted again.
        # + Therefore:
        in_y = out_y
        in_x = out_x
        # not shown: in_w/in_h = tile_size (as opposed to out_tile_size)

        # Extract tile.
        # Note that numpy will auto-crop this at the bottom-right.
        # This will never be a problem, as tiles are specifically chosen within the padded section.
        tile = image[:, :, in_y:in_y + tile_size, in_x:in_x + tile_size]

        # Extracted tile dimensions -> output dimensions
        # This is important because of said cropping; otherwise it'd be the interior tile size.
        out_h = tile.shape[2] - context2
        out_w = tile.shape[3] - context2

        # Process tile.
        tile_t = Tensor(tile)
        tile_fwd_t = self.forward(tile_t)

        # Replace tile.
        image_out[:, :, out_y:out_y + out_h, out_x:out_x + out_w] = tile_fwd_t.numpy()

    return image_out
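
# Example usage sketch. "input.png"/"output.png" are hypothetical paths, and
# tile_size=156 is an arbitrary pick - anything comfortably above the 14-pixel
# context loss should work.
if __name__ == "__main__":
  vgg7 = Vgg7()
  # Fetch the default nagadomi/waifu2x "art" scale2.0x weights.
  vgg7.load_from_pretrained()
  # (1,3,h,w) float32 in, 2x-scaled image out, processed in overlapping tiles.
  image = image_load("input.png")
  image = vgg7.forward_tiled(image, 156)
  image_save("output.png", image)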