# Implementation of waifu2x vgg7 in tinygrad.
# Obviously, not developed, supported, etc. by the original waifu2x author(s).
import numpy
from tinygrad.tensor import Tensor
from PIL import Image
from tinygrad.helpers import fetch
# File Formats
# tinygrad convolution tensor input layout is (1,c,y,x) - and therefore the form for all images used in the project
# tinygrad convolution tensor weight layout is (outC,inC,H,W) - this matches NCNN (and therefore KINNE), but not waifu2x json
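# e.g. an RGB PNG decoded by PIL arrives as (h,w,3) uint8; image_load below reshuffles
# that into the (1,3,h,w) float32 layout that tinygrad's conv2d expects.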
def image_load(path) -> numpy.ndarray:
  """
  Loads an image in the shape expected by other functions in this module.
  Doesn't Tensor it, in case you need to do further work with it.
  """
  # file
  na = numpy.array(Image.open(path))
  if na.shape[2] == 4:
    # RGBA -> RGB (covers opaque images with alpha channels)
    na = na[:, :, 0:3]
  # fix shape
  na = numpy.moveaxis(na, [2, 0, 1], [0, 1, 2])
  # shape is now (3,h,w), add the leading 1
  na = na.reshape(1, 3, na.shape[1], na.shape[2])
  # change type
  na = na.astype("float32") / 255.0
  return na
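
# A minimal usage sketch of the two helpers above (hypothetical file names):
#   na = image_load("input.png")    # -> (1, 3, h, w) float32 in [0, 1]
#   image_save("copy.png", na)      # clamps, converts back to uint8, writes the file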
def image_save(path, na: numpy.ndarray):
  """
  Saves an image of the shape expected by other functions in this module.
  However, note this expects a numpy array, not a Tensor.
  """
  # change type
  na = numpy.fmax(numpy.fmin(na * 255.0, 255), 0).astype("uint8")
  # shape is now (1,3,h,w), remove the leading 1
  na = na.reshape(3, na.shape[2], na.shape[3])
  # fix shape
  na = numpy.moveaxis(na, [0, 1, 2], [2, 0, 1])
  # shape is now (h,w,3)
  # file
  Image.fromarray(na).save(path)
# The Model
class Conv3x3Biased:
  """
  A 3x3 convolution layer with some utility functions.
  """
  def __init__(self, inC, outC, last=False):
    # The properties must be named "W" and "b".
    # This is an attempt to be roughly compatible with https://github.com/FHPythonUtils/Waifu2x
    # though this cannot necessarily account for transposition and other such things.

    # Massively overstate the weights to get them to be focused on,
    # since otherwise the biases overrule everything.
    self.W = Tensor.uniform(outC, inC, 3, 3) * 16.0

    # Layout-wise, blatant cheat, but serious_mnist does it. I'd guess channels either have to have a size of 1 or whatever the target is?
    # Values-wise, entirely different blatant cheat.
    # In most cases, use uniform bias, but tiny.
    # For the last layer, use just 0.5, constant.
    if last:
      self.b = Tensor.zeros(1, outC, 1, 1) + 0.5
    else:
      self.b = Tensor.uniform(1, outC, 1, 1)

  def forward(self, x):
    # You might be thinking, "but what about padding?"
    # Answer: Tiling is used to stitch everything back together, though you could pad the image before providing it.
    return x.conv2d(self.W).add(self.b)

  def get_parameters(self) -> list:
    return [self.W, self.b]

  def load_waifu2x_json(self, layer: dict):
    # Weights in this file are outChannel,inChannel,X,Y.
    # Not outChannel,inChannel,Y,X.
    # Therefore, transpose before assignment.
    # I have long since forgotten how I worked this out.
    self.W.assign(Tensor(layer["weight"]).reshape(shape=self.W.shape).transpose(2, 3))
    self.b.assign(Tensor(layer["bias"]).reshape(shape=self.b.shape))
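
# Shape sanity-check for the layer above (illustrative sketch, not part of the model):
#   layer = Conv3x3Biased(3, 32)
#   y = layer.forward(Tensor.zeros(1, 3, 16, 16))
#   assert y.shape == (1, 32, 14, 14)  # an unpadded 3x3 conv loses 1 pixel per side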
class Vgg7:
  """
  The 'vgg7' waifu2x network.
  Lower quality and slower than even upconv7 (never mind cunet), but very easy to implement and test.
  """
  def __init__(self):
    self.conv1 = Conv3x3Biased(3, 32)
    self.conv2 = Conv3x3Biased(32, 32)
    self.conv3 = Conv3x3Biased(32, 64)
    self.conv4 = Conv3x3Biased(64, 64)
    self.conv5 = Conv3x3Biased(64, 128)
    self.conv6 = Conv3x3Biased(128, 128)
    self.conv7 = Conv3x3Biased(128, 3, True)
  def forward(self, x):
    """
    Forward pass: Actually runs the network.
    Input format: (1, 3, Y, X)
    Output format: (1, 3, Y - 14, X - 14)
    (the -14 is the 7-pixel context border lost on each side)
    """
    x = self.conv1.forward(x).leaky_relu(0.1)
    x = self.conv2.forward(x).leaky_relu(0.1)
    x = self.conv3.forward(x).leaky_relu(0.1)
    x = self.conv4.forward(x).leaky_relu(0.1)
    x = self.conv5.forward(x).leaky_relu(0.1)
    x = self.conv6.forward(x).leaky_relu(0.1)
    x = self.conv7.forward(x)
    return x
  def get_parameters(self) -> list:
    return (self.conv1.get_parameters() + self.conv2.get_parameters() + self.conv3.get_parameters() +
            self.conv4.get_parameters() + self.conv5.get_parameters() + self.conv6.get_parameters() +
            self.conv7.get_parameters())
  def load_from_pretrained(self, intent="art", subtype="scale2.0x"):
    """
    Downloads a nagadomi/waifu2x JSON weight file and loads it.
    """
    import json
    data = json.loads(fetch("https://github.com/nagadomi/waifu2x/raw/master/models/vgg_7/" + intent + "/" + subtype + "_model.json").read_bytes())
    self.load_waifu2x_json(data)
  def load_waifu2x_json(self, data: list):
    """
    Loads weights from one of the waifu2x JSON files, i.e. waifu2x/models/vgg_7/art/noise0_model.json
    data (passed in) is assumed to be the output of json.load or similar on such a file.
    """
    self.conv1.load_waifu2x_json(data[0])
    self.conv2.load_waifu2x_json(data[1])
    self.conv3.load_waifu2x_json(data[2])
    self.conv4.load_waifu2x_json(data[3])
    self.conv5.load_waifu2x_json(data[4])
    self.conv6.load_waifu2x_json(data[5])
    self.conv7.load_waifu2x_json(data[6])
  def forward_tiled(self, image: numpy.ndarray, tile_size: int) -> numpy.ndarray:
    """
    Given an ndarray image as loaded by image_load (NOT a tensor), scales it, pads it, splits it up, forwards the pieces, and reconstitutes it.
    Note that you really shouldn't try to run anything not (1, 3, *, *) through this.
    """
    # Constant that only really gets repeated a ton here.
    context = 7
    context2 = context + context

    # Notably, numpy is used here because it makes this fine manipulation a lot simpler.
    # Scaling first - repeat on axis 2 and axis 3 (Y & X)
    image = image.repeat(2, 2).repeat(2, 3)

    # Resulting image buffer. This is made before the input is padded,
    # since the input has the padded shape right now.
    image_out = numpy.zeros(image.shape)

    # Padding next. Note that this padding is done on the whole image.
    # Padding the tiles would lose critical context, cause seams, etc.
    image = numpy.pad(image, [[0, 0], [0, 0], [context, context], [context, context]], mode="edge")

    # Now for tiling.
    # The output tile size is the usable output from an input tile (tile_size).
    # As such, the tiles overlap, and tile_size must exceed the 14-pixel context overhead.
    out_tile_size = tile_size - context2
    assert out_tile_size > 0, "tile_size must be greater than 14"
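    # Worked example, assuming tile_size=128: out_tile_size=114, so output tiles
    # start at 0, 114, 228, ... and each reads a 128-wide input window at the same origin.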
    for out_y in range(0, image_out.shape[2], out_tile_size):
      for out_x in range(0, image_out.shape[3], out_tile_size):
        # Input is sourced from the same coordinates, but some stuff ought to be
        # noted here for future reference:
        # + out_x/y's equivalent position w/ the padding is out_x + context.
        # + The output, however, is without context. Input needs context.
        # + Therefore, the input rectangle is expanded on all sides by context.
        # + Therefore, the input position has the context subtracted again.
        # + Therefore:
        in_y = out_y
        in_x = out_x
        # not shown: in_w/in_h = tile_size (as opposed to out_tile_size)

        # Extract tile.
        # Note that numpy will auto-crop this at the bottom-right.
        # This will never be a problem, as tiles are specifically chosen within the padded section.
        tile = image[:, :, in_y:in_y + tile_size, in_x:in_x + tile_size]

        # Extracted tile dimensions -> output dimensions
        # This is important because of said cropping; otherwise it'd be the interior tile size.
        out_h = tile.shape[2] - context2
        out_w = tile.shape[3] - context2

        # Process tile.
        tile_t = Tensor(tile)
        tile_fwd_t = self.forward(tile_t)

        # Replace tile.
        image_out[:, :, out_y:out_y + out_h, out_x:out_x + out_w] = tile_fwd_t.numpy()

    return image_out
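
# A minimal end-to-end sketch tying the pieces together. "input.png" and the
# tile size of 156 are arbitrary example choices, not values from the original code.
if __name__ == "__main__":
  vgg7 = Vgg7()
  vgg7.load_from_pretrained()  # art/scale2.0x weights from nagadomi/waifu2x
  na = image_load("input.png")
  na = vgg7.forward_tiled(na, 156)
  image_save("output.png", na)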