From e0bd68efdafad4c70cdb71f53f336906b51b0ee5 Mon Sep 17 00:00:00 2001
From: James Betker
Date: Sat, 19 Sep 2020 10:07:00 -0600
Subject: [PATCH] Add ImageFlowInjector

---
 .../layers/channelnorm_package/__init__.py     |   0
 .../layers/channelnorm_package/channelnorm.py  |  39 ++++
 .../channelnorm_package/channelnorm_cuda.cc    |  31 ++++
 .../channelnorm_package/channelnorm_kernel.cuh |  16 ++
 .../layers/channelnorm_package/setup.py        |  28 +++
 .../layers/correlation_package/__init__.py     |   0
 .../layers/correlation_package/correlation.py  |  61 ++++++
 .../correlation_package/correlation_cuda.cc    | 173 ++++++++++++++++++
 .../correlation_cuda_kernel.cuh                |  91 +++++++++
 .../layers/correlation_package/setup.py        |  29 +++
 .../layers/resample2d_package/__init__.py      |   0
 .../layers/resample2d_package/resample2d.py    |  49 +++++
 .../resample2d_package/resample2d_cuda.cc      |  32 ++++
 .../resample2d_package/resample2d_kernel.cuh   |  19 ++
 .../models/layers/resample2d_package/setup.py  |  29 +++
 codes/models/steps/injectors.py                |  15 +-
 16 files changed, 611 insertions(+), 1 deletion(-)
 create mode 100644 codes/models/layers/channelnorm_package/__init__.py
 create mode 100644 codes/models/layers/channelnorm_package/channelnorm.py
 create mode 100644 codes/models/layers/channelnorm_package/channelnorm_cuda.cc
 create mode 100644 codes/models/layers/channelnorm_package/channelnorm_kernel.cuh
 create mode 100644 codes/models/layers/channelnorm_package/setup.py
 create mode 100644 codes/models/layers/correlation_package/__init__.py
 create mode 100644 codes/models/layers/correlation_package/correlation.py
 create mode 100644 codes/models/layers/correlation_package/correlation_cuda.cc
 create mode 100644 codes/models/layers/correlation_package/correlation_cuda_kernel.cuh
 create mode 100644 codes/models/layers/correlation_package/setup.py
 create mode 100644 codes/models/layers/resample2d_package/__init__.py
 create mode 100644 codes/models/layers/resample2d_package/resample2d.py
 create mode 100644 codes/models/layers/resample2d_package/resample2d_cuda.cc
 create mode 100644 codes/models/layers/resample2d_package/resample2d_kernel.cuh
 create mode 100644 codes/models/layers/resample2d_package/setup.py

diff --git a/codes/models/layers/channelnorm_package/__init__.py b/codes/models/layers/channelnorm_package/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/codes/models/layers/channelnorm_package/channelnorm.py b/codes/models/layers/channelnorm_package/channelnorm.py
new file mode 100644
index 00000000..76301af5
--- /dev/null
+++ b/codes/models/layers/channelnorm_package/channelnorm.py
@@ -0,0 +1,39 @@
+from torch.autograd import Function, Variable
+from torch.nn.modules.module import Module
+import channelnorm_cuda
+
+class ChannelNormFunction(Function):
+
+    @staticmethod
+    def forward(ctx, input1, norm_deg=2):
+        assert input1.is_contiguous()
+        b, _, h, w = input1.size()
+        output = input1.new(b, 1, h, w).zero_()
+
+        channelnorm_cuda.forward(input1, output, norm_deg)
+        ctx.save_for_backward(input1, output)
+        ctx.norm_deg = norm_deg
+
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input1, output = ctx.saved_tensors
+
+        grad_input1 = Variable(input1.new(input1.size()).zero_())
+
+        channelnorm_cuda.backward(input1, output, grad_output.data,
+                                  grad_input1.data, ctx.norm_deg)
+
+        return grad_input1, None
+
+
+class ChannelNorm(Module):
+
+    def __init__(self, norm_deg=2):
+        super(ChannelNorm, self).__init__()
+        self.norm_deg = norm_deg
+
+    def forward(self, input1):
+        return ChannelNormFunction.apply(input1, self.norm_deg)
+
diff --git a/codes/models/layers/channelnorm_package/channelnorm_cuda.cc b/codes/models/layers/channelnorm_package/channelnorm_cuda.cc
new file mode 100644
index 00000000..69d82eb1
--- /dev/null
+++ b/codes/models/layers/channelnorm_package/channelnorm_cuda.cc
@@ -0,0 +1,31 @@
+#include <torch/torch.h>
+#include <ATen/ATen.h>
+
+#include "channelnorm_kernel.cuh"
+
+int channelnorm_cuda_forward(
+    at::Tensor& input1,
+    at::Tensor& output,
+    int norm_deg) {
+
+    channelnorm_kernel_forward(input1, output, norm_deg);
+    return 1;
+}
+
+
+int channelnorm_cuda_backward(
+    at::Tensor& input1,
+    at::Tensor& output,
+    at::Tensor& gradOutput,
+    at::Tensor& gradInput1,
+    int norm_deg) {
+
+    channelnorm_kernel_backward(input1, output, gradOutput, gradInput1, norm_deg);
+    return 1;
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+  m.def("forward", &channelnorm_cuda_forward, "Channel norm forward (CUDA)");
+  m.def("backward", &channelnorm_cuda_backward, "Channel norm backward (CUDA)");
+}
+
diff --git a/codes/models/layers/channelnorm_package/channelnorm_kernel.cuh b/codes/models/layers/channelnorm_package/channelnorm_kernel.cuh
new file mode 100644
index 00000000..3e6223f7
--- /dev/null
+++ b/codes/models/layers/channelnorm_package/channelnorm_kernel.cuh
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <ATen/ATen.h>
+
+void channelnorm_kernel_forward(
+    at::Tensor& input1,
+    at::Tensor& output,
+    int norm_deg);
+
+
+void channelnorm_kernel_backward(
+    at::Tensor& input1,
+    at::Tensor& output,
+    at::Tensor& gradOutput,
+    at::Tensor& gradInput1,
+    int norm_deg);
diff --git a/codes/models/layers/channelnorm_package/setup.py b/codes/models/layers/channelnorm_package/setup.py
new file mode 100644
index 00000000..5b9e86a4
--- /dev/null
+++ b/codes/models/layers/channelnorm_package/setup.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+import os
+import torch
+
+from setuptools import setup
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+
+cxx_args = ['-std=c++11']
+
+nvcc_args = [
+    '-gencode', 'arch=compute_52,code=sm_52',
+    '-gencode', 'arch=compute_60,code=sm_60',
+    '-gencode', 'arch=compute_61,code=sm_61',
+    '-gencode', 'arch=compute_70,code=sm_70',
+    '-gencode', 'arch=compute_70,code=compute_70'
+]
+
+setup(
+    name='channelnorm_cuda',
+    ext_modules=[
+        CUDAExtension('channelnorm_cuda', [
+            'channelnorm_cuda.cc',
+            'channelnorm_kernel.cu'
+        ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
+    ],
+    cmdclass={
+        'build_ext': BuildExtension
+    })
diff --git a/codes/models/layers/correlation_package/__init__.py b/codes/models/layers/correlation_package/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/codes/models/layers/correlation_package/correlation.py b/codes/models/layers/correlation_package/correlation.py
new file mode 100644
index 00000000..2dcdf6eb
--- /dev/null
+++ b/codes/models/layers/correlation_package/correlation.py
@@ -0,0 +1,61 @@
+import torch
+from torch.nn.modules.module import Module
+from torch.autograd import Function
+import correlation_cuda
+
+class CorrelationFunction(Function):
+
+    @staticmethod
+    def forward(ctx, input1, input2, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1):
+        ctx.save_for_backward(input1, input2)
+
+        ctx.pad_size = pad_size
+        ctx.kernel_size = kernel_size
+        ctx.max_displacement = max_displacement
+        ctx.stride1 = stride1
+        ctx.stride2 = stride2
+        ctx.corr_multiply = corr_multiply
+
+        with torch.cuda.device_of(input1):
+            rbot1 = input1.new()
+            rbot2 = input2.new()
+            output = input1.new()
+
+            correlation_cuda.forward(input1, input2, rbot1, rbot2, output,
+                ctx.pad_size, ctx.kernel_size, ctx.max_displacement, ctx.stride1, ctx.stride2, ctx.corr_multiply)
+
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input1, input2 = ctx.saved_tensors
+
+        with torch.cuda.device_of(input1):
+            rbot1 = input1.new()
+            rbot2 = input2.new()
+
+            grad_input1 = input1.new()
+            grad_input2 = input2.new()
+
+            correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2,
+                ctx.pad_size, ctx.kernel_size, ctx.max_displacement, ctx.stride1, ctx.stride2, ctx.corr_multiply)
+
+        return grad_input1, grad_input2, None, None, None, None, None, None
+
+
+class Correlation(Module):
+    def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1):
+        super(Correlation, self).__init__()
+        self.pad_size = pad_size
+        self.kernel_size = kernel_size
+        self.max_displacement = max_displacement
+        self.stride1 = stride1
+        self.stride2 = stride2
+        self.corr_multiply = corr_multiply
+
+    def forward(self, input1, input2):
+
+        result = CorrelationFunction.apply(input1, input2, self.pad_size, self.kernel_size, self.max_displacement, self.stride1, self.stride2, self.corr_multiply)
+
+        return result
+
diff --git a/codes/models/layers/correlation_package/correlation_cuda.cc b/codes/models/layers/correlation_package/correlation_cuda.cc
new file mode 100644
index 00000000..feccd652
--- /dev/null
+++ b/codes/models/layers/correlation_package/correlation_cuda.cc
@@ -0,0 +1,173 @@
+#include <torch/torch.h>
+#include <ATen/ATen.h>
+#include <ATen/Context.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <stdio.h>
+#include <iostream>
+
+#include "correlation_cuda_kernel.cuh"
+
+int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& output,
+                       int pad_size,
+                       int kernel_size,
+                       int max_displacement,
+                       int stride1,
+                       int stride2,
+                       int corr_type_multiply)
+{
+
+  int batchSize = input1.size(0);
+
+  int nInputChannels = input1.size(1);
+  int inputHeight = input1.size(2);
+  int inputWidth = input1.size(3);
+
+  int kernel_radius = (kernel_size - 1) / 2;
+  int border_radius = kernel_radius + max_displacement;
+
+  int paddedInputHeight = inputHeight + 2 * pad_size;
+  int paddedInputWidth = inputWidth + 2 * pad_size;
+
+  int nOutputChannels = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1);
+
+  int outputHeight = ceil(static_cast<float>(paddedInputHeight - 2 * border_radius) / static_cast<float>(stride1));
+  int outputwidth = ceil(static_cast<float>(paddedInputWidth - 2 * border_radius) / static_cast<float>(stride1));
+
+  rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
+  rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
+  output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth});
+
+  rInput1.fill_(0);
+  rInput2.fill_(0);
+  output.fill_(0);
+
+  int success = correlation_forward_cuda_kernel(
+    output,
+    output.size(0),
+    output.size(1),
+    output.size(2),
+    output.size(3),
+    output.stride(0),
+    output.stride(1),
+    output.stride(2),
+    output.stride(3),
+    input1,
+    input1.size(1),
+    input1.size(2),
+    input1.size(3),
+    input1.stride(0),
+    input1.stride(1),
+    input1.stride(2),
+    input1.stride(3),
+    input2,
+    input2.size(1),
+    input2.stride(0),
+    input2.stride(1),
+    input2.stride(2),
+    input2.stride(3),
+    rInput1,
+    rInput2,
+    pad_size,
+    kernel_size,
+    max_displacement,
+    stride1,
+    stride2,
+    corr_type_multiply,
+    at::cuda::getCurrentCUDAStream()
+    //at::globalContext().getCurrentCUDAStream()
+  );
+
+  //check for errors
+  if (!success) {
+    AT_ERROR("CUDA call failed");
+  }
+
+  return 1;
+
+}
+
+int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput,
+                       at::Tensor& gradInput1, at::Tensor& gradInput2,
+                       int pad_size,
+                       int kernel_size,
+                       int max_displacement,
+                       int stride1,
+                       int stride2,
+                       int corr_type_multiply)
+{
+
+  int batchSize = input1.size(0);
+  int nInputChannels = input1.size(1);
+  int paddedInputHeight = input1.size(2)+ 2 * pad_size;
+  int paddedInputWidth = input1.size(3)+ 2 * pad_size;
+
+  int height = input1.size(2);
+  int width = input1.size(3);
+
+  rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
+  rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
+  gradInput1.resize_({batchSize, nInputChannels, height, width});
+  gradInput2.resize_({batchSize, nInputChannels, height, width});
+
+  rInput1.fill_(0);
+  rInput2.fill_(0);
+  gradInput1.fill_(0);
+  gradInput2.fill_(0);
+
+  int success = correlation_backward_cuda_kernel(gradOutput,
+    gradOutput.size(0),
+    gradOutput.size(1),
+    gradOutput.size(2),
+    gradOutput.size(3),
+    gradOutput.stride(0),
+    gradOutput.stride(1),
+    gradOutput.stride(2),
+    gradOutput.stride(3),
+    input1,
+    input1.size(1),
+    input1.size(2),
+    input1.size(3),
+    input1.stride(0),
+    input1.stride(1),
+    input1.stride(2),
+    input1.stride(3),
+    input2,
+    input2.stride(0),
+    input2.stride(1),
+    input2.stride(2),
+    input2.stride(3),
+    gradInput1,
+    gradInput1.stride(0),
+    gradInput1.stride(1),
+    gradInput1.stride(2),
+    gradInput1.stride(3),
+    gradInput2,
+    gradInput2.size(1),
+    gradInput2.stride(0),
+    gradInput2.stride(1),
+    gradInput2.stride(2),
+    gradInput2.stride(3),
+    rInput1,
+    rInput2,
+    pad_size,
+    kernel_size,
+    max_displacement,
+    stride1,
+    stride2,
+    corr_type_multiply,
+    at::cuda::getCurrentCUDAStream()
+    //at::globalContext().getCurrentCUDAStream()
+  );
+
+  if (!success) {
+    AT_ERROR("CUDA call failed");
+  }
+
+  return 1;
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+  m.def("forward", &correlation_forward_cuda, "Correlation forward (CUDA)");
+  m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)");
+}
+
diff --git a/codes/models/layers/correlation_package/correlation_cuda_kernel.cuh b/codes/models/layers/correlation_package/correlation_cuda_kernel.cuh
new file mode 100644
index 00000000..1586d3af
--- /dev/null
+++ b/codes/models/layers/correlation_package/correlation_cuda_kernel.cuh
@@ -0,0 +1,91 @@
+#pragma once
+
+#include <ATen/ATen.h>
+#include <ATen/Context.h>
+#include <ATen/cuda/CUDAContext.h>
+
+int correlation_forward_cuda_kernel(at::Tensor& output,
+    int ob,
+    int oc,
+    int oh,
+    int ow,
+    int osb,
+    int osc,
+    int osh,
+    int osw,
+
+    at::Tensor& input1,
+    int ic,
+    int ih,
+    int iw,
+    int isb,
+    int isc,
+    int ish,
+    int isw,
+
+    at::Tensor& input2,
+    int gc,
+    int gsb,
+    int gsc,
+    int gsh,
+    int gsw,
+
+    at::Tensor& rInput1,
+    at::Tensor& rInput2,
+    int pad_size,
+    int kernel_size,
+    int max_displacement,
+    int stride1,
+    int stride2,
+    int corr_type_multiply,
+    cudaStream_t stream);
+
+
+int correlation_backward_cuda_kernel(
+    at::Tensor& gradOutput,
+    int gob,
+    int goc,
+    int goh,
+    int gow,
+    int gosb,
+    int gosc,
+    int gosh,
+    int gosw,
+
+    at::Tensor& input1,
+    int ic,
+    int ih,
+    int iw,
+    int isb,
+    int isc,
+    int ish,
+    int isw,
+
+    at::Tensor& input2,
+    int gsb,
+    int gsc,
+    int gsh,
+    int gsw,
+
+    at::Tensor& gradInput1,
+    int gisb,
+    int gisc,
+    int gish,
+    int gisw,
+
+    at::Tensor& gradInput2,
+    int ggc,
+    int ggsb,
+    int ggsc,
+    int ggsh,
+    int ggsw,
+
+    at::Tensor& rInput1,
+    at::Tensor& rInput2,
+    int pad_size,
+    int kernel_size,
+    int max_displacement,
+    int stride1,
+    int stride2,
+    int corr_type_multiply,
+    cudaStream_t stream);
diff --git a/codes/models/layers/correlation_package/setup.py b/codes/models/layers/correlation_package/setup.py
new file mode 100644
index 00000000..48b7d73a
--- /dev/null
+++ b/codes/models/layers/correlation_package/setup.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+import os
+import torch
+
+from setuptools import setup, find_packages
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+
+cxx_args = ['-std=c++11']
+
+nvcc_args = [
+    '-gencode', 'arch=compute_50,code=sm_50',
+    '-gencode', 'arch=compute_52,code=sm_52',
+    '-gencode', 'arch=compute_60,code=sm_60',
+    '-gencode', 'arch=compute_61,code=sm_61',
+    '-gencode', 'arch=compute_70,code=sm_70',
+    '-gencode', 'arch=compute_70,code=compute_70'
+]
+
+setup(
+    name='correlation_cuda',
+    ext_modules=[
+        CUDAExtension('correlation_cuda', [
+            'correlation_cuda.cc',
+            'correlation_cuda_kernel.cu'
+        ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
+    ],
+    cmdclass={
+        'build_ext': BuildExtension
+    })
diff --git a/codes/models/layers/resample2d_package/__init__.py b/codes/models/layers/resample2d_package/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/codes/models/layers/resample2d_package/resample2d.py b/codes/models/layers/resample2d_package/resample2d.py
new file mode 100644
index 00000000..92ea0d01
--- /dev/null
+++ b/codes/models/layers/resample2d_package/resample2d.py
@@ -0,0 +1,49 @@
+from torch.nn.modules.module import Module
+from torch.autograd import Function, Variable
+import resample2d_cuda
+
+class Resample2dFunction(Function):
+
+    @staticmethod
+    def forward(ctx, input1, input2, kernel_size=1, bilinear=True):
+        assert input1.is_contiguous()
+        assert input2.is_contiguous()
+
+        ctx.save_for_backward(input1, input2)
+        ctx.kernel_size = kernel_size
+        ctx.bilinear = bilinear
+
+        _, d, _, _ = input1.size()
+        b, _, h, w = input2.size()
+        output = input1.new(b, d, h, w).zero_()
+
+        resample2d_cuda.forward(input1, input2, output, kernel_size, bilinear)
+
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        grad_output = grad_output.contiguous()
+        assert grad_output.is_contiguous()
+
+        input1, input2 = ctx.saved_tensors
+
+        grad_input1 = Variable(input1.new(input1.size()).zero_())
+        grad_input2 = Variable(input1.new(input2.size()).zero_())
+
+        resample2d_cuda.backward(input1, input2, grad_output.data,
+                                 grad_input1.data, grad_input2.data,
+                                 ctx.kernel_size, ctx.bilinear)
+
+        return grad_input1, grad_input2, None, None
+
+class Resample2d(Module):
+
+    def __init__(self, kernel_size=1, bilinear=True):
+        super(Resample2d, self).__init__()
+        self.kernel_size = kernel_size
+        self.bilinear = bilinear
+
+    def forward(self, input1, input2):
+        input1_c = input1.contiguous()
+        return Resample2dFunction.apply(input1_c, input2, self.kernel_size, self.bilinear)
diff --git a/codes/models/layers/resample2d_package/resample2d_cuda.cc b/codes/models/layers/resample2d_package/resample2d_cuda.cc
new file mode 100644
index 00000000..75cc6260
--- /dev/null
+++ b/codes/models/layers/resample2d_package/resample2d_cuda.cc
@@ -0,0 +1,32 @@
+#include <ATen/ATen.h>
+#include <torch/torch.h>
+
+#include "resample2d_kernel.cuh"
+
+int resample2d_cuda_forward(
+    at::Tensor& input1,
+    at::Tensor& input2,
+    at::Tensor& output,
+    int kernel_size, bool bilinear) {
+      resample2d_kernel_forward(input1, input2, output, kernel_size, bilinear);
+    return 1;
+}
+
+int resample2d_cuda_backward(
+    at::Tensor& input1,
+    at::Tensor& input2,
+    at::Tensor& gradOutput,
+    at::Tensor& gradInput1,
+    at::Tensor& gradInput2,
+    int kernel_size, bool bilinear) {
+      resample2d_kernel_backward(input1, input2, gradOutput, gradInput1, gradInput2, kernel_size, bilinear);
+    return 1;
+}
+
+
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+  m.def("forward", &resample2d_cuda_forward, "Resample2D forward (CUDA)");
+  m.def("backward", &resample2d_cuda_backward, "Resample2D backward (CUDA)");
+}
+
diff --git a/codes/models/layers/resample2d_package/resample2d_kernel.cuh b/codes/models/layers/resample2d_package/resample2d_kernel.cuh
new file mode 100644
index 00000000..a2595159
--- /dev/null
+++ b/codes/models/layers/resample2d_package/resample2d_kernel.cuh
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <ATen/ATen.h>
+
+void resample2d_kernel_forward(
+    at::Tensor& input1,
+    at::Tensor& input2,
+    at::Tensor& output,
+    int kernel_size,
+    bool bilinear);
+
+void resample2d_kernel_backward(
+    at::Tensor& input1,
+    at::Tensor& input2,
+    at::Tensor& gradOutput,
+    at::Tensor& gradInput1,
+    at::Tensor& gradInput2,
+    int kernel_size,
+    bool bilinear);
\ No newline at end of file
diff --git a/codes/models/layers/resample2d_package/setup.py b/codes/models/layers/resample2d_package/setup.py
new file mode 100644
index 00000000..bbedb255
--- /dev/null
+++ b/codes/models/layers/resample2d_package/setup.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+import os
+import torch
+
+from setuptools import setup
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+
+cxx_args = ['-std=c++11']
+
+nvcc_args = [
+    '-gencode', 'arch=compute_50,code=sm_50',
+    '-gencode', 'arch=compute_52,code=sm_52',
+    '-gencode', 'arch=compute_60,code=sm_60',
+    '-gencode', 'arch=compute_61,code=sm_61',
+    '-gencode', 'arch=compute_70,code=sm_70',
+    '-gencode', 'arch=compute_70,code=compute_70'
+]
+
+setup(
+    name='resample2d_cuda',
+    ext_modules=[
+        CUDAExtension('resample2d_cuda', [
+            'resample2d_cuda.cc',
+            'resample2d_kernel.cu'
+        ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
+    ],
+    cmdclass={
+        'build_ext': BuildExtension
+    })
diff --git a/codes/models/steps/injectors.py b/codes/models/steps/injectors.py
index ab1101a1..7ba02420 100644
--- a/codes/models/steps/injectors.py
+++ b/codes/models/steps/injectors.py
@@ -19,6 +19,8 @@ def create_injector(opt_inject, env):
         return GreyInjector(opt_inject, env)
     elif type == 'interpolate':
         return InterpolateInjector(opt_inject, env)
+    elif type == 'imageflow':
+        return ImageFlowInjector(opt_inject, env)
     else:
         raise NotImplementedError
 
@@ -142,4 +144,15 @@ class InterpolateInjector(Injector):
     def forward(self, state):
         scaled = torch.nn.functional.interpolate(state[self.opt['in']], scale_factor=self.opt['scale_factor'],
                                                  mode=self.opt['mode'])
-        return {self.opt['out']: scaled}
\ No newline at end of file
+        return {self.opt['out']: scaled}
+
+
+class ImageFlowInjector(Injector):
+    def __init__(self, opt, env):
+        # Requires building this custom cuda kernel. Only require it if explicitly needed.
+        from models.networks.layers.resample2d_package.resample2d import Resample2d
+        super(ImageFlowInjector, self).__init__(opt, env)
+        self.resample = Resample2d()
+
+    def forward(self, state):
+        return self.resample(state[self.opt['in']], state[self.opt['flow']])
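
Note: the three packages above are the stock custom CUDA layers from NVIDIA's flownet2-pytorch, and each must be compiled with its setup.py before the corresponding *_cuda module (and therefore ImageFlowInjector) can be imported. The sketch below is not part of the patch; it shows what the injector does at runtime: warp an image batch with a per-pixel flow field via Resample2d. It assumes the resample2d_cuda extension has been installed, a CUDA device is available, and that the repo's codes/ directory is the import root (the injector itself imports models.networks.layers..., so adjust the path to wherever the package actually lives); the tensor names and shapes are illustrative.

    # Build the extension first (assumed), e.g.:
    #   cd codes/models/layers/resample2d_package && python3 setup.py install
    import torch
    from models.layers.resample2d_package.resample2d import Resample2d  # path per this patch's layout

    resample = Resample2d()                     # bilinear sampling by default
    image = torch.randn(4, 3, 64, 64).cuda()    # (N, C, H, W) frames to warp
    flow = torch.randn(4, 2, 64, 64).cuda()     # (N, 2, H, W) per-pixel (dx, dy) offsets
    warped = resample(image.contiguous(), flow.contiguous())
    assert warped.shape == image.shape          # (N, C, H, W), sampled at flow targets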