Add ImageFlowInjector

James Betker 2020-09-19 10:07:00 -06:00
parent e2a146abc7
commit e0bd68efda
16 changed files with 611 additions and 1 deletion


@@ -0,0 +1,39 @@
from torch.autograd import Function, Variable
from torch.nn.modules.module import Module

import channelnorm_cuda


class ChannelNormFunction(Function):

    @staticmethod
    def forward(ctx, input1, norm_deg=2):
        assert input1.is_contiguous()
        b, _, h, w = input1.size()
        output = input1.new(b, 1, h, w).zero_()

        channelnorm_cuda.forward(input1, output, norm_deg)
        ctx.save_for_backward(input1, output)
        ctx.norm_deg = norm_deg

        return output

    @staticmethod
    def backward(ctx, grad_output):
        input1, output = ctx.saved_tensors

        grad_input1 = Variable(input1.new(input1.size()).zero_())

        channelnorm_cuda.backward(input1, output, grad_output.data,
                                  grad_input1.data, ctx.norm_deg)

        return grad_input1, None


class ChannelNorm(Module):

    def __init__(self, norm_deg=2):
        super(ChannelNorm, self).__init__()
        self.norm_deg = norm_deg

    def forward(self, input1):
        return ChannelNormFunction.apply(input1, self.norm_deg)
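For orientation, a minimal usage sketch (not part of the commit): ChannelNorm reduces a (b, c, h, w) CUDA tensor to a single per-pixel norm channel. It assumes the channelnorm_cuda extension has already been built and that the package directory is importable.

import torch
from channelnorm import ChannelNorm  # import path is an assumption; adjust to where the package lives

norm = ChannelNorm(norm_deg=2)
x = torch.randn(4, 3, 64, 64, device='cuda')  # must be contiguous, which randn output is
y = norm(x)
print(y.shape)  # torch.Size([4, 1, 64, 64])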


@@ -0,0 +1,31 @@
#include <torch/torch.h>
#include <ATen/ATen.h>

#include "channelnorm_kernel.cuh"

int channelnorm_cuda_forward(
    at::Tensor& input1,
    at::Tensor& output,
    int norm_deg) {
  channelnorm_kernel_forward(input1, output, norm_deg);
  return 1;
}

int channelnorm_cuda_backward(
    at::Tensor& input1,
    at::Tensor& output,
    at::Tensor& gradOutput,
    at::Tensor& gradInput1,
    int norm_deg) {
  channelnorm_kernel_backward(input1, output, gradOutput, gradInput1, norm_deg);
  return 1;
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &channelnorm_cuda_forward, "Channel norm forward (CUDA)");
  m.def("backward", &channelnorm_cuda_backward, "Channel norm backward (CUDA)");
}


@ -0,0 +1,16 @@
#pragma once
#include <ATen/ATen.h>
void channelnorm_kernel_forward(
at::Tensor& input1,
at::Tensor& output,
int norm_deg);
void channelnorm_kernel_backward(
at::Tensor& input1,
at::Tensor& output,
at::Tensor& gradOutput,
at::Tensor& gradInput1,
int norm_deg);


@@ -0,0 +1,28 @@
#!/usr/bin/env python3
import os

import torch
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

cxx_args = ['-std=c++11']

nvcc_args = [
    '-gencode', 'arch=compute_52,code=sm_52',
    '-gencode', 'arch=compute_60,code=sm_60',
    '-gencode', 'arch=compute_61,code=sm_61',
    '-gencode', 'arch=compute_70,code=sm_70',
    '-gencode', 'arch=compute_70,code=compute_70'
]

setup(
    name='channelnorm_cuda',
    ext_modules=[
        CUDAExtension('channelnorm_cuda', [
            'channelnorm_cuda.cc',
            'channelnorm_kernel.cu'
        ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
    ],
    cmdclass={
        'build_ext': BuildExtension
    })
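The Python wrapper can only import channelnorm_cuda after this extension has been compiled (for example with python setup.py install from the package directory). For quick experiments, a JIT build along these lines should work as well; this is a sketch, not part of the commit, and assumes it runs from the package directory.

from torch.utils.cpp_extension import load

# Compiles the extension on the fly and returns the loaded module.
channelnorm_cuda = load(
    name='channelnorm_cuda',
    sources=['channelnorm_cuda.cc', 'channelnorm_kernel.cu'],
    extra_cuda_cflags=['-gencode', 'arch=compute_70,code=sm_70'],
    verbose=True)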


@@ -0,0 +1,61 @@
import torch
from torch.nn.modules.module import Module
from torch.autograd import Function

import correlation_cuda


class CorrelationFunction(Function):

    @staticmethod
    def forward(ctx, input1, input2, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1):
        ctx.save_for_backward(input1, input2)
        ctx.pad_size = pad_size
        ctx.kernel_size = kernel_size
        ctx.max_displacement = max_displacement
        ctx.stride1 = stride1
        ctx.stride2 = stride2
        ctx.corr_multiply = corr_multiply

        with torch.cuda.device_of(input1):
            rbot1 = input1.new()
            rbot2 = input2.new()
            output = input1.new()

            correlation_cuda.forward(input1, input2, rbot1, rbot2, output,
                                     ctx.pad_size, ctx.kernel_size, ctx.max_displacement,
                                     ctx.stride1, ctx.stride2, ctx.corr_multiply)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        input1, input2 = ctx.saved_tensors

        with torch.cuda.device_of(input1):
            rbot1 = input1.new()
            rbot2 = input2.new()
            grad_input1 = input1.new()
            grad_input2 = input2.new()

            correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2,
                                      ctx.pad_size, ctx.kernel_size, ctx.max_displacement,
                                      ctx.stride1, ctx.stride2, ctx.corr_multiply)

        return grad_input1, grad_input2, None, None, None, None, None, None


class Correlation(Module):

    def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1):
        super(Correlation, self).__init__()
        self.pad_size = pad_size
        self.kernel_size = kernel_size
        self.max_displacement = max_displacement
        self.stride1 = stride1
        self.stride2 = stride2
        self.corr_multiply = corr_multiply

    def forward(self, input1, input2):
        result = CorrelationFunction.apply(input1, input2, self.pad_size, self.kernel_size,
                                           self.max_displacement, self.stride1, self.stride2,
                                           self.corr_multiply)
        return result
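A usage sketch under the same caveats (correlation_cuda built, package importable). The parameter values below are the ones FlowNet2-style cost volumes commonly use; they are an assumption for illustration, not something fixed by this commit.

import torch
from correlation import Correlation  # import path is an assumption

corr = Correlation(pad_size=20, kernel_size=1, max_displacement=20,
                   stride1=1, stride2=2, corr_multiply=1)
feat1 = torch.randn(2, 256, 48, 64, device='cuda')
feat2 = torch.randn(2, 256, 48, 64, device='cuda')
cost_volume = corr(feat1, feat2)
print(cost_volume.shape)  # torch.Size([2, 441, 48, 64]); 441 = (2 * (20 // 2) + 1) ** 2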


@@ -0,0 +1,173 @@
#include <torch/torch.h>
#include <ATen/ATen.h>
#include <ATen/Context.h>
#include <ATen/cuda/CUDAContext.h>
#include <stdio.h>
#include <iostream>

#include "correlation_cuda_kernel.cuh"

int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& output,
                             int pad_size,
                             int kernel_size,
                             int max_displacement,
                             int stride1,
                             int stride2,
                             int corr_type_multiply)
{
  int batchSize = input1.size(0);
  int nInputChannels = input1.size(1);
  int inputHeight = input1.size(2);
  int inputWidth = input1.size(3);

  int kernel_radius = (kernel_size - 1) / 2;
  int border_radius = kernel_radius + max_displacement;

  int paddedInputHeight = inputHeight + 2 * pad_size;
  int paddedInputWidth = inputWidth + 2 * pad_size;

  int nOutputChannels = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1);

  int outputHeight = ceil(static_cast<float>(paddedInputHeight - 2 * border_radius) / static_cast<float>(stride1));
  int outputwidth = ceil(static_cast<float>(paddedInputWidth - 2 * border_radius) / static_cast<float>(stride1));

  rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
  rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
  output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth});

  rInput1.fill_(0);
  rInput2.fill_(0);
  output.fill_(0);

  int success = correlation_forward_cuda_kernel(
      output,
      output.size(0),
      output.size(1),
      output.size(2),
      output.size(3),
      output.stride(0),
      output.stride(1),
      output.stride(2),
      output.stride(3),
      input1,
      input1.size(1),
      input1.size(2),
      input1.size(3),
      input1.stride(0),
      input1.stride(1),
      input1.stride(2),
      input1.stride(3),
      input2,
      input2.size(1),
      input2.stride(0),
      input2.stride(1),
      input2.stride(2),
      input2.stride(3),
      rInput1,
      rInput2,
      pad_size,
      kernel_size,
      max_displacement,
      stride1,
      stride2,
      corr_type_multiply,
      at::cuda::getCurrentCUDAStream()
      //at::globalContext().getCurrentCUDAStream()
  );

  //check for errors
  if (!success) {
    AT_ERROR("CUDA call failed");
  }

  return 1;
}

int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput,
                              at::Tensor& gradInput1, at::Tensor& gradInput2,
                              int pad_size,
                              int kernel_size,
                              int max_displacement,
                              int stride1,
                              int stride2,
                              int corr_type_multiply)
{
  int batchSize = input1.size(0);
  int nInputChannels = input1.size(1);
  int paddedInputHeight = input1.size(2) + 2 * pad_size;
  int paddedInputWidth = input1.size(3) + 2 * pad_size;

  int height = input1.size(2);
  int width = input1.size(3);

  rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
  rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
  gradInput1.resize_({batchSize, nInputChannels, height, width});
  gradInput2.resize_({batchSize, nInputChannels, height, width});

  rInput1.fill_(0);
  rInput2.fill_(0);
  gradInput1.fill_(0);
  gradInput2.fill_(0);

  int success = correlation_backward_cuda_kernel(gradOutput,
      gradOutput.size(0),
      gradOutput.size(1),
      gradOutput.size(2),
      gradOutput.size(3),
      gradOutput.stride(0),
      gradOutput.stride(1),
      gradOutput.stride(2),
      gradOutput.stride(3),
      input1,
      input1.size(1),
      input1.size(2),
      input1.size(3),
      input1.stride(0),
      input1.stride(1),
      input1.stride(2),
      input1.stride(3),
      input2,
      input2.stride(0),
      input2.stride(1),
      input2.stride(2),
      input2.stride(3),
      gradInput1,
      gradInput1.stride(0),
      gradInput1.stride(1),
      gradInput1.stride(2),
      gradInput1.stride(3),
      gradInput2,
      gradInput2.size(1),
      gradInput2.stride(0),
      gradInput2.stride(1),
      gradInput2.stride(2),
      gradInput2.stride(3),
      rInput1,
      rInput2,
      pad_size,
      kernel_size,
      max_displacement,
      stride1,
      stride2,
      corr_type_multiply,
      at::cuda::getCurrentCUDAStream()
      //at::globalContext().getCurrentCUDAStream()
  );

  if (!success) {
    AT_ERROR("CUDA call failed");
  }

  return 1;
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &correlation_forward_cuda, "Correlation forward (CUDA)");
  m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)");
}
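The shape arithmetic at the top of correlation_forward_cuda fixes the cost-volume layout before the kernel launches. A standalone Python sketch of the same arithmetic (illustrative only, not code from this commit) is handy for sanity-checking expected output shapes:

import math

def correlation_output_shape(batch, height, width, pad_size, kernel_size,
                             max_displacement, stride1, stride2):
    # Mirrors the computation in correlation_forward_cuda above.
    kernel_radius = (kernel_size - 1) // 2
    border_radius = kernel_radius + max_displacement
    padded_h = height + 2 * pad_size
    padded_w = width + 2 * pad_size
    n_output_channels = ((max_displacement // stride2) * 2 + 1) ** 2
    out_h = math.ceil((padded_h - 2 * border_radius) / stride1)
    out_w = math.ceil((padded_w - 2 * border_radius) / stride1)
    return (batch, n_output_channels, out_h, out_w)

print(correlation_output_shape(2, 48, 64, pad_size=20, kernel_size=1,
                               max_displacement=20, stride1=1, stride2=2))
# (2, 441, 48, 64)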


@@ -0,0 +1,91 @@
#pragma once

#include <ATen/ATen.h>
#include <ATen/Context.h>
#include <cuda_runtime.h>

int correlation_forward_cuda_kernel(at::Tensor& output,
    int ob,
    int oc,
    int oh,
    int ow,
    int osb,
    int osc,
    int osh,
    int osw,

    at::Tensor& input1,
    int ic,
    int ih,
    int iw,
    int isb,
    int isc,
    int ish,
    int isw,

    at::Tensor& input2,
    int gc,
    int gsb,
    int gsc,
    int gsh,
    int gsw,

    at::Tensor& rInput1,
    at::Tensor& rInput2,
    int pad_size,
    int kernel_size,
    int max_displacement,
    int stride1,
    int stride2,
    int corr_type_multiply,
    cudaStream_t stream);

int correlation_backward_cuda_kernel(
    at::Tensor& gradOutput,
    int gob,
    int goc,
    int goh,
    int gow,
    int gosb,
    int gosc,
    int gosh,
    int gosw,

    at::Tensor& input1,
    int ic,
    int ih,
    int iw,
    int isb,
    int isc,
    int ish,
    int isw,

    at::Tensor& input2,
    int gsb,
    int gsc,
    int gsh,
    int gsw,

    at::Tensor& gradInput1,
    int gisb,
    int gisc,
    int gish,
    int gisw,

    at::Tensor& gradInput2,
    int ggc,
    int ggsb,
    int ggsc,
    int ggsh,
    int ggsw,

    at::Tensor& rInput1,
    at::Tensor& rInput2,
    int pad_size,
    int kernel_size,
    int max_displacement,
    int stride1,
    int stride2,
    int corr_type_multiply,
    cudaStream_t stream);


@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import os

import torch
from setuptools import setup, find_packages
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

cxx_args = ['-std=c++11']

nvcc_args = [
    '-gencode', 'arch=compute_50,code=sm_50',
    '-gencode', 'arch=compute_52,code=sm_52',
    '-gencode', 'arch=compute_60,code=sm_60',
    '-gencode', 'arch=compute_61,code=sm_61',
    '-gencode', 'arch=compute_70,code=sm_70',
    '-gencode', 'arch=compute_70,code=compute_70'
]

setup(
    name='correlation_cuda',
    ext_modules=[
        CUDAExtension('correlation_cuda', [
            'correlation_cuda.cc',
            'correlation_cuda_kernel.cu'
        ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
    ],
    cmdclass={
        'build_ext': BuildExtension
    })


@@ -0,0 +1,49 @@
from torch.nn.modules.module import Module
from torch.autograd import Function, Variable

import resample2d_cuda


class Resample2dFunction(Function):

    @staticmethod
    def forward(ctx, input1, input2, kernel_size=1, bilinear=True):
        assert input1.is_contiguous()
        assert input2.is_contiguous()

        ctx.save_for_backward(input1, input2)
        ctx.kernel_size = kernel_size
        ctx.bilinear = bilinear

        _, d, _, _ = input1.size()
        b, _, h, w = input2.size()
        output = input1.new(b, d, h, w).zero_()

        resample2d_cuda.forward(input1, input2, output, kernel_size, bilinear)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        grad_output = grad_output.contiguous()
        assert grad_output.is_contiguous()

        input1, input2 = ctx.saved_tensors

        grad_input1 = Variable(input1.new(input1.size()).zero_())
        grad_input2 = Variable(input1.new(input2.size()).zero_())

        resample2d_cuda.backward(input1, input2, grad_output.data,
                                 grad_input1.data, grad_input2.data,
                                 ctx.kernel_size, ctx.bilinear)

        return grad_input1, grad_input2, None, None


class Resample2d(Module):

    def __init__(self, kernel_size=1, bilinear=True):
        super(Resample2d, self).__init__()
        self.kernel_size = kernel_size
        self.bilinear = bilinear

    def forward(self, input1, input2):
        input1_c = input1.contiguous()
        return Resample2dFunction.apply(input1_c, input2, self.kernel_size, self.bilinear)
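A quick sketch of what this module does (again assuming the built resample2d_cuda extension and an importable package directory): given an image and a dense flow field, Resample2d warps the image along the flow. With an all-zero flow the output should reproduce the input, up to border handling.

import torch
from resample2d import Resample2d  # import path is an assumption

resample = Resample2d()
image = torch.randn(1, 3, 128, 128, device='cuda')
flow = torch.zeros(1, 2, 128, 128, device='cuda')  # (dx, dy) per pixel; zero flow ~ identity warp
warped = resample(image, flow)
print(warped.shape)  # torch.Size([1, 3, 128, 128])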


@@ -0,0 +1,32 @@
#include <ATen/ATen.h>
#include <torch/torch.h>

#include "resample2d_kernel.cuh"

int resample2d_cuda_forward(
    at::Tensor& input1,
    at::Tensor& input2,
    at::Tensor& output,
    int kernel_size, bool bilinear) {
  resample2d_kernel_forward(input1, input2, output, kernel_size, bilinear);
  return 1;
}

int resample2d_cuda_backward(
    at::Tensor& input1,
    at::Tensor& input2,
    at::Tensor& gradOutput,
    at::Tensor& gradInput1,
    at::Tensor& gradInput2,
    int kernel_size, bool bilinear) {
  resample2d_kernel_backward(input1, input2, gradOutput, gradInput1, gradInput2, kernel_size, bilinear);
  return 1;
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &resample2d_cuda_forward, "Resample2D forward (CUDA)");
  m.def("backward", &resample2d_cuda_backward, "Resample2D backward (CUDA)");
}


@@ -0,0 +1,19 @@
#pragma once

#include <ATen/ATen.h>

void resample2d_kernel_forward(
    at::Tensor& input1,
    at::Tensor& input2,
    at::Tensor& output,
    int kernel_size,
    bool bilinear);

void resample2d_kernel_backward(
    at::Tensor& input1,
    at::Tensor& input2,
    at::Tensor& gradOutput,
    at::Tensor& gradInput1,
    at::Tensor& gradInput2,
    int kernel_size,
    bool bilinear);


@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import os

import torch
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

cxx_args = ['-std=c++11']

nvcc_args = [
    '-gencode', 'arch=compute_50,code=sm_50',
    '-gencode', 'arch=compute_52,code=sm_52',
    '-gencode', 'arch=compute_60,code=sm_60',
    '-gencode', 'arch=compute_61,code=sm_61',
    '-gencode', 'arch=compute_70,code=sm_70',
    '-gencode', 'arch=compute_70,code=compute_70'
]

setup(
    name='resample2d_cuda',
    ext_modules=[
        CUDAExtension('resample2d_cuda', [
            'resample2d_cuda.cc',
            'resample2d_kernel.cu'
        ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
    ],
    cmdclass={
        'build_ext': BuildExtension
    })


@@ -19,6 +19,8 @@ def create_injector(opt_inject, env):
        return GreyInjector(opt_inject, env)
    elif type == 'interpolate':
        return InterpolateInjector(opt_inject, env)
    elif type == 'imageflow':
        return ImageFlowInjector(opt_inject, env)
    else:
        raise NotImplementedError
@@ -142,4 +144,15 @@ class InterpolateInjector(Injector):
    def forward(self, state):
        scaled = torch.nn.functional.interpolate(state[self.opt['in']], scale_factor=self.opt['scale_factor'],
                                                 mode=self.opt['mode'])
        return {self.opt['out']: scaled}
        return {self.opt['out']: scaled}


class ImageFlowInjector(Injector):
    def __init__(self, opt, env):
        # Requires the custom resample2d CUDA kernel to be built, so only import it when this injector is actually used.
        from models.networks.layers.resample2d_package.resample2d import Resample2d
        super(ImageFlowInjector, self).__init__(opt, env)
        self.resample = Resample2d()

    def forward(self, state):
        return self.resample(state[self.opt['in']], state[self.opt['flow']])
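To make the wiring concrete, here is a rough sketch of how the new injector could be invoked. The option keys ('type', 'in', 'flow', 'out') come from the code above; the import path, the config layout, and the env handling are assumptions, not part of this commit.

import torch
from models.steps.injectors import create_injector  # module path is an assumption

# Hypothetical injector options, mirroring how the other injectors are configured.
opt_inject = {'type': 'imageflow', 'in': 'frame', 'flow': 'flow', 'out': 'warped_frame'}
injector = create_injector(opt_inject, env=None)  # env=None only for illustration

state = {
    'frame': torch.randn(1, 3, 128, 128, device='cuda'),
    'flow': torch.zeros(1, 2, 128, 128, device='cuda'),
}
warped = injector.forward(state)  # the frame warped by the flow field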