resnet-classifier/image_classifier/inference.py

103 lines
2.9 KiB
Python
Executable File

import torch
import torchaudio
import soundfile
import time
import logging
_logger = logging.getLogger(__name__)
from torch import Tensor
from einops import rearrange
from pathlib import Path
from .utils import to_device, set_seed, wrapper as ml
from PIL import Image, ImageDraw
import torchvision.transforms as transforms
from .config import cfg, Config
from .models import get_models
from .engines import load_engines, deepspeed_available
from .data import get_symmap, tokenize
if deepspeed_available:
import deepspeed
class Classifier():
def __init__( self, config=None, device=None, amp=None, dtype=None, attention=None ):
self.loading = True
# yes I can just grab **kwargs and forward them here
self.load_config( config=config, device=device, amp=amp, dtype=dtype, attention=attention )
self.load_model()
self.loading = False
def load_config( self, config=None, device=None, amp=None, dtype=None, attention=None ):
if config:
_logger.info(f"Loading YAML: {config}")
cfg.load_yaml( config )
try:
cfg.format( training=False )
cfg.dataset.use_hdf5 = False # could use cfg.load_hdf5(), but why would it ever need to be loaded for inferencing
except Exception as e:
raise e # throw an error because I'm tired of silent errors messing things up for me
if amp is None:
amp = cfg.inference.amp
if dtype is None or dtype == "auto":
dtype = cfg.inference.weight_dtype
if device is None:
device = cfg.device
cfg.device = device
cfg.mode = "inferencing"
cfg.trainer.backend = cfg.inference.backend
cfg.trainer.weight_dtype = dtype
cfg.inference.weight_dtype = dtype
self.device = device
self.dtype = cfg.inference.dtype
self.amp = amp
self.model_kwargs = {}
def load_model( self ):
load_engines.cache_clear()
self.engines = load_engines(training=False, **self.model_kwargs)
for name, engine in self.engines.items():
if self.dtype != torch.int8:
engine.to(self.device, dtype=self.dtype if not self.amp else torch.float32)
self.engines.eval()
self.symmap = get_symmap()
self.width = 300
self.height = 80
self.transform = transforms.Compose([
transforms.Resize((self.height, self.width)),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
_logger.info("Loaded model")
@torch.inference_mode()
def inference( self, image, temperature=1.0 ):
model = None
for name, engine in self.engines.items():
model = engine.module
break
image = self.transform(image).to(self.device).to(self.dtype)
with torch.autocast("cuda", dtype=self.dtype, enabled=self.amp):
answer = model( image=[image], sampling_temperature=temperature )
answer = [ "".join(answer) ]
answer = answer[0].replace('<s>', "").replace("</s>", "") # it would be better to just slice between these, but I can't be assed
return answer