From bcce70fca694ca72c5042d358fa457ef25046414 Mon Sep 17 00:00:00 2001
From: Brandon Rising
Date: Mon, 17 Jul 2023 16:27:33 -0400
Subject: [PATCH] Testing different session opts, added timings for testing

---
 invokeai/app/invocations/onnx.py                  | 13 ++++++++-----
 invokeai/backend/model_management/models/base.py  |  4 +++-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/invokeai/app/invocations/onnx.py b/invokeai/app/invocations/onnx.py
index e08d0bfe2b..8ad2f43877 100644
--- a/invokeai/app/invocations/onnx.py
+++ b/invokeai/app/invocations/onnx.py
@@ -195,7 +195,7 @@ class ONNXTextToLatentsInvocation(BaseInvocation):
         latents = latents.cpu().numpy()
 
         # TODO: better execution device handling
-        latents = latents.astype(np.float32)
+        latents = latents.astype(np.float16)
 
         # get the initial random noise unless the user supplied it
         do_classifier_free_guidance = True
@@ -232,10 +232,11 @@ class ONNXTextToLatentsInvocation(BaseInvocation):
         unet.create_session()
 
         timestep_dtype = next(
-            (input.type for input in unet.session.get_inputs() if input.name == "timestep"), "tensor(float)"
+            (input.type for input in unet.session.get_inputs() if input.name == "timestep"), "tensor(float16)"
         )
         timestep_dtype = ORT_TO_NP_TYPE[timestep_dtype]
-
+        import time
+        times = []
         for i in tqdm(range(len(scheduler.timesteps))):
             t = scheduler.timesteps[i]
             # expand the latents if we are doing classifier free guidance
@@ -245,7 +246,9 @@
 
             # predict the noise residual
             timestep = np.array([t], dtype=timestep_dtype)
+            start_time = time.time()
             noise_pred = unet(sample=latent_model_input, timestep=timestep, encoder_hidden_states=prompt_embeds)
+            times.append(time.time() - start_time)
             noise_pred = noise_pred[0]
 
             # perform guidance
@@ -262,14 +265,14 @@
             # call the callback, if provided
             #if callback is not None and i % callback_steps == 0:
             #    callback(i, t, latents)
-
+        print(times)
         unet.release_session()
 
         torch.cuda.empty_cache()
 
         name = f'{context.graph_execution_state_id}__{self.id}'
         context.services.latents.save(name, latents)
-        return build_latents_output(latents_name=name, latents=latents)
+        return build_latents_output(latents_name=name, latents=torch.from_numpy(latents))
 
 # Latent to image
 class ONNXLatentsToImageInvocation(BaseInvocation):
diff --git a/invokeai/backend/model_management/models/base.py b/invokeai/backend/model_management/models/base.py
index 08c7475c69..7c3d7bde33 100644
--- a/invokeai/backend/model_management/models/base.py
+++ b/invokeai/backend/model_management/models/base.py
@@ -20,7 +20,7 @@ from typing import List, Dict, Optional, Type, Literal, TypeVar, Generic, Callab
 import onnx
 from onnx import numpy_helper
 from onnx.external_data_helper import set_external_data
-from onnxruntime import InferenceSession, OrtValue, SessionOptions
+from onnxruntime import InferenceSession, OrtValue, SessionOptions, ExecutionMode, GraphOptimizationLevel
 
 class InvalidModelException(Exception):
     pass
@@ -552,6 +552,8 @@ class IAIOnnxRuntimeModel:
             sess = SessionOptions()
             #self._external_data.update(**external_data)
             # sess.add_external_initializers(list(self.data.keys()), list(self.data.values()))
+            sess.execution_mode = ExecutionMode.ORT_PARALLEL
+            sess.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
             self.session = InferenceSession(self.proto.SerializeToString(), providers=['CUDAExecutionProvider', 'CPUExecutionProvider'], sess_options=sess)
             #self.session = InferenceSession("tmp.onnx", providers=[self.provider], sess_options=self.sess_options)
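
Below is a minimal standalone sketch (not part of the patch) illustrating the two
session options enabled above and the per-call timing pattern added to the
denoising loop. The model path "unet/model.onnx" and the timed_run() helper are
placeholders for illustration only, not real InvokeAI code.

    import time

    from onnxruntime import (
        ExecutionMode,
        GraphOptimizationLevel,
        InferenceSession,
        SessionOptions,
    )

    sess_options = SessionOptions()
    # Same two options the patch sets in IAIOnnxRuntimeModel.create_session():
    # run independent graph branches in parallel and apply all graph optimizations.
    sess_options.execution_mode = ExecutionMode.ORT_PARALLEL
    sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

    # CUDA first with CPU fallback, mirroring the provider list used in the patch.
    session = InferenceSession(
        "unet/model.onnx",  # placeholder path
        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        sess_options=sess_options,
    )

    def timed_run(feeds: dict) -> float:
        """Run the session once and return wall-clock seconds, the same
        measurement the patch records around each UNet call."""
        start = time.time()
        session.run(None, feeds)
        return time.time() - start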