fixed issues now that shark-runtime patch is added (#92)

* updated README * added debug printouts * added debug printouts Co-authored-by: Elias Joseph <elias@nod-labs.com>
2026-04-03 03:00:17 -04:00 · 2022-06-06 16:09:26 -07:00
parent 39f00a8404
commit 6cb7a56787
3 changed files with 20 additions and 11 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,4 @@
 [submodule "inference/thirdparty/shark-runtime"]
 	path = inference/thirdparty/shark-runtime
-	url = https://github.com/NodLabs/shark-runtime.git
+	url =https://github.com/nod-ai/SHARK-Runtime.git
+	branch = shark-06032022
--- a/inference/src/dshark.cc
+++ b/inference/src/dshark.cc
@@ -849,7 +849,11 @@ ModelInstanceState::ProcessRequests(
        model_state_->TritonMemoryManager(), false,
        nullptr));

-    InitializeRuntimeEnvironment(&input_module_, &hal_module_, &device_, &instance_, &context_);
+    ModelInstanceState::device_name_code code = hashit(model_state_->DeviceName());
+
+    if(code == GPU_KIND){
+        iree_cuda_set_current_thread(device_);
+    } 
        
    RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
        responses, request_count, all_response_failed,
@@ -906,6 +910,8 @@ ModelInstanceState::ProcessRequests(
 #endif

  // Run...
+
+
  if (!all_response_failed) {
    Execute(&responses, requests, request_count, request_count, input_tensors, output_tensors, output_names);
  }
@@ -976,11 +982,11 @@ ModelInstanceState::ProcessRequests(
        "failed reporting batch request statistics");
  }

-  iree_hal_device_release(device_);
-  iree_vm_context_release(context_);
-  iree_vm_instance_release(instance_);
-  iree_vm_module_release(hal_module_);
-  iree_vm_module_release(input_module_);
+  //iree_hal_device_release(device_);
+  //iree_vm_context_release(context_);
+  //iree_vm_instance_release(instance_);
+  //iree_vm_module_release(hal_module_);
+  //iree_vm_module_release(input_module_);

 }

@@ -1124,7 +1130,8 @@ ModelInstanceState::InitializeRuntimeEnvironment(
      iree_vm_module_t** hal_module,
      iree_hal_device_t** device,
      iree_vm_instance_t** instance,
-      iree_vm_context_t** context)
+      iree_vm_context_t** context
+      )
 {

  // Find the binary file that describes the model. If the model
@@ -1230,8 +1237,6 @@ ModelInstanceState::Execute(
    IREE_LOG(INFO) << "can't find return buffer view";
  }

-  IREE_CHECK_OK(iree::PrintVariantList(output_tensors, (size_t)1024));
-
  // I want to do this in a separate function that is called after execute in process tensors
  // I'm doing it like this for now so I can get a demo running

@@ -1329,6 +1334,8 @@ ModelInstanceState::ModelInstanceState(
      break;
  }

+  InitializeRuntimeEnvironment(&input_module_, &hal_module_, &device_, &instance_, &context_);
+
  //THROW_IF_BACKEND_INSTANCE_ERROR(model_state->LoadModel(
  //  ArtifactFilename(), &device_, &model_path_, &instance_, &context_, &input_module_, &hal_module_));

@@ -1452,3 +1459,4 @@ TRITONBACKEND_ModelInstanceExecute(
 }  // extern "C"

 }}}  // namespace triton::backend::dshark
+
--- a/inference/thirdparty/shark-runtime
+++ b/inference/thirdparty/shark-runtime