improve UI of textual inversion frontend

- File selection box now accepts directories that don't exist yet (sketched below).
- Fixed a crash when resume is selected but no checkpoint files are available to resume from.
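The file-selection change lives in the second changed file, which this view does not include. Purely as a hypothetical sketch of the described behavior (the function name and logic below are illustrative, not the commit's actual code), the idea is to accept a path when either it already exists as a directory or its parent does, so the frontend can create it later:

    import os

    def accepts_destination(path):
        """Accept an existing directory, or a not-yet-existing one whose parent exists."""
        p = os.path.abspath(os.path.expanduser(path))
        return os.path.isdir(p) or os.path.isdir(os.path.dirname(p))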
Author: Lincoln Stein
Date: 2023-01-15 17:04:14 -05:00
Parent: 251a409087
Commit: bfeafa8d5e
2 changed files with 37 additions and 25 deletions


@@ -518,10 +518,10 @@ def do_textual_inversion_training(
     pretrained_model_name_or_path = model_conf.get('repo_id',None) or Path(model_conf.get('path'))
     assert pretrained_model_name_or_path, f"models.yaml error: neither 'repo_id' nor 'path' is defined for {model}"
     pipeline_args = dict(cache_dir=global_cache_dir('diffusers'))
     # Load tokenizer
     if tokenizer_name:
-        tokenizer = CLIPTokenizer.from_pretrained(tokenizer_name,cache_dir=global_cache_dir('transformers'))
+        tokenizer = CLIPTokenizer.from_pretrained(tokenizer_name,**pipeline_args)
     else:
         tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer", **pipeline_args)
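Both branches now pass **pipeline_args, so a user-specified tokenizer is cached under the 'diffusers' cache directory instead of the 'transformers' one. For context, a minimal standalone equivalent of the call (the model id and cache path are illustrative, not taken from this repository):

    import os
    from transformers import CLIPTokenizer

    # Illustrative: one shared cache location for both code paths
    cache = os.path.expanduser("~/invokeai/models")  # hypothetical path
    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", cache_dir=cache)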
@@ -670,24 +670,28 @@ def do_textual_inversion_training(
     logger.info(f" Total optimization steps = {max_train_steps}")
     global_step = 0
     first_epoch = 0
+    resume_step = None
     # Potentially load in the weights and states from a previous save
     if resume_from_checkpoint:
-        if resume_from_checkpoint != "latest":
-            path = os.path.basename(resume_from_checkpoint)
-        else:
-            # Get the most recent checkpoint
-            dirs = os.listdir(output_dir)
-            dirs = [d for d in dirs if d.startswith("checkpoint")]
-            dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
-            path = dirs[-1]
-        accelerator.print(f"Resuming from checkpoint {path}")
-        accelerator.load_state(os.path.join(output_dir, path))
-        global_step = int(path.split("-")[1])
-        resume_global_step = global_step * gradient_accumulation_steps
-        first_epoch = resume_global_step // num_update_steps_per_epoch
-        resume_step = resume_global_step % num_update_steps_per_epoch
+        try:
+            if resume_from_checkpoint != "latest":
+                path = os.path.basename(resume_from_checkpoint)
+            else:
+                # Get the most recent checkpoint
+                dirs = os.listdir(output_dir)
+                dirs = [d for d in dirs if d.startswith("checkpoint")]
+                dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
+                path = dirs[-1]
+            accelerator.print(f"Resuming from checkpoint {path}")
+            accelerator.load_state(os.path.join(output_dir, path))
+            global_step = int(path.split("-")[1])
+            resume_global_step = global_step * gradient_accumulation_steps
+            first_epoch = resume_global_step // num_update_steps_per_epoch
+            resume_step = resume_global_step % num_update_steps_per_epoch
+        except:
+            logger.warn("No checkpoint available to resume from")
     # Only show the progress bar once on each machine.
     progress_bar = tqdm(range(global_step, max_train_steps), disable=not accelerator.is_local_main_process)
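Two things can raise here: with resume_from_checkpoint == "latest" and no checkpoint directories present, dirs[-1] raises IndexError on the empty list; with an explicit path that does not exist, accelerator.load_state fails. The bare except now downgrades both to a warning. A minimal sketch of the "latest" resolution and the resume arithmetic in isolation (the directory layout and numbers are illustrative):

    import os

    def find_latest_checkpoint(output_dir):
        """Return the newest 'checkpoint-<step>' directory name, or None if there is none."""
        dirs = [d for d in os.listdir(output_dir) if d.startswith("checkpoint")]
        if not dirs:
            return None
        return max(dirs, key=lambda d: int(d.split("-")[1]))

    # Map a saved optimizer step back to (epoch, step-within-epoch):
    global_step = 625                    # e.g. parsed from "checkpoint-625"
    gradient_accumulation_steps = 2
    num_update_steps_per_epoch = 400
    resume_global_step = global_step * gradient_accumulation_steps   # 1250
    first_epoch = resume_global_step // num_update_steps_per_epoch   # 3
    resume_step = resume_global_step % num_update_steps_per_epoch    # 50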
# Only show the progress bar once on each machine.
progress_bar = tqdm(range(global_step, max_train_steps), disable=not accelerator.is_local_main_process)
@@ -700,7 +704,7 @@ def do_textual_inversion_training(
         text_encoder.train()
         for step, batch in enumerate(train_dataloader):
             # Skip steps until we reach the resumed step
-            if resume_from_checkpoint and epoch == first_epoch and step < resume_step:
+            if resume_step and resume_from_checkpoint and epoch == first_epoch and step < resume_step:
                 if step % gradient_accumulation_steps == 0:
                     progress_bar.update(1)
                 continue
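The reordering matters because resume_step is now None whenever the except branch fires, and comparing an int against None raises in Python 3; the added resume_step check short-circuits before the comparison is evaluated:

    resume_step = None            # as left by a failed resume
    step = 0
    # step < resume_step          -> TypeError: '<' not supported between 'int' and 'NoneType'
    if resume_step and step < resume_step:   # guard short-circuits, no comparison happens
        pass

Note the guard also treats resume_step == 0 like no resume at all, which is harmless: in that case there are no steps to skip anyway.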