diff --git a/src/maggotuba/models/modules.py b/src/maggotuba/models/modules.py
index 011c7f93e57d75e5cc2f57aeaca4d14f920a0416..5a9a4f2156b7f441493d1d7f9af68fcdc0260528 100644
--- a/src/maggotuba/models/modules.py
+++ b/src/maggotuba/models/modules.py
@@ -1,3 +1,4 @@
+import logging
 import os
 from pathlib import Path
 import torch
@@ -53,7 +54,11 @@ class MaggotModule(nn.Module):
     @property
     def model(self):
         if self._model is None:
-            self._model = self.load_model(self.config, self.ptfilepath)
+            try:
+                self._model = self.load_model(self.config, self.ptfilepath)
+            except Exception as e:
+                logging.error(e)
+                logging.error('could not load or initialize the model; check the load_model class method')
         return self._model
 
     @model.setter
@@ -86,6 +91,39 @@ class MaggotModule(nn.Module):
         return self.model.parameters(recurse)
 
 
+"""
+Initialize a model's weights and bias (if any).
+
+Adapted from `behavior_model.models.neural_nets.AutoEncoder._init_weights`
+
+Passing `None` as argument `weight_init` or `has_bias` selects the default
+value.
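+
+Example (a minimal sketch with a hypothetical layer; `nn` is `torch.nn`):
+
+    layer = nn.Linear(10, 4)                       # module to initialize
+    init_weights(layer, 'kaiming', has_bias=True)  # kaiming_uniform_ weights + zero bias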
+"""
+def init_weights(model, weight_init='xavier', has_bias=False):
+    if has_bias:
+        nn.init.constant_(model.bias, 0)
+    if weight_init is None:
+        weight_init = 'xavier'
+    _init = dict(
+            kaiming='kaiming_uniform',
+            xavier='xavier_uniform',
+            ).get(weight_init, weight_init)
+    if _init == 'orthogonal':
+        nn.init.orthogonal_(model.weight)
+    elif _init == 'xavier_uniform':
+        nn.init.xavier_uniform_(model.weight)
+    elif _init == 'kaiming_uniform':
+        nn.init.kaiming_uniform_(model.weight, nonlinearity='relu')
+    else:
+        raise ValueError(f"initialization method not supported: {weight_init}")
+
+
+"""
+Note: by default, MaggotEncoder represents a retrained encoder
+      (retrained = trained on a behavior-tagging task);
+      see PretrainedMaggotEncoder for encoders that were only pretrained
+      (pretrained = trained on a self-supervised task).
+"""
 class MaggotEncoder(MaggotModule):
     def __init__(self, path,
             cfgfile=None,
@@ -102,7 +140,23 @@ class MaggotEncoder(MaggotModule):
     @classmethod
     def load_model(cls, config, path):
         encoder = Encoder(**config)
-        encoder.load_state_dict(torch.load(path))
+        _reason = None
+        if config.get('load_state', True):
+            try:
+                encoder.load_state_dict(torch.load(path))
+            except Exception as e:
+                _reason = e
+        else:
+            _reason = '"load_state" is set to false'
+        # if loading failed or "load_state" is false, (re-)initialize the weights
+        if _reason:
+            logging.debug(f"(re-)initializing the encoder ({_reason})")
+            _init, _bias = config.get('init', None), config.get('bias', None)
+            for child in encoder.children():
+                if isinstance(child,
+                              (nn.Linear, nn.Conv2d, nn.Conv1d,
+                               nn.ConvTranspose1d, nn.ConvTranspose2d)):
+                    init_weights(child, _init, _bias)
         return encoder
 
     @functools.lru_cache(maxsize=1)
@@ -137,6 +191,26 @@ class MaggotEncoder(MaggotModule):
             batch = torch.index_select(batch, dim, mask)
         return self.forward(batch)
 
+    """
+    Determine whether the encoder was pretrained as part of a MaggotUBA
+    autoencoder, or only initialized in the context of testing the benefit of
+    using a pretrained encoder.
+
+    This is to be distinguished from the MaggotEncoder/PretrainedMaggotEncoder
+    classes that instead represent the different states *after* and *before*
+    retraining.
+
+    The purpose of this method is to let the trainer decide, at retraining
+    time, whether to pre-train the classifier with static encoder weights and
+    then fine-tune the full encoder+classifier, or instead train both the
+    classifier and the encoder in a single stage. The two-stage retraining
+    process only makes sense if the encoder was pretrained.
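+
+    Sketch of the decision (a simplified view of the actual training code):
+
+        if model.encoder.was_pretrained():
+            ...  # two-stage: pre-train the classifier, then fine-tune both
+        else:
+            ...  # single-stage: train the encoder and classifier together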
+
+    See `trainers.MaggotTrainer.train`.
+    """
+    def was_pretrained(self):
+        return self.config.get('load_state', True)
+
 class PretrainedMaggotEncoder(MaggotEncoder):
     def __init__(self, path,
             cfgfile=None,
@@ -214,13 +288,7 @@ class DeepLinear(nn.Module):
     def init_layers(self):
         for layer in self.layers:
             if isinstance(layer, nn.Linear):
-                if self.weight_init == "xavier":
-                    nn.init.xavier_uniform_(layer.weight)
-                elif self.weight_init == "kaiming":
-                    nn.init.kaiming_normal_(layer.weight)
-                else:
-                    raise NotImplementedError(self.weight_init)
-                nn.init.zeros_(layer.bias)
+                init_weights(layer, self.weight_init, True)
 
     def forward(self, x):
         return self.layers(x)
@@ -263,12 +331,7 @@ class MaggotClassifier(MaggotModule):
         try:
             model.load(path)
         except:
-            # try:
-            #     path = config["clf_path"]
-            # except KeyError:
-                model.init_layers()
-            # else:
-            #     model.load(path)
+            model.init_layers()
         return model
 
     @property
@@ -313,6 +376,10 @@ class SupervisedMaggot(nn.Module):
         clf.config["enc_path"] = str(enc.ptfilepath)
         clf.save()
 
+    def parameters(self, recurse=True):
+        self.clf.model # force parameter loading or initialization
+        return super().parameters(recurse)
+
 class MultiscaleSupervisedMaggot(nn.Module):
     def __init__(self, cfgfilepath, behaviors=[], n_layers=1):
         super().__init__()
@@ -337,3 +404,7 @@ class MultiscaleSupervisedMaggot(nn.Module):
         clf.config["enc_path"] = [str(p) for p in enc.ptfilepaths]
         clf.save()
 
+    def parameters(self, recurse=True):
+        self.clf.model # force parameter loading or initialization
+        return super().parameters(recurse)
+
diff --git a/src/maggotuba/models/train_model.py b/src/maggotuba/models/train_model.py
index cda4a58655c0dfcdcbd6382f17461ff82c206ef0..768e82896f2b6d8e30b27010c9328e89a1a7a62d 100644
--- a/src/maggotuba/models/train_model.py
+++ b/src/maggotuba/models/train_model.py
@@ -1,9 +1,10 @@
 from taggingbackends.data.labels import Labels
 from taggingbackends.data.dataset import LarvaDataset
 from taggingbackends.explorer import check_permissions
-from maggotuba.models.trainers import MaggotTrainer, MultiscaleMaggotTrainer, new_generator
+from maggotuba.models.trainers import make_trainer, new_generator
 import json
 import glob
+import logging
 
 def train_model(backend, layers=1, pretrained_model_instance="default", subsets=(1, 0, 0), **kwargs):
     # make_dataset generated or moved the larva_dataset file into data/interim/{instance}/
@@ -18,14 +19,14 @@ def train_model(backend, layers=1, pretrained_model_instance="default", subsets=
     # copy and load the pretrained model into the model instance directory
     if isinstance(pretrained_model_instance, str):
         config_file = import_pretrained_model(backend, pretrained_model_instance)
-        model = MaggotTrainer(config_file, labels, layers)
+        model = make_trainer(config_file, labels, layers)
     else:
         pretrained_model_instances = pretrained_model_instance
         config_files = import_pretrained_models(backend, pretrained_model_instances)
-        model = MultiscaleMaggotTrainer(config_files, labels, layers)
+        model = make_trainer(config_files, labels, layers)
     # fine-tune and save the model
     model.train(dataset)
-    print(f"saving model \"{backend.model_instance}\"")
+    logging.info(f"saving model \"{backend.model_instance}\"")
     model.save()
 
 # TODO: merge the below two functions
diff --git a/src/maggotuba/models/trainers.py b/src/maggotuba/models/trainers.py
index af121d141d9a6813c23e93c3d93629c2e3b62d72..ede624423ee157a50046c2052e82088ff57e9582 100644
--- a/src/maggotuba/models/trainers.py
+++ b/src/maggotuba/models/trainers.py
@@ -1,3 +1,4 @@
+import logging
 import numpy as np
 import torch
 import torch.nn as nn
@@ -147,27 +148,30 @@ class MaggotTrainer:
         model.train() # this only sets the model in training mode (enables gradients)
         model.to(self.device)
         criterion = nn.CrossEntropyLoss()
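+        # total optimization-step budget; halved below when the encoder was
+        # pretrained, so that each of the two training stages gets half of it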
+        nsteps = self.config['optim_iter']
         # pre-train the classifier with static encoder weights
-        optimizer = torch.optim.Adam(model.clf.parameters())
-        print("pre-training the classifier...")
-        for step in range(self.config["optim_iter"] // 2):
-            optimizer.zero_grad()
-            # TODO: add an option for renormalizing the input
-            data, expected = self.draw(dataset)
-            predicted = self.forward(data, train=True)
-            loss = criterion(predicted, expected)
-            #print("pre-train", torch.mean(loss).detach().numpy())
-            loss.backward()
-            optimizer.step()
+        if model.encoder.was_pretrained():
+            nsteps = nsteps // 2
+            optimizer = torch.optim.Adam(model.clf.parameters())
+            logging.info("pre-training the classifier...")
+            for step in range(nsteps):
+                optimizer.zero_grad()
+                # TODO: add an option for renormalizing the input
+                data, expected = self.draw(dataset)
+                predicted = self.forward(data, train=True)
+                loss = criterion(predicted, expected)
+                loss.backward()
+                optimizer.step()
         # fine-tune both the encoder and the classifier
         optimizer = torch.optim.Adam(model.parameters())
-        print("fine-tuning the encoder and classifier...")
-        for step in range(self.config["optim_iter"] // 2):
+        logging.info(
+                ("fine-tuning" if model.encoder.was_pretrained() else "training") + \
+                 " the encoder and classifier...")
+        for step in range(nsteps):
             optimizer.zero_grad()
             data, expected = self.draw(dataset)
             predicted = self.forward(data, train=True)
             loss = criterion(predicted, expected)
-            #print("fine-tune", torch.mean(loss).detach().numpy())
             loss.backward()
             optimizer.step()
         #
@@ -217,6 +221,7 @@ class MaggotTrainer:
 def new_generator(seed=0b11010111001001101001110):
     return torch.Generator(device).manual_seed(seed)
 
+
 class MultiscaleMaggotTrainer(MaggotTrainer):
     def __init__(self, cfgfilepath, behaviors=[], n_layers=1,
             average_body_length=1.0, device=device):
@@ -244,3 +249,18 @@ class MultiscaleMaggotTrainer(MaggotTrainer):
                     self._default_encoder_config = cfg
         return self._default_encoder_config
 
+
+"""
+Pick the appropriate trainer for the given config file(s).
+
+For now, the config files are not actually inspected: only the type of the
+argument is (a list of config files selects the multiscale trainer). Using this
+function is nevertheless recommended, as more model types may come with future
+releases.
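+
+Example (hypothetical config file names; `labels` and `layers` as passed in
+`train_model.train_model`):
+
+    single = make_trainer("autoencoder_config.json", labels, layers)
+    multi = make_trainer(["scale1_config.json", "scale2_config.json"], labels, layers)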
+"""
+def make_trainer(config_file, *args, **kwargs):
+    if isinstance(config_file, list): # multiple encoders
+        config_files = config_file
+        model = MultiscaleMaggotTrainer(config_files, *args, **kwargs)
+    else: # single encoder
+        model = MaggotTrainer(config_file, *args, **kwargs)
+    return model
+
diff --git a/test/train_no_pretrain.sh b/test/train_no_pretrain.sh
new file mode 100755
index 0000000000000000000000000000000000000000..616f4cb7650c1af81b911a6e6e7c543b844b5e26
--- /dev/null
+++ b/test/train_no_pretrain.sh
@@ -0,0 +1,134 @@
+#!/bin/bash
+
+[ -f ./pyproject.toml ] || cd ..
+[ -f ./pyproject.toml ] || { echo "failed to locate MaggotUBA-adapter project" >&2; exit 1; }
+
+# pick any data file;
+# could fetch and untar data file https://gitlab.pasteur.fr/nyx/artefacts/-/raw/master/PlanarLarvae/trx.mat.tgz?inline=false instead, for example
+datapath="t15/FCF_attP2_1500062@UAS_Chrimson_Venus_X_0070/r_LED100_30s2x15s30s#n#n#n@100/20140723_113810"
+reldatapath="data/raw/subset4/$datapath"
+if ! [ -d "$reldatapath" ]; then
+mkdir -p "$reldatapath"
+cp "../data/$datapath/trx.mat" "$reldatapath"
+fi
+
+modeldir=pretrained_models/untrained
+if ! [ -d "$modeldir" ]; then
+mkdir -p "$modeldir"
+cat << "EOT" > "${modeldir}/autoencoder_config.json"
+{
+  "project_dir": "",
+  "seed": 100,
+  "exp_name": "",
+  "data_dir": "",
+  "raw_data_dir": "",
+  "log_dir": "",
+  "exp_folder": "",
+  "config": "",
+  "num_workers": 4,
+  "n_features": 10,
+  "len_traj": 20,
+  "len_pred": 20,
+  "dim_latent": 100,
+  "activation": "relu",
+  "enc_filters": [
+    128,
+    64,
+    32,
+    32,
+    32,
+    16
+  ],
+  "dec_filters": [
+    128,
+    64,
+    32,
+    32,
+    32,
+    16
+  ],
+  "enc_kernel": [
+    [
+      5,
+      1
+    ],
+    [
+      1,
+      20
+    ],
+    [
+      5,
+      1
+    ],
+    [
+      1,
+      20
+    ],
+    [
+      5,
+      1
+    ],
+    [
+      1,
+      20
+    ]
+  ],
+  "dec_kernel": [
+    [
+      1,
+      20
+    ],
+    [
+      5,
+      1
+    ],
+    [
+      1,
+      20
+    ],
+    [
+      5,
+      1
+    ],
+    [
+      1,
+      20
+    ],
+    [
+      5,
+      1
+    ]
+  ],
+  "bias": false,
+  "enc_depth": 4,
+  "dec_depth": 4,
+  "init": "kaiming",
+  "n_clusters": 2,
+  "dim_reduc": "UMAP",
+  "optim_iter": 1000,
+  "pseudo_epoch": 100,
+  "batch_size": 128,
+  "lr": 0.005,
+  "loss": "MSE",
+  "cluster_penalty": null,
+  "cluster_penalty_coef": 0.0,
+  "length_penalty_coef": 0.0,
+  "grad_clip": 100.0,
+  "optimizer": "adam",
+  "target": [
+    "past",
+    "present",
+    "future"
+  ],
+  "spine_interpolation": "linear",
+  "frame_interval": 0.1,
+  "swap_head_tail": false,
+  "load_state": false
+}
+EOT
+cp pretrained_models/default/best_validated_encoder.pt "$modeldir"
+fi
+
+rm -rf models/subset4
+
+JULIA_PROJECT=$(realpath ../TaggingBackends) poetry run tagging-backend train --model-instance subset4 --pretrained-model-instance untrained --labels run,cast,back,hunch