diff --git a/.all-contributorsrc b/.all-contributorsrc
index f5b5f043c21..186246ef9e7 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -2645,6 +2645,16 @@
       "maintenance"
      ]
    },
+    {
+      "login": "SaiRevanth25",
+      "name": "Sai Revanth Gowravajhala",
+      "avatar_url": "https://avatars.githubusercontent.com/SaiRevanth25",
+      "profile": "https://github.com/SaiRevanth25",
+      "contributions": [
+        "code",
+        "bug"
+      ]
+    },
     {
       "login": "XinyuWuu",
       "name": "Xinyu Wu",
diff --git a/README.md b/README.md
index 14d9a1ae34e..d23dc2965b1 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 
 > A unified interface for machine learning with time series
 
-:rocket: **Version 0.32.4 out now!** [Check out the release notes here](https://www.sktime.net/en/latest/changelog.html).
+:rocket: **Version 0.33.0 out now!** [Check out the release notes here](https://www.sktime.net/en/latest/changelog.html).
 
 sktime is a library for time series analysis in Python. It provides a unified interface for multiple time series learning tasks. Currently, this includes time series classification, regression, clustering, annotation, and forecasting. It comes with [time series algorithms](https://www.sktime.net/en/stable/estimator_overview.html) and [scikit-learn] compatible tools to build, tune and validate time series models.
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index 47fbde099cb..4f058e2a281 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -19,6 +19,66 @@ For upcoming changes and next releases, see our `milestones
+Version 0.33.0
+--------------
+
+Dependency changes
+~~~~~~~~~~~~~~~~~~
+
+* ``numpy`` (core dependency) bounds have been updated to ``>=1.21,<2.2``
+* ``scikit-base`` (core dependency) bounds have been updated to ``>=0.6.1,<0.10.0``
+* ``skpro`` (soft dependency) bounds have been updated to ``>=2,<2.7.0``
+* ``holidays`` (transformations soft dependency) bounds have been updated to ``>=0.29,<0.57``
+* ``pykan`` (deep learning soft dependency) bounds have been updated to ``>=0.2,<0.2.7``
+* ``mne`` (transformations soft dependency) bounds have been updated to ``>=1.5,<1.9``
+* ``dask`` (data container and parallelization back-end soft dependency) bounds have been updated to ``<2024.8.3``
+* ``pytorch-forecasting`` (forecasting soft dependency) bounds have been updated to ``>=1.0.0,<1.2.0``
+
+Deprecations and removals
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* in ``DirectReductionForecaster`` the default for ``windows_identical`` has changed
+  to ``False``.
+
+Maintenance
+~~~~~~~~~~~
+
+* [MNT] Try to reduce load for the runners (:pr:`7061`) :user:`benHeid`
+* [MNT] 0.33.0 deprecations and change actions (:pr:`7091`) :user:`fkiraly`
+* [MNT] fpp3 datasets URLs changed on CRAN; updated ``_fpp3_loaders.py`` accordingly (:pr:`7084`) :user:`ericjb`
+* [MNT] remove <3.11 restriction for ``pytorch-forecasting``, add upper bound (:pr:`7092`) :user:`fkiraly`
+* [MNT] [Dependabot](deps): Update ``dask`` requirement from ``<2024.8.2`` to ``<2024.8.3`` (:pr:`7062`) :user:`dependabot[bot]`
+* [MNT] [Dependabot](deps): Update ``numpy`` requirement from ``<2.1,>=1.21`` to ``>=1.21,<2.2`` (:pr:`7007`) :user:`dependabot[bot]`
+* [MNT] [Dependabot](deps): Update ``scikit-base`` requirement from ``<0.9.0,>=0.6.1`` to ``>=0.6.1,<0.10.0`` (:pr:`7035`) :user:`dependabot[bot]`
+* [MNT] [Dependabot](deps): Update ``holidays`` requirement from ``<0.56,>=0.29`` to ``>=0.29,<0.57`` (:pr:`7096`) :user:`dependabot[bot]`
+* [MNT] [Dependabot](deps): Update ``pykan`` requirement from ``<0.2.2,>=0.2`` to ``>=0.2,<0.2.7`` (:pr:`7010`) :user:`dependabot[bot]`
+* [MNT] [Dependabot](deps): Update ``mne`` requirement from ``<1.8,>=1.5`` to ``>=1.5,<1.9`` (:pr:`7004`) :user:`dependabot[bot]`
+
+Documentation
+~~~~~~~~~~~~~
+
+* [DOC] Adds :user:`SaiRevanth25` contributions to all-contributors file (:pr:`7085`) :user:`SaiRevanth25`
+* [DOC] fix typo and formatting in installation docs (:pr:`7060`) :user:`Saptarshi-Bandopadhyay`
+
+Enhancements
+~~~~~~~~~~~~
+
+* [ENH] change ``test_inheritance`` to be more lenient to framework level extensions (:pr:`7067`) :user:`fkiraly`
+
+Contributors
+~~~~~~~~~~~~
+
+:user:`benHeid`,
+:user:`ericjb`,
+:user:`fkiraly`,
+:user:`SaiRevanth25`,
+:user:`Saptarshi-Bandopadhyay`
+
+
 Version 0.32.4 - 2024-09-06
 ---------------------------
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index 50c7fbd72b2..cc094bc4352 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -87,9 +87,9 @@ Development versions
 --------------------
 To install the latest development version of ``sktime``, or earlier versions, the sequence of steps is as follows:
 
-Step 1 - ``git`` clone the ``sktime`` repository, the latest version or an earlier version.
-Step 2 - ensure build requirements are satisfied
-Step 3 - ``pip`` install the package from a ``git`` clone, with the ``editable`` parameter.
+| Step 1 - ``git`` clone the ``sktime`` repository, the latest version or an earlier version.
+| Step 2 - ensure build requirements are satisfied
+| Step 3 - ``pip`` install the package from a ``git`` clone, with the ``editable`` parameter.
 
 Detail instructions for all steps are given below. For brevity, we discuss steps 1 and 3 first; step 2 is discussed at the end, as it will depend on the operating system.
@@ -121,7 +121,7 @@ Valid version strings are the repository's ``git`` tags, which can be inspected
 
 You can also `download `_ a zip archive of the version from GitHub.
 
-Step 2 - building sktime from source
+Step 3 - building sktime from source
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 To build and install ``sktime`` from source, navigate to the local clone's root directory and type:
@@ -152,7 +152,7 @@ This allows editing and extending the code in-place. See also
 every time the source code of a compiled extension is changed (for instance
 when switching branches or pulling changes from upstream).
-Building binary packages and installers
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Step 2 - Building binary packages and installers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 The ``.whl`` package and ``.exe`` installers can be built with:
diff --git a/pyproject.toml b/pyproject.toml
index 2bfaa22f195..8a2f63cebf8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "sktime"
-version = "0.32.4"
+version = "0.33.0"
 description = "A unified framework for machine learning with time series"
 readme = "README.md"
 keywords = [
@@ -56,7 +56,7 @@ dependencies = [
     "numpy<2.1,>=1.21",  # required for framework layer and base class logic
     "packaging",  # for estimator specific dependency parsing
     "pandas<2.3.0,>=1.1",  # pandas is the main in-memory data container
-    "scikit-base>=0.6.1,<0.9.0",  # base module for sklearn compatible base API
+    "scikit-base>=0.6.1,<0.10.0",  # base module for sklearn compatible base API
     "scikit-learn>=0.24,<1.6.0",  # required for estimators and framework layer
     "scipy<2.0.0,>=1.2",  # required for estimators and framework layer
 ]
@@ -81,7 +81,7 @@ all_extras = [
     'autots<0.7,>=0.6.1',
     "cloudpickle",
     "dash!=2.9.0",
-    "dask<2024.8.2; extra == 'dataframe'",
+    "dask<2024.8.3; extra == 'dataframe'",
     "dtaidistance<2.4",
     "dtw-python",
     'esig==0.9.7; python_version < "3.10"',
@@ -125,7 +125,7 @@ all_extras_pandas2 = [
     'autots<0.7,>=0.6.1',
     "cloudpickle",
     "dash!=2.9.0",
-    "dask<2024.8.2; extra == 'dataframe'",
+    "dask<2024.8.3; extra == 'dataframe'",
     "dtaidistance<2.4",
     "dtw-python",
     'esig==0.9.7; python_version < "3.10"',
@@ -217,8 +217,8 @@ regression = [
 transformations = [
     'esig<0.10,>=0.9.7; python_version < "3.11"',
     "filterpy<1.5,>=1.4.5",
-    "holidays>=0.29,<0.56",
-    "mne>=1.5,<1.8",
+    "holidays>=0.29,<0.57",
+    "mne>=1.5,<1.9",
     'numba<0.61,>=0.53',
     "pycatch22>=0.4,<0.4.6",
     "statsmodels<0.15,>=0.12.1",
@@ -287,8 +287,8 @@ dl = [
     'tensorflow<2.17,>=2; python_version < "3.12"',
     'torch; python_version < "3.12"',
     'transformers[torch]<4.41.0; python_version < "3.12"',
-    'pykan<0.2.2,>=0.2; python_version > "3.9.7"',
-    'pytorch-forecasting>=1.0.0; python_version < "3.11"',
+    'pykan>=0.2,<0.2.7; python_version > "3.9.7"',
+    'pytorch-forecasting>=1.0.0,<1.2.0',
 ]
 mlflow = [
     "mlflow",
@@ -300,7 +300,7 @@ mlflow_tests = [
     "moto",
 ]
 numpy1 = [
-    "numpy<2.0.0",
+    "numpy<3.0.0",
 ]
 pandas1 = [
     "pandas<2.0.0",
diff --git a/sktime/__init__.py b/sktime/__init__.py
index ec1aa33e120..8e8cb293bff 100644
--- a/sktime/__init__.py
+++ b/sktime/__init__.py
@@ -1,6 +1,6 @@
 """sktime."""
 
-__version__ = "0.32.4"
+__version__ = "0.33.0"
 
 __all__ = ["show_versions"]
diff --git a/sktime/base/_base.py b/sktime/base/_base.py
index e4551a3ae9b..4b4af3cf16f 100644
--- a/sktime/base/_base.py
+++ b/sktime/base/_base.py
@@ -149,7 +149,7 @@ class BaseObject(_HTMLDocumentationLinkMixin, _BaseObject):
     """,
     }
 
-    # TODO 0.33.0: check whether python 3.8 has reached EoL.
+    # TODO 0.34.0: check whether python 3.8 has reached EoL.
     # If so, remove warning altogether
     def __init__(self):
         super().__init__()
@@ -163,7 +163,7 @@ def __init__(self):
         py39_or_higher = SpecifierSet(">=3.9")
         sys_version = sys.version.split(" ")[0]
 
-        # todo 0.33.0 - check whether python 3.8 eol is reached.
+        # todo 0.34.0 - check whether python 3.8 eol is reached.
         # If yes, remove this msg.
         if sys_version not in py39_or_higher:
             warn(
@@ -184,7 +184,7 @@ def __init__(self):
         # for rationale, see _handle_numpy2_softdeps
         self._handle_numpy2_softdeps()
 
-    # TODO 0.33.0: check list of numpy 2 incompatible soft deps
+    # TODO 0.34.0: check list of numpy 2 incompatible soft deps
     # remove any from NOT_NP2_COMPATIBLE that become compatible
     def _handle_numpy2_softdeps(self):
         """Handle tags for soft deps that are not numpy 2 compatible.
diff --git a/sktime/classification/early_classification/_probability_threshold.py b/sktime/classification/early_classification/_probability_threshold.py
index 12c7d7517ea..0ddb564f2eb 100644
--- a/sktime/classification/early_classification/_probability_threshold.py
+++ b/sktime/classification/early_classification/_probability_threshold.py
@@ -20,7 +20,7 @@
 from sktime.utils.validation.panel import check_X
 
 
-# TODO: fix this in 0.33.0
+# TODO: fix this in 0.34.0
 # base class should have been changed to BaseEarlyClassifier
 class ProbabilityThresholdEarlyClassifier(BaseClassifier):
     """Probability Threshold Early Classifier.
diff --git a/sktime/datasets/_fpp3_loaders.py b/sktime/datasets/_fpp3_loaders.py
index 745eba4d7ca..bd0f17811a7 100755
--- a/sktime/datasets/_fpp3_loaders.py
+++ b/sktime/datasets/_fpp3_loaders.py
@@ -70,8 +70,8 @@


 def _get_dataset_url(dataset_name):
-    url_fpp3 = "https://cran.r-project.org/src/contrib/fpp3_0.5.tar.gz"
-    url_tsibble = "https://cran.r-project.org/src/contrib/tsibble_1.1.4.tar.gz"
+    url_fpp3 = "https://cran.r-project.org/src/contrib/fpp3_1.0.0.tar.gz"
+    url_tsibble = "https://cran.r-project.org/src/contrib/tsibble_1.1.5.tar.gz"
     url_tsibbledata = "https://cran.r-project.org/src/contrib/tsibbledata_0.4.1.tar.gz"
 
     if dataset_name in fpp3:
diff --git a/sktime/datasets/tests/test_datadownload.py b/sktime/datasets/tests/test_datadownload.py
index 625ed5cd33e..4fbf44e7d28 100644
--- a/sktime/datasets/tests/test_datadownload.py
+++ b/sktime/datasets/tests/test_datadownload.py
@@ -110,6 +110,22 @@ def test_load_forecasting_data_invalid_name(name):
 @pytest.mark.datadownload
 def test_load_fpp3():
     """Test loading downloaded dataset from ."""
+
+    import requests
+
+    from sktime.datasets._fpp3_loaders import _get_dataset_url
+
+    for dataset_name in ["aus_accommodation", "pedestrian", "ansett"]:
+        ret, url = _get_dataset_url(dataset_name)
+        assert ret is True
+        try:
+            response = requests.head(url)
+            if response.status_code != 200:
+                ret = False
+        except requests.RequestException:
+            ret = False
+        assert ret is True
+
     olympic_running = load_fpp3("olympic_running")
     assert isinstance(olympic_running, pd.DataFrame)
diff --git a/sktime/forecasting/base/_fh.py b/sktime/forecasting/base/_fh.py
index 6a9d05bfa00..16f84ee378f 100644
--- a/sktime/forecasting/base/_fh.py
+++ b/sktime/forecasting/base/_fh.py
@@ -806,7 +806,7 @@ def _to_relative(fh: ForecastingHorizon, cutoff=None) -> ForecastingHorizon:
             absolute = _coerce_to_period(absolute, freq=fh._freq)
             cutoff = _coerce_to_period(cutoff, freq=fh._freq)
 
-        # TODO: 0.33.0:
+        # TODO: 0.34.0:
         # Check at every minor release whether lower pandas bound >=0.15.0
         # if yes, can remove the workaround in the "else" condition and the check
         #
diff --git a/sktime/forecasting/base/tests/test_fh.py b/sktime/forecasting/base/tests/test_fh.py
index 3e99663b51a..6c594b4146b 100644
--- a/sktime/forecasting/base/tests/test_fh.py
+++ b/sktime/forecasting/base/tests/test_fh.py
@@ -733,7 +733,7 @@ def test_exponential_smoothing_case_with_naive():
 
 
 # TODO: Replace this long running test with fast unit test
-# todo 0.33.0: check whether numpy 2 bound is still necessary
+# todo 0.34.0: check whether numpy 2 bound is still necessary
 @pytest.mark.skipif(
     not run_test_module_changed(["sktime.forecasting.base", "sktime.datatypes"])
     or not _check_estimator_deps(AutoARIMA, severity="none")
diff --git a/sktime/forecasting/compose/_pipeline.py b/sktime/forecasting/compose/_pipeline.py
index 1a07abe1ea1..8403e906b37 100644
--- a/sktime/forecasting/compose/_pipeline.py
+++ b/sktime/forecasting/compose/_pipeline.py
@@ -163,7 +163,7 @@ def _get_inverse_transform(self, transformers, y, X=None, mode=None):
                 if len(levels) == 1:
                     levels = levels[0]
                 yt[ix] = y.xs(ix, level=levels, axis=1)
-                # todo 0.33.0 - check why this cannot be easily removed
+                # todo 0.34.0 - check why this cannot be easily removed
                 # in theory, we should get rid of the "Coverage" case treatment
                 # (the legacy naming convention was removed in 0.23.0)
                 # deal with the "Coverage" case, we need to get rid of this
@@ -1816,7 +1816,7 @@ def get_test_params(cls, parameter_set="default"):
         params1 = {"forecaster_X": fx, "forecaster_y": fy}
 
         # example with probabilistic capability
-        # todo 0.33.0: check if numpy<2 is still needed
+        # todo 0.34.0: check if numpy<2 is still needed
         if _check_soft_dependencies(["pmdarima", "numpy<2"], severity="none"):
             fy_proba = ARIMA()
         else:
diff --git a/sktime/forecasting/compose/_reduce.py b/sktime/forecasting/compose/_reduce.py
index 6b261b0ee89..c58128284cc 100644
--- a/sktime/forecasting/compose/_reduce.py
+++ b/sktime/forecasting/compose/_reduce.py
@@ -1828,10 +1828,6 @@ def _get_expected_pred_idx(self, fh):
         return fh_idx
 
 
-# TODO (release 0.33.0)
-# change the default of `windows_identical` to `False`
-# update the docstring for parameter `windows_identical`
-# remove the corresponding warning and simplify __init__
 class DirectReductionForecaster(BaseForecaster, _ReducerMixin):
     """Direct reduction forecaster, incl single-output, multi-output, exogeneous Dir.
@@ -1898,7 +1894,7 @@ class DirectReductionForecaster(BaseForecaster, _ReducerMixin):
         "panel" = second lowest level, one reduced model per panel level (-2)
         if there are 2 or less levels, "global" and "panel" result in the same
         if there is only 1 level (single time series), all three settings agree
-    windows_identical : bool, optional, default=True
+    windows_identical : bool, optional, default=False
         Specifies whether all direct models use the same number of observations
         or a different number of observations.
 
         * `False` : Window size differs for each forecasting horizon.
          Window length corresponds to
          (total observations + 1 - window_length + forecasting horizon).
-
-        Default value will change to `False` in version 0.33.0.
     """
 
     _tags = {
@@ -1932,7 +1926,7 @@ def __init__(
         X_treatment="concurrent",
         impute_method="bfill",
         pooling="local",
-        windows_identical="changing_value",
+        windows_identical=False,
     ):
         self.window_length = window_length
         self.transformers = transformers
@@ -1942,22 +1936,6 @@ def __init__(
         self.impute_method = impute_method
         self.pooling = pooling
         self.windows_identical = windows_identical
-        if windows_identical == "changing_value":
-            warn(
-                "In `DirectReductionForecaster`, the default value of parameter "
-                "`windows_identical` will change to `False` in version 0.33.0. "
-                "Before the introduction of `windows_identical`, the parameter "
-                "defaulted implicitly to `True` when `X_treatment` was set to "
-                "`shifted`, and to `False` when `X_treatment` was set to "
-                "`concurrent`. To keep current behaviour and to silence this "
-                "warning, set `windows_identical` explicitly.",
-            )
-            if X_treatment == "shifted":
-                self._windows_identical = True
-            else:
-                self._windows_identical = False
-        else:
-            self._windows_identical = windows_identical
 
         self._lags = list(range(window_length))
         super().__init__()
@@ -1985,7 +1963,7 @@ def _fit(self, y, X, fh):
         """Fit dispatcher based on X_treatment and windows_identical."""
         # shifted X (future X unknown) and identical windows reduce to
         # multioutput regression, o/w fit multiple individual estimators
-        if (self.X_treatment == "shifted") and (self._windows_identical is True):
+        if (self.X_treatment == "shifted") and (self.windows_identical is True):
            return self._fit_multioutput(y=y, X=X, fh=fh)
         else:
             return self._fit_multiple(y=y, X=X, fh=fh)
 
     def _predict(self, X=None, fh=None):
         """Predict dispatcher based on X_treatment and windows_identical."""
         if self.X_treatment == "shifted":
-            if self._windows_identical is True:
+            if self.windows_identical is True:
                 return self._predict_multioutput(X=X, fh=fh)
             else:
                 return self._predict_multiple(X=self._X, fh=fh)
@@ -2085,7 +2063,7 @@ def _fit_multiple(self, y, X=None, fh=None):
         impute_method = self.impute_method
         X_treatment = self.X_treatment
-        windows_identical = self._windows_identical
+        windows_identical = self.windows_identical
 
         # lagger_y_to_X_ will lag y to obtain the sklearn X
         lags = self._lags
diff --git a/sktime/forecasting/model_selection/__init__.py b/sktime/forecasting/model_selection/__init__.py
index 387dabc83ed..79cb28f024f 100644
--- a/sktime/forecasting/model_selection/__init__.py
+++ b/sktime/forecasting/model_selection/__init__.py
@@ -20,7 +20,7 @@
 )
 
 
-# todo 0.33.0 - check whether we should remove, otherwise bump
+# todo 0.34.0 - check whether we should remove, otherwise bump
 # still used in blog posts and old tutorials
 def temporal_train_test_split(
     y, X=None, test_size=None, train_size=None, fh=None, anchor="start"
@@ -47,7 +47,7 @@ def temporal_train_test_split(
     )
 
 
-# todo 0.33.0 - check whether we should remove, otherwise bump
+# todo 0.34.0 - check whether we should remove, otherwise bump
 # still used in blog posts and old tutorials
 def ExpandingWindowSplitter(fh=1, initial_window=10, step_length=1):
     """Legacy export of Expanding window splitter.
@@ -70,7 +70,7 @@ def ExpandingWindowSplitter(fh=1, initial_window=10, step_length=1):
     return _EWSplitter(fh=fh, initial_window=initial_window, step_length=step_length)
 
 
-# todo 0.33.0 - check whether we should remove, otherwise bump
+# todo 0.34.0 - check whether we should remove, otherwise bump
 # still used in blog posts and old tutorials
 def SlidingWindowSplitter(
     fh=1, window_length=10, step_length=1, initial_window=None, start_with_window=True
diff --git a/sktime/forecasting/model_selection/_tune.py b/sktime/forecasting/model_selection/_tune.py
index 1f27f40fefa..ebf861dd900 100644
--- a/sktime/forecasting/model_selection/_tune.py
+++ b/sktime/forecasting/model_selection/_tune.py
@@ -2,7 +2,6 @@
 # copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
 """Implements grid search functionality to tune forecasters."""
 
-__author__ = ["mloning", "fkiraly", "aiwalter"]
 __all__ = [
     "ForecastingGridSearchCV",
     "ForecastingRandomizedSearchCV",
@@ -85,7 +84,7 @@ def __init__(
         if tune_by_variable:
             self.set_tags(**{"scitype:y": "univariate"})
 
-        # todo 0.33.0: check if this is still necessary
+        # todo 0.34.0: check if this is still necessary
         # n_jobs is deprecated, left due to use in tutorials, books, blog posts
         if n_jobs != "deprecated":
             warn(
diff --git a/sktime/forecasting/pytorchforecasting.py b/sktime/forecasting/pytorchforecasting.py
index c1c2ec9a5d0..7925676c7dd 100644
--- a/sktime/forecasting/pytorchforecasting.py
+++ b/sktime/forecasting/pytorchforecasting.py
@@ -66,7 +66,7 @@ class PytorchForecastingTFT(_PytorchForecastingAdapter):
     >>> model = PytorchForecastingTFT(
     ...     trainer_params={
     ...         "max_epochs": 5,  # for quick test
-    ...         "limit_train_batches": 10,  # for quick test
+    ...         "limit_train_batches": 2,  # for quick test
     ...     },
     ... )
     >>> # fit and predict
@@ -204,7 +204,7 @@ def get_test_params(cls, parameter_set="default"):
             {
                 "trainer_params": {
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
+                    "limit_train_batches": 2,  # for quick test
                     "enable_checkpointing": False,
                     "logger": False,
                 },
@@ -212,27 +212,29 @@ def get_test_params(cls, parameter_set="default"):
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
                 "model_params": {
-                    "hidden_size": 8,
+                    "hidden_size": 4,
                     "lstm_layers": 1,
                     "log_interval": -1,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "random_log_path": True,  # fix multiprocess file access error in CI
             },
             {
                 "trainer_params": {
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
+                    "limit_train_batches": 2,  # for quick test
                     "enable_checkpointing": False,
                     "logger": False,
                 },
                 "model_params": {
-                    "hidden_size": 8,
+                    "hidden_size": 4,
                     "lstm_layers": 1,
                     "dropout": 0.1,
                     "optimizer": "Adam",  # avoid jdb78/pytorch-forecasting#1571 bug in the CI
                     "log_interval": -1,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
@@ -255,30 +257,31 @@ def get_test_params(cls, parameter_set="default"):
             {
                 "trainer_params": {
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
+                    "limit_train_batches": 2,  # for quick test
                     "enable_checkpointing": False,
                     "logger": False,
                 },
                 "model_params": {
                     "log_interval": -1,
-                    "hidden_size": 8,
+                    "hidden_size": 4,
                     "lstm_layers": 1,
                 },
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "random_log_path": True,  # fix multiprocess file access error in CI
             },
             {
                 "trainer_params": {
                     "callbacks": [early_stop_callback],
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
+                    "limit_train_batches": 2,  # for quick test
                     "enable_checkpointing": False,
                     "logger": False,
                 },
                 "model_params": {
-                    "hidden_size": 8,
+                    "hidden_size": 4,
                     "lstm_layers": 1,
                     "dropout": 0.1,
                     # "loss": QuantileLoss(),
                     "optimizer": "Adam",
                     # avoid jdb78/pytorch-forecasting#1571 bug in the CI
                     "log_interval": -1,
                 },
@@ -291,6 +294,7 @@ def get_test_params(cls, parameter_set="default"):
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "random_log_path": True,  # fix multiprocess file access error in CI
             },
         ]
@@ -482,13 +486,14 @@ def get_test_params(cls, parameter_set="default"):
             {
                 "trainer_params": {
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
+                    "limit_train_batches": 2,  # for quick test
                     "enable_checkpointing": False,
                     "logger": False,
                 },
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "model_params": {
                     "num_blocks": [1, 1],
                     "num_block_layers": [1, 1],
@@ -500,7 +505,7 @@ def get_test_params(cls, parameter_set="default"):
             {
                 "trainer_params": {
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
+                    "limit_train_batches": 2,  # for quick test
                     "enable_checkpointing": False,
                     "logger": False,
                 },
@@ -514,6 +519,7 @@ def get_test_params(cls, parameter_set="default"):
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "random_log_path": True,  # fix multiprocess file access error in CI
             },
         ]
@@ -531,7 +537,7 @@ def get_test_params(cls, parameter_set="default"):
             {
                 "trainer_params": {
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
+                    "limit_train_batches": 2,  # for quick test
                     "enable_checkpointing": False,
                     "logger": False,
                 },
@@ -544,13 +550,14 @@ def get_test_params(cls, parameter_set="default"):
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "random_log_path": True,  # fix multiprocess file access error in CI
             },
             {
                 "trainer_params": {
                     "callbacks": [early_stop_callback],
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
+                    "limit_train_batches": 2,  # for quick test
                     "enable_checkpointing": False,
                     "logger": False,
                 },
@@ -565,6 +572,7 @@ def get_test_params(cls, parameter_set="default"):
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "random_log_path": True,  # fix multiprocess file access error in CI
             },
         ]
@@ -770,39 +778,38 @@ def get_test_params(cls, parameter_set="default"):
             {
                 "trainer_params": {
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
-                    "enable_checkpointing": False,
+                    "limit_train_batches": 2,  # for quick test
                     "logger": False,
                 },
                 "model_params": {
                     "cell_type": "GRU",
                     "rnn_layers": 1,
-                    "hidden_size": 3,
-                    "enable_checkpointing": False,
+                    "hidden_size": 2,
                     "log_interval": -1,
                 },
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "random_log_path": True,  # fix multiprocess file access error in CI
                 "deterministic": True,  # to pass test_score
             },
             {
                 "trainer_params": {
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
-                    "enable_checkpointing": False,
+                    "limit_train_batches": 2,  # for quick test
                     "logger": False,
                 },
                 "model_params": {
-                    "cell_type": "GRU",
-                    "rnn_layers": 2,
-                    "hidden_size": 3,
+                    "cell_type": "LSTM",
+                    "rnn_layers": 1,
+                    "hidden_size": 2,
                     "log_interval": -1,
                 },
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "random_log_path": True,  # fix multiprocess file access error in CI
                 "deterministic": True,  # to pass test_score
             },
@@ -821,16 +828,19 @@ def get_test_params(cls, parameter_set="default"):
             {
                 "trainer_params": {
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
-                    "enable_checkpointing": False,
+                    "limit_train_batches": 2,  # for quick test
                     "logger": False,
                 },
                 "model_params": {
+                    "cell_type": "GRU",
+                    "rnn_layers": 1,
+                    "hidden_size": 2,
                     "log_interval": -1,
                 },
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "random_log_path": True,  # fix multiprocess file access error in CI
                 "deterministic": True,  # to pass test_score
             },
@@ -838,18 +848,19 @@ def get_test_params(cls, parameter_set="default"):
             {
                 "trainer_params": {
                     "callbacks": [early_stop_callback],
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
-                    "enable_checkpointing": False,
+                    "limit_train_batches": 2,  # for quick test
                     "logger": False,
                 },
                 "model_params": {
-                    "cell_type": "GRU",
-                    "rnn_layers": 3,
+                    "cell_type": "LSTM",
+                    "rnn_layers": 1,
                     "log_interval": -1,
+                    "hidden_size": 2,
                 },
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "random_log_path": True,  # fix multiprocess file access error in CI
                 "deterministic": True,  # to pass test_score
             },
@@ -1053,15 +1064,16 @@ def get_test_params(cls, parameter_set="default"):
             {
                 "trainer_params": {
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
+                    "limit_train_batches": 2,  # for quick test
                     "enable_checkpointing": False,
                     "logger": False,
                 },
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "model_params": {
-                    "hidden_size": 8,
+                    "hidden_size": 4,
                     "n_blocks": [1, 1],
                     "n_layers": 1,
                     "log_interval": -1,
                 },
                 "random_log_path": True,  # fix multiprocess file access error in CI
             },
@@ -1071,14 +1083,14 @@ def get_test_params(cls, parameter_set="default"):
             {
                 "trainer_params": {
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
+                    "limit_train_batches": 2,  # for quick test
                     "enable_checkpointing": False,
                     "logger": False,
                 },
                 "model_params": {
                     "interpolation_mode": "nearest",
                     "activation": "Tanh",
-                    "hidden_size": 8,
+                    "hidden_size": 4,
                     "n_blocks": [1, 1],
                     "n_layers": 1,
                     "log_interval": -1,
@@ -1086,6 +1098,7 @@ def get_test_params(cls, parameter_set="default"):
                 },
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "random_log_path": True,  # fix multiprocess file access error in CI
             },
         ]
@@ -1103,12 +1116,12 @@ def get_test_params(cls, parameter_set="default"):
             {
                 "trainer_params": {
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
+                    "limit_train_batches": 2,  # for quick test
                     "enable_checkpointing": False,
                     "logger": False,
                 },
                 "model_params": {
-                    "hidden_size": 8,
+                    "hidden_size": 4,
                     "n_blocks": [1, 1],
                     "n_layers": 1,
                     "log_interval": -1,
                 },
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "random_log_path": True,  # fix multiprocess file access error in CI
             },
@@ -1116,20 +1129,21 @@ def get_test_params(cls, parameter_set="default"):
             {
                 "trainer_params": {
                     "callbacks": [early_stop_callback],
                     "max_epochs": 1,  # for quick test
-                    "limit_train_batches": 10,  # for quick test
+                    "limit_train_batches": 2,  # for quick test
                     "enable_checkpointing": False,
                     "logger": False,
                 },
                 "model_params": {
                     "interpolation_mode": "nearest",
                     "activation": "Tanh",
-                    "hidden_size": 8,
+                    "hidden_size": 4,
                     "n_blocks": [1, 1],
                     "n_layers": 1,
                     "log_interval": -1,
@@ -1137,6 +1151,7 @@ def get_test_params(cls, parameter_set="default"):
                 "dataset_params": {
                     "max_encoder_length": 3,
                 },
+                "train_to_dataloader_params": {"batch_size": 2},
                 "random_log_path": True,  # fix multiprocess file access error in CI
             },
         ]
diff --git a/sktime/tests/test_all_estimators.py b/sktime/tests/test_all_estimators.py
index 84227aaf853..9abdfd11aee 100644
--- a/sktime/tests/test_all_estimators.py
+++ b/sktime/tests/test_all_estimators.py
@@ -25,8 +25,8 @@
     BasePairwiseTransformerPanel,
 )
 from sktime.exceptions import NotFittedError
-from sktime.forecasting.base import BaseForecaster, _BaseGlobalForecaster
-from sktime.registry import all_estimators, get_base_class_list, scitype
+from sktime.forecasting.base import BaseForecaster
+from sktime.registry import all_estimators, get_base_class_lookup, scitype
 from sktime.regression.deep_learning.base import BaseDeepRegressor
 from sktime.tests._config import (
     EXCLUDE_ESTIMATORS,
@@ -917,29 +917,22 @@ def test_inheritance(self, estimator_class):
             f"estimator: {estimator_class} has fit method, but"
             f"is not a sub-class of BaseEstimator."
         )
-        from sktime.pipeline import Pipeline
 
-        if issubclass(estimator_class, Pipeline):
-            return
-
-        VALID_BASE_CLS = tuple(get_base_class_list(include_baseobjs=False))
-        VALID_MIXIN = get_base_class_list(mixin=True)
-        VALID_SECOND_CLS = tuple(VALID_MIXIN + [_BaseGlobalForecaster])
-
-        # Usually estimators inherit only from one BaseEstimator type, but in some cases
-        # they may be predictor and transformer at the same time (e.g. pipelines)
-        n_base_types = sum(issubclass(estimator_class, cls) for cls in VALID_BASE_CLS)
-
-        assert 2 >= n_base_types >= 1
+        est_scitypes = scitype(
+            estimator_class, force_single_scitype=False, coerce_to_list=True
+        )
 
-        # If the estimator inherits from more than one base estimator type, we check if
-        # one of them is a transformer base type or _BaseGlobalForecaster type
-        # Global forecasters inherit from _BaseGlobalForecaster,
-        # _BaseGlobalForecaster inherit from BaseForecaster
-        # therefore, global forecasters is subclass of
-        # _BaseGlobalForecaster and BaseForecaster
-        if n_base_types > 1:
-            assert issubclass(estimator_class, VALID_SECOND_CLS)
+        class_lookup = get_base_class_lookup()
+
+        for est_scitype in est_scitypes:
+            if est_scitype in class_lookup:
+                expected_parent = class_lookup[est_scitype]
+                msg = (
+                    f"Estimator: {estimator_class} is tagged as having scitype "
+                    f"{est_scitype} via tag object_type, but is not a sub-class of "
+                    f"the corresponding base class {expected_parent.__name__}."
+ ) + assert issubclass(estimator_class, expected_parent), msg def test_has_common_interface(self, estimator_class): """Check estimator implements the common interface.""" diff --git a/sktime/utils/dependencies/_dependencies.py b/sktime/utils/dependencies/_dependencies.py index 91c52b37d9d..1c4a1322baf 100644 --- a/sktime/utils/dependencies/_dependencies.py +++ b/sktime/utils/dependencies/_dependencies.py @@ -16,7 +16,6 @@ def _check_soft_dependencies( *packages, - package_import_alias="deprecated", severity="error", obj=None, msg=None, @@ -37,8 +36,6 @@ def _check_soft_dependencies( ``_check_soft_dependencies(("package1", "package2"))`` ``_check_soft_dependencies(["package1", "package2"])`` - package_import_alias : ignored, present only for backwards compatibility - severity : str, "error" (default), "warning", "none" behaviour for raising errors or warnings @@ -80,17 +77,6 @@ def _check_soft_dependencies( ------- boolean - whether all packages are installed, only if no exception is raised """ - # todo 0.33.0: remove this warning - if package_import_alias != "deprecated": - warnings.warn( - "In sktime _check_soft_dependencies, the package_import_alias argument " - "is deprecated and no longer has any effect. " - "The argument will be removed in version 0.33.0, so users of the " - "_check_soft_dependencies utility should not pass this argument anymore.", - DeprecationWarning, - stacklevel=2, - ) - if len(packages) == 1 and isinstance(packages[0], (tuple, list)): packages = packages[0] if not all(isinstance(x, str) for x in packages):